diff options
Diffstat (limited to 'target/linux/generic/backport-5.4')
270 files changed, 0 insertions, 85566 deletions
diff --git a/target/linux/generic/backport-5.4/010-Kbuild-don-t-hardcode-path-to-awk-in-scripts-ld-vers.patch b/target/linux/generic/backport-5.4/010-Kbuild-don-t-hardcode-path-to-awk-in-scripts-ld-vers.patch deleted file mode 100644 index 7ac4f9d240..0000000000 --- a/target/linux/generic/backport-5.4/010-Kbuild-don-t-hardcode-path-to-awk-in-scripts-ld-vers.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 13b1ecc3401653a355798eb1dee10cc1608202f4 Mon Sep 17 00:00:00 2001 -From: Felix Fietkau <nbd@nbd.name> -Date: Mon, 18 Jan 2016 12:27:49 +0100 -Subject: [PATCH 33/34] Kbuild: don't hardcode path to awk in - scripts/ld-version.sh - -On some systems /usr/bin/awk does not exist, or is broken. Find it via -$PATH instead. - -Signed-off-by: Felix Fietkau <nbd@nbd.name> ---- - scripts/ld-version.sh | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/scripts/ld-version.sh -+++ b/scripts/ld-version.sh -@@ -1,6 +1,7 @@ --#!/usr/bin/awk -f -+#!/bin/sh - # SPDX-License-Identifier: GPL-2.0 - # extract linker version number from stdin and turn into single number -+exec awk ' - { - gsub(".*\\)", ""); - gsub(".*version ", ""); -@@ -9,3 +10,4 @@ - print a[1]*100000000 + a[2]*1000000 + a[3]*10000; - exit - } -+' diff --git a/target/linux/generic/backport-5.4/011-kbuild-export-SUBARCH.patch b/target/linux/generic/backport-5.4/011-kbuild-export-SUBARCH.patch deleted file mode 100644 index 60defa39c5..0000000000 --- a/target/linux/generic/backport-5.4/011-kbuild-export-SUBARCH.patch +++ /dev/null @@ -1,21 +0,0 @@ -From 173019b66dcc9d68ad9333aa744dad1e369b5aa8 Mon Sep 17 00:00:00 2001 -From: Felix Fietkau <nbd@nbd.name> -Date: Sun, 9 Jul 2017 00:26:53 +0200 -Subject: [PATCH 34/34] kernel: add compile fix for linux 4.9 on x86 - -Signed-off-by: Felix Fietkau <nbd@nbd.name> ---- - Makefile | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/Makefile -+++ b/Makefile -@@ -493,7 +493,7 @@ KBUILD_LDFLAGS := - GCC_PLUGINS_CFLAGS := - CLANG_FLAGS := - --export ARCH 
SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC -+export ARCH SRCARCH SUBARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC - export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE LEX YACC AWK INSTALLKERNEL - export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX - export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ diff --git a/target/linux/generic/backport-5.4/025-power-reset-add-driver-for-LinkStation-power-off.patch b/target/linux/generic/backport-5.4/025-power-reset-add-driver-for-LinkStation-power-off.patch deleted file mode 100644 index 7bf48c3405..0000000000 --- a/target/linux/generic/backport-5.4/025-power-reset-add-driver-for-LinkStation-power-off.patch +++ /dev/null @@ -1,199 +0,0 @@ -From a7f79f99541eff4e6bcae0014eb08d3019337565 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20Gonz=C3=A1lez=20Cabanelas?= <dgcbueu@gmail.com> -Date: Wed, 15 Jul 2020 15:35:14 +0200 -Subject: [PATCH] power: reset: add driver for LinkStation power off -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Some Buffalo LinkStations perform the power off operation, at restart -time, depending on the state of an output pin (LED2/INTn) at the ethernet -PHY. This pin is also used to wake the machine when a WoL packet is -received by the PHY. - -The driver is required by the Buffalo LinkStation LS421DE (ARM MVEBU), -and other models. Without it, the board remains forever halted if a -power off command is executed, unless the PSU is disconnected and -connected again. - -Add the driver to provide the power off function and also make the WoL -feature to be available. 
- -Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com> -Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com> ---- - drivers/power/reset/Kconfig | 11 ++ - drivers/power/reset/Makefile | 1 + - drivers/power/reset/linkstation-poweroff.c | 136 +++++++++++++++++++++ - 3 files changed, 148 insertions(+) - create mode 100644 drivers/power/reset/linkstation-poweroff.c - ---- a/drivers/power/reset/Kconfig -+++ b/drivers/power/reset/Kconfig -@@ -99,6 +99,17 @@ config POWER_RESET_HISI - help - Reboot support for Hisilicon boards. - -+config POWER_RESET_LINKSTATION -+ tristate "Buffalo LinkStation power-off driver" -+ depends on ARCH_MVEBU || COMPILE_TEST -+ depends on OF_MDIO && PHYLIB -+ help -+ This driver supports turning off some Buffalo LinkStations by -+ setting an output pin at the ethernet PHY to the correct state. -+ It also makes the device compatible with the WoL function. -+ -+ Say Y here if you have a Buffalo LinkStation LS421D/E. -+ - config POWER_RESET_MSM - bool "Qualcomm MSM power-off driver" - depends on ARCH_QCOM ---- a/drivers/power/reset/Makefile -+++ b/drivers/power/reset/Makefile -@@ -10,6 +10,7 @@ obj-$(CONFIG_POWER_RESET_GEMINI_POWEROFF - obj-$(CONFIG_POWER_RESET_GPIO) += gpio-poweroff.o - obj-$(CONFIG_POWER_RESET_GPIO_RESTART) += gpio-restart.o - obj-$(CONFIG_POWER_RESET_HISI) += hisi-reboot.o -+obj-${CONFIG_POWER_RESET_LINKSTATION} += linkstation-poweroff.o - obj-$(CONFIG_POWER_RESET_MSM) += msm-poweroff.o - obj-$(CONFIG_POWER_RESET_QCOM_PON) += qcom-pon.o - obj-$(CONFIG_POWER_RESET_OCELOT_RESET) += ocelot-reset.o ---- /dev/null -+++ b/drivers/power/reset/linkstation-poweroff.c -@@ -0,0 +1,136 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * LinkStation power off restart driver -+ * Copyright (C) 2020 Daniel González Cabanelas <dgcbueu@gmail.com> -+ */ -+ -+#include <linux/module.h> -+#include <linux/notifier.h> -+#include <linux/of.h> -+#include <linux/of_mdio.h> -+#include <linux/of_platform.h> -+#include <linux/reboot.h> 
-+#include <linux/phy.h> -+ -+/* Defines from the eth phy Marvell driver */ -+#define MII_MARVELL_COPPER_PAGE 0 -+#define MII_MARVELL_LED_PAGE 3 -+#define MII_MARVELL_WOL_PAGE 17 -+#define MII_MARVELL_PHY_PAGE 22 -+ -+#define MII_PHY_LED_CTRL 16 -+#define MII_88E1318S_PHY_LED_TCR 18 -+#define MII_88E1318S_PHY_WOL_CTRL 16 -+#define MII_M1011_IEVENT 19 -+ -+#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7) -+#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15) -+#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12) -+#define LED2_FORCE_ON (0x8 << 8) -+#define LEDMASK GENMASK(11,8) -+ -+static struct phy_device *phydev; -+ -+static void mvphy_reg_intn(u16 data) -+{ -+ int rc = 0, saved_page; -+ -+ saved_page = phy_select_page(phydev, MII_MARVELL_LED_PAGE); -+ if (saved_page < 0) -+ goto err; -+ -+ /* Force manual LED2 control to let INTn work */ -+ __phy_modify(phydev, MII_PHY_LED_CTRL, LEDMASK, LED2_FORCE_ON); -+ -+ /* Set the LED[2]/INTn pin to the required state */ -+ __phy_modify(phydev, MII_88E1318S_PHY_LED_TCR, -+ MII_88E1318S_PHY_LED_TCR_FORCE_INT, -+ MII_88E1318S_PHY_LED_TCR_INTn_ENABLE | data); -+ -+ if (!data) { -+ /* Clear interrupts to ensure INTn won't be holded in high state */ -+ __phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_MARVELL_COPPER_PAGE); -+ __phy_read(phydev, MII_M1011_IEVENT); -+ -+ /* If WOL was enabled and a magic packet was received before powering -+ * off, we won't be able to wake up by sending another magic packet. -+ * Clear WOL status. 
-+ */ -+ __phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_MARVELL_WOL_PAGE); -+ __phy_set_bits(phydev, MII_88E1318S_PHY_WOL_CTRL, -+ MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS); -+ } -+err: -+ rc = phy_restore_page(phydev, saved_page, rc); -+ if (rc < 0) -+ dev_err(&phydev->mdio.dev, "Write register failed, %d\n", rc); -+} -+ -+static int linkstation_reboot_notifier(struct notifier_block *nb, -+ unsigned long action, void *unused) -+{ -+ if (action == SYS_RESTART) -+ mvphy_reg_intn(MII_88E1318S_PHY_LED_TCR_FORCE_INT); -+ -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block linkstation_reboot_nb = { -+ .notifier_call = linkstation_reboot_notifier, -+}; -+ -+static void linkstation_poweroff(void) -+{ -+ unregister_reboot_notifier(&linkstation_reboot_nb); -+ mvphy_reg_intn(0); -+ -+ kernel_restart("Power off"); -+} -+ -+static const struct of_device_id ls_poweroff_of_match[] = { -+ { .compatible = "buffalo,ls421d" }, -+ { .compatible = "buffalo,ls421de" }, -+ { }, -+}; -+ -+static int __init linkstation_poweroff_init(void) -+{ -+ struct mii_bus *bus; -+ struct device_node *dn; -+ -+ dn = of_find_matching_node(NULL, ls_poweroff_of_match); -+ if (!dn) -+ return -ENODEV; -+ of_node_put(dn); -+ -+ dn = of_find_node_by_name(NULL, "mdio"); -+ if (!dn) -+ return -ENODEV; -+ -+ bus = of_mdio_find_bus(dn); -+ of_node_put(dn); -+ if (!bus) -+ return -EPROBE_DEFER; -+ -+ phydev = phy_find_first(bus); -+ if (!phydev) -+ return -EPROBE_DEFER; -+ -+ register_reboot_notifier(&linkstation_reboot_nb); -+ pm_power_off = linkstation_poweroff; -+ -+ return 0; -+} -+ -+static void __exit linkstation_poweroff_exit(void) -+{ -+ pm_power_off = NULL; -+ unregister_reboot_notifier(&linkstation_reboot_nb); -+} -+ -+module_init(linkstation_poweroff_init); -+module_exit(linkstation_poweroff_exit); -+ -+MODULE_AUTHOR("Daniel González Cabanelas <dgcbueu@gmail.com>"); -+MODULE_DESCRIPTION("LinkStation power off driver"); -+MODULE_LICENSE("GPL v2"); diff --git 
a/target/linux/generic/backport-5.4/026-power-reset-linkstation-poweroff-add-missing-put_dev.patch b/target/linux/generic/backport-5.4/026-power-reset-linkstation-poweroff-add-missing-put_dev.patch deleted file mode 100644 index 66e75bf514..0000000000 --- a/target/linux/generic/backport-5.4/026-power-reset-linkstation-poweroff-add-missing-put_dev.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 1027a42c25cbf8cfc4ade6503c5110aae04866af Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20Gonz=C3=A1lez=20Cabanelas?= <dgcbueu@gmail.com> -Date: Fri, 16 Oct 2020 20:22:37 +0200 -Subject: [PATCH] power: reset: linkstation-poweroff: add missing put_device() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The of_mdio_find_bus() takes a reference to the underlying device -structure, we should release that reference using a put_device() call. - -Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com> -Signed-off-by: Sebastian Reichel <sre@kernel.org> ---- - drivers/power/reset/linkstation-poweroff.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/power/reset/linkstation-poweroff.c -+++ b/drivers/power/reset/linkstation-poweroff.c -@@ -113,6 +113,7 @@ static int __init linkstation_poweroff_i - return -EPROBE_DEFER; - - phydev = phy_find_first(bus); -+ put_device(&bus->dev); - if (!phydev) - return -EPROBE_DEFER; - diff --git a/target/linux/generic/backport-5.4/030-modpost-add-a-helper-to-get-data-pointed-by-a-symbol.patch b/target/linux/generic/backport-5.4/030-modpost-add-a-helper-to-get-data-pointed-by-a-symbol.patch deleted file mode 100644 index cf88c0c69f..0000000000 --- a/target/linux/generic/backport-5.4/030-modpost-add-a-helper-to-get-data-pointed-by-a-symbol.patch +++ /dev/null @@ -1,53 +0,0 @@ -From afa0459daa7b08c7b2c879705b69d39b734a11d0 Mon Sep 17 00:00:00 2001 -From: Masahiro Yamada <yamada.masahiro@socionext.com> -Date: Fri, 15 Nov 2019 02:42:21 +0900 -Subject: [PATCH] modpost: add a helper to get data pointed 
by a symbol - -When CONFIG_MODULE_REL_CRCS is enabled, the value of __crc_* is not -an absolute value, but the address to the CRC data embedded in the -.rodata section. - -Getting the data pointed by the symbol value is somewhat complex. -Split it out into a new helper, sym_get_data(). - -I will reuse it to refactor namespace_from_kstrtabns() in the next -commit. - -Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> ---- - scripts/mod/modpost.c | 17 +++++++++++++---- - 1 file changed, 13 insertions(+), 4 deletions(-) - ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -312,6 +312,18 @@ static const char *sec_name(struct elf_i - return sech_name(elf, &elf->sechdrs[secindex]); - } - -+static void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym) -+{ -+ Elf_Shdr *sechdr = &info->sechdrs[sym->st_shndx]; -+ unsigned long offset; -+ -+ offset = sym->st_value; -+ if (info->hdr->e_type != ET_REL) -+ offset -= sechdr->sh_addr; -+ -+ return (void *)info->hdr + sechdr->sh_offset + offset; -+} -+ - #define strstarts(str, prefix) (strncmp(str, prefix, strlen(prefix)) == 0) - - static enum export export_from_secname(struct elf_info *elf, unsigned int sec) -@@ -701,10 +713,7 @@ static void handle_modversions(struct mo - unsigned int *crcp; - - /* symbol points to the CRC in the ELF object */ -- crcp = (void *)info->hdr + sym->st_value + -- info->sechdrs[sym->st_shndx].sh_offset - -- (info->hdr->e_type != ET_REL ? 
-- info->sechdrs[sym->st_shndx].sh_addr : 0); -+ crcp = sym_get_data(info, sym); - crc = TO_NATIVE(*crcp); - } - sym_update_crc(symname + strlen("__crc_"), mod, crc, diff --git a/target/linux/generic/backport-5.4/031-modpost-refactor-namespace_from_kstrtabns-to-not-har.patch b/target/linux/generic/backport-5.4/031-modpost-refactor-namespace_from_kstrtabns-to-not-har.patch deleted file mode 100644 index 230dc6b89a..0000000000 --- a/target/linux/generic/backport-5.4/031-modpost-refactor-namespace_from_kstrtabns-to-not-har.patch +++ /dev/null @@ -1,62 +0,0 @@ -From e84f9fbbece1585f45a03ccc11eeabe121cadc1b Mon Sep 17 00:00:00 2001 -From: Masahiro Yamada <yamada.masahiro@socionext.com> -Date: Fri, 15 Nov 2019 02:42:22 +0900 -Subject: [PATCH] modpost: refactor namespace_from_kstrtabns() to not hard-code - section name - -Currently, namespace_from_kstrtabns() relies on the fact that -namespace strings are recorded in the __ksymtab_strings section. -Actually, it is coded in include/linux/export.h, but modpost does -not need to hard-code the section name. - -Elf_Sym::st_shndx holds the index of the relevant section. Using it is -a more portable way to get the namespace string. - -Make namespace_from_kstrtabns() simply call sym_get_data(), and delete -the info->ksymtab_strings . - -While I was here, I added more 'const' qualifiers to pointers. 
- -Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> ---- - scripts/mod/modpost.c | 10 +++------- - scripts/mod/modpost.h | 1 - - 2 files changed, 3 insertions(+), 8 deletions(-) - ---- a/scripts/mod/modpost.c -+++ b/scripts/mod/modpost.c -@@ -360,10 +360,10 @@ static enum export export_from_sec(struc - return export_unknown; - } - --static const char *namespace_from_kstrtabns(struct elf_info *info, -- Elf_Sym *kstrtabns) -+static const char *namespace_from_kstrtabns(const struct elf_info *info, -+ const Elf_Sym *sym) - { -- char *value = info->ksymtab_strings + kstrtabns->st_value; -+ const char *value = sym_get_data(info, sym); - return value[0] ? value : NULL; - } - -@@ -605,10 +605,6 @@ static int parse_elf(struct elf_info *in - info->export_unused_gpl_sec = i; - else if (strcmp(secname, "__ksymtab_gpl_future") == 0) - info->export_gpl_future_sec = i; -- else if (strcmp(secname, "__ksymtab_strings") == 0) -- info->ksymtab_strings = (void *)hdr + -- sechdrs[i].sh_offset - -- sechdrs[i].sh_addr; - - if (sechdrs[i].sh_type == SHT_SYMTAB) { - unsigned int sh_link_idx; ---- a/scripts/mod/modpost.h -+++ b/scripts/mod/modpost.h -@@ -143,7 +143,6 @@ struct elf_info { - Elf_Section export_gpl_sec; - Elf_Section export_unused_gpl_sec; - Elf_Section export_gpl_future_sec; -- char *ksymtab_strings; - char *strtab; - char *modinfo; - unsigned int modinfo_len; diff --git a/target/linux/generic/backport-5.4/041-v5.5-arm64-Implement-optimised-checksum-routine.patch b/target/linux/generic/backport-5.4/041-v5.5-arm64-Implement-optimised-checksum-routine.patch deleted file mode 100644 index 00ec7d0207..0000000000 --- a/target/linux/generic/backport-5.4/041-v5.5-arm64-Implement-optimised-checksum-routine.patch +++ /dev/null @@ -1,176 +0,0 @@ -From: Robin Murphy <robin.murphy@arm.com> -Date: Wed, 15 Jan 2020 16:42:39 +0000 -Subject: [PATCH] arm64: Implement optimised checksum routine - -Apparently there exist certain workloads which rely heavily on software 
-checksumming, for which the generic do_csum() implementation becomes a -significant bottleneck. Therefore let's give arm64 its own optimised -version - for ease of maintenance this foregoes assembly or intrisics, -and is thus not actually arm64-specific, but does rely heavily on C -idioms that translate well to the A64 ISA and the typical load/store -capabilities of most ARMv8 CPU cores. - -The resulting increase in checksum throughput scales nicely with buffer -size, tending towards 4x for a small in-order core (Cortex-A53), and up -to 6x or more for an aggressive big core (Ampere eMAG). - -Reported-by: Lingyan Huang <huanglingyan2@huawei.com> -Tested-by: Lingyan Huang <huanglingyan2@huawei.com> -Signed-off-by: Robin Murphy <robin.murphy@arm.com> -Signed-off-by: Will Deacon <will@kernel.org> ---- - create mode 100644 arch/arm64/lib/csum.c - ---- a/arch/arm64/include/asm/checksum.h -+++ b/arch/arm64/include/asm/checksum.h -@@ -36,6 +36,9 @@ static inline __sum16 ip_fast_csum(const - } - #define ip_fast_csum ip_fast_csum - -+extern unsigned int do_csum(const unsigned char *buff, int len); -+#define do_csum do_csum -+ - #include <asm-generic/checksum.h> - - #endif /* __ASM_CHECKSUM_H */ ---- a/arch/arm64/lib/Makefile -+++ b/arch/arm64/lib/Makefile -@@ -1,9 +1,9 @@ - # SPDX-License-Identifier: GPL-2.0 - lib-y := clear_user.o delay.o copy_from_user.o \ - copy_to_user.o copy_in_user.o copy_page.o \ -- clear_page.o memchr.o memcpy.o memmove.o memset.o \ -- memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ -- strchr.o strrchr.o tishift.o -+ clear_page.o csum.o memchr.o memcpy.o memmove.o \ -+ memset.o memcmp.o strcmp.o strncmp.o strlen.o \ -+ strnlen.o strchr.o strrchr.o tishift.o - - ifeq ($(CONFIG_KERNEL_MODE_NEON), y) - obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o ---- /dev/null -+++ b/arch/arm64/lib/csum.c -@@ -0,0 +1,123 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+// Copyright (C) 2019-2020 Arm Ltd. 
-+ -+#include <linux/compiler.h> -+#include <linux/kasan-checks.h> -+#include <linux/kernel.h> -+ -+#include <net/checksum.h> -+ -+/* Looks dumb, but generates nice-ish code */ -+static u64 accumulate(u64 sum, u64 data) -+{ -+ __uint128_t tmp = (__uint128_t)sum + data; -+ return tmp + (tmp >> 64); -+} -+ -+unsigned int do_csum(const unsigned char *buff, int len) -+{ -+ unsigned int offset, shift, sum; -+ const u64 *ptr; -+ u64 data, sum64 = 0; -+ -+ offset = (unsigned long)buff & 7; -+ /* -+ * This is to all intents and purposes safe, since rounding down cannot -+ * result in a different page or cache line being accessed, and @buff -+ * should absolutely not be pointing to anything read-sensitive. We do, -+ * however, have to be careful not to piss off KASAN, which means using -+ * unchecked reads to accommodate the head and tail, for which we'll -+ * compensate with an explicit check up-front. -+ */ -+ kasan_check_read(buff, len); -+ ptr = (u64 *)(buff - offset); -+ len = len + offset - 8; -+ -+ /* -+ * Head: zero out any excess leading bytes. Shifting back by the same -+ * amount should be at least as fast as any other way of handling the -+ * odd/even alignment, and means we can ignore it until the very end. -+ */ -+ shift = offset * 8; -+ data = READ_ONCE_NOCHECK(*ptr++); -+#ifdef __LITTLE_ENDIAN -+ data = (data >> shift) << shift; -+#else -+ data = (data << shift) >> shift; -+#endif -+ -+ /* -+ * Body: straightforward aligned loads from here on (the paired loads -+ * underlying the quadword type still only need dword alignment). The -+ * main loop strictly excludes the tail, so the second loop will always -+ * run at least once. 
-+ */ -+ while (unlikely(len > 64)) { -+ __uint128_t tmp1, tmp2, tmp3, tmp4; -+ -+ tmp1 = READ_ONCE_NOCHECK(*(__uint128_t *)ptr); -+ tmp2 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 2)); -+ tmp3 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 4)); -+ tmp4 = READ_ONCE_NOCHECK(*(__uint128_t *)(ptr + 6)); -+ -+ len -= 64; -+ ptr += 8; -+ -+ /* This is the "don't dump the carry flag into a GPR" idiom */ -+ tmp1 += (tmp1 >> 64) | (tmp1 << 64); -+ tmp2 += (tmp2 >> 64) | (tmp2 << 64); -+ tmp3 += (tmp3 >> 64) | (tmp3 << 64); -+ tmp4 += (tmp4 >> 64) | (tmp4 << 64); -+ tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64); -+ tmp1 += (tmp1 >> 64) | (tmp1 << 64); -+ tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64); -+ tmp3 += (tmp3 >> 64) | (tmp3 << 64); -+ tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64); -+ tmp1 += (tmp1 >> 64) | (tmp1 << 64); -+ tmp1 = ((tmp1 >> 64) << 64) | sum64; -+ tmp1 += (tmp1 >> 64) | (tmp1 << 64); -+ sum64 = tmp1 >> 64; -+ } -+ while (len > 8) { -+ __uint128_t tmp; -+ -+ sum64 = accumulate(sum64, data); -+ tmp = READ_ONCE_NOCHECK(*(__uint128_t *)ptr); -+ -+ len -= 16; -+ ptr += 2; -+ -+#ifdef __LITTLE_ENDIAN -+ data = tmp >> 64; -+ sum64 = accumulate(sum64, tmp); -+#else -+ data = tmp; -+ sum64 = accumulate(sum64, tmp >> 64); -+#endif -+ } -+ if (len > 0) { -+ sum64 = accumulate(sum64, data); -+ data = READ_ONCE_NOCHECK(*ptr); -+ len -= 8; -+ } -+ /* -+ * Tail: zero any over-read bytes similarly to the head, again -+ * preserving odd/even alignment. 
-+ */ -+ shift = len * -8; -+#ifdef __LITTLE_ENDIAN -+ data = (data << shift) >> shift; -+#else -+ data = (data >> shift) << shift; -+#endif -+ sum64 = accumulate(sum64, data); -+ -+ /* Finally, folding */ -+ sum64 += (sum64 >> 32) | (sum64 << 32); -+ sum = sum64 >> 32; -+ sum += (sum >> 16) | (sum << 16); -+ if (offset & 1) -+ return (u16)swab32(sum); -+ -+ return sum >> 16; -+} diff --git a/target/linux/generic/backport-5.4/042-v5.5-arm64-csum-Fix-pathological-zero-length-calls.patch b/target/linux/generic/backport-5.4/042-v5.5-arm64-csum-Fix-pathological-zero-length-calls.patch deleted file mode 100644 index 50b210e14f..0000000000 --- a/target/linux/generic/backport-5.4/042-v5.5-arm64-csum-Fix-pathological-zero-length-calls.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Robin Murphy <robin.murphy@arm.com> -Date: Fri, 17 Jan 2020 15:48:39 +0000 -Subject: [PATCH] arm64: csum: Fix pathological zero-length calls - -In validating the checksumming results of the new routine, I sadly -neglected to test its not-checksumming results. Thus it slipped through -that the one case where @buff is already dword-aligned and @len = 0 -manages to defeat the tail-masking logic and behave as if @len = 8. -For a zero length it doesn't make much sense to deference @buff anyway, -so just add an early return (which has essentially zero impact on -performance). 
- -Signed-off-by: Robin Murphy <robin.murphy@arm.com> -Signed-off-by: Will Deacon <will@kernel.org> ---- - ---- a/arch/arm64/lib/csum.c -+++ b/arch/arm64/lib/csum.c -@@ -20,6 +20,9 @@ unsigned int do_csum(const unsigned char - const u64 *ptr; - u64 data, sum64 = 0; - -+ if (unlikely(len == 0)) -+ return 0; -+ - offset = (unsigned long)buff & 7; - /* - * This is to all intents and purposes safe, since rounding down cannot diff --git a/target/linux/generic/backport-5.4/071-v5.16-00-MIPS-uasm-Enable-muhu-opcode-for-MIPS-R6.patch b/target/linux/generic/backport-5.4/071-v5.16-00-MIPS-uasm-Enable-muhu-opcode-for-MIPS-R6.patch deleted file mode 100644 index 82feb7421d..0000000000 --- a/target/linux/generic/backport-5.4/071-v5.16-00-MIPS-uasm-Enable-muhu-opcode-for-MIPS-R6.patch +++ /dev/null @@ -1,65 +0,0 @@ -From: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Date: Tue, 5 Oct 2021 18:54:02 +0200 -Subject: [PATCH] MIPS: uasm: Enable muhu opcode for MIPS R6 - -Enable the 'muhu' instruction, complementing the existing 'mulu', needed -to implement a MIPS32 BPF JIT. - -Also fix a typo in the existing definition of 'dmulu'. - -Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com> - -This patch is a dependency for my 32-bit MIPS eBPF JIT. 
- -Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com> ---- - ---- a/arch/mips/include/asm/uasm.h -+++ b/arch/mips/include/asm/uasm.h -@@ -145,6 +145,7 @@ Ip_u1(_mtlo); - Ip_u3u1u2(_mul); - Ip_u1u2(_multu); - Ip_u3u1u2(_mulu); -+Ip_u3u1u2(_muhu); - Ip_u3u1u2(_nor); - Ip_u3u1u2(_or); - Ip_u2u1u3(_ori); ---- a/arch/mips/mm/uasm-mips.c -+++ b/arch/mips/mm/uasm-mips.c -@@ -90,7 +90,7 @@ static const struct insn insn_table[insn - RS | RT | RD}, - [insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, - [insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT}, -- [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op), -+ [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op), - RS | RT | RD}, - [insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE}, - [insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE}, -@@ -150,6 +150,8 @@ static const struct insn insn_table[insn - [insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS}, - [insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op), - RS | RT | RD}, -+ [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op), -+ RS | RT | RD}, - #ifndef CONFIG_CPU_MIPSR6 - [insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, - #else ---- a/arch/mips/mm/uasm.c -+++ b/arch/mips/mm/uasm.c -@@ -59,7 +59,7 @@ enum opcode { - insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld, - insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, - insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0, -- insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor, -+ insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor, - insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc, - insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll, - insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, -@@ -344,6 +344,7 @@ I_u1(_mtlo) - I_u3u1u2(_mul) - I_u1u2(_multu) - I_u3u1u2(_mulu) 
-+I_u3u1u2(_muhu) - I_u3u1u2(_nor) - I_u3u1u2(_or) - I_u2u1u3(_ori) diff --git a/target/linux/generic/backport-5.4/071-v5.16-01-mips-uasm-Add-workaround-for-Loongson-2F-nop-CPU-err.patch b/target/linux/generic/backport-5.4/071-v5.16-01-mips-uasm-Add-workaround-for-Loongson-2F-nop-CPU-err.patch deleted file mode 100644 index 3a4d573f80..0000000000 --- a/target/linux/generic/backport-5.4/071-v5.16-01-mips-uasm-Add-workaround-for-Loongson-2F-nop-CPU-err.patch +++ /dev/null @@ -1,31 +0,0 @@ -From: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Date: Tue, 5 Oct 2021 18:54:03 +0200 -Subject: [PATCH] mips: uasm: Add workaround for Loongson-2F nop CPU errata - -This patch implements a workaround for the Loongson-2F nop in generated, -code, if the existing option CONFIG_CPU_NOP_WORKAROUND is set. Before, -the binutils option -mfix-loongson2f-nop was enabled, but no workaround -was done when emitting MIPS code. Now, the nop pseudo instruction is -emitted as "or ax,ax,zero" instead of the default "sll zero,zero,0". This -is consistent with the workaround implemented by binutils. 
- -Link: https://sourceware.org/legacy-ml/binutils/2009-11/msg00387.html - -Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com> ---- - ---- a/arch/mips/include/asm/uasm.h -+++ b/arch/mips/include/asm/uasm.h -@@ -249,7 +249,11 @@ static inline void uasm_l##lb(struct uas - #define uasm_i_bnezl(buf, rs, off) uasm_i_bnel(buf, rs, 0, off) - #define uasm_i_ehb(buf) uasm_i_sll(buf, 0, 0, 3) - #define uasm_i_move(buf, a, b) UASM_i_ADDU(buf, a, 0, b) -+#ifdef CONFIG_CPU_NOP_WORKAROUNDS -+#define uasm_i_nop(buf) uasm_i_or(buf, 1, 1, 0) -+#else - #define uasm_i_nop(buf) uasm_i_sll(buf, 0, 0, 0) -+#endif - #define uasm_i_ssnop(buf) uasm_i_sll(buf, 0, 0, 1) - - static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1, diff --git a/target/linux/generic/backport-5.4/071-v5.16-02-mips-bpf-Add-eBPF-JIT-for-32-bit-MIPS.patch b/target/linux/generic/backport-5.4/071-v5.16-02-mips-bpf-Add-eBPF-JIT-for-32-bit-MIPS.patch deleted file mode 100644 index 7980659961..0000000000 --- a/target/linux/generic/backport-5.4/071-v5.16-02-mips-bpf-Add-eBPF-JIT-for-32-bit-MIPS.patch +++ /dev/null @@ -1,3078 +0,0 @@ -From: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Date: Tue, 5 Oct 2021 18:54:04 +0200 -Subject: [PATCH] mips: bpf: Add eBPF JIT for 32-bit MIPS - -This is an implementation of an eBPF JIT for 32-bit MIPS I-V and MIPS32. -The implementation supports all 32-bit and 64-bit ALU and JMP operations, -including the recently-added atomics. 64-bit div/mod and 64-bit atomics -are implemented using function calls to math64 and atomic64 functions, -respectively. All 32-bit operations are implemented natively by the JIT, -except if the CPU lacks ll/sc instructions. - -Register mapping -================ -All 64-bit eBPF registers are mapped to native 32-bit MIPS register pairs, -and does not use any stack scratch space for register swapping. 
This means -that all eBPF register data is kept in CPU registers all the time, and -this simplifies the register management a lot. It also reduces the JIT's -pressure on temporary registers since we do not have to move data around. - -Native register pairs are ordered according to CPU endiannes, following -the O32 calling convention for passing 64-bit arguments and return values. -The eBPF return value, arguments and callee-saved registers are mapped to -their native MIPS equivalents. - -Since the 32 highest bits in the eBPF FP (frame pointer) register are -always zero, only one general-purpose register is actually needed for the -mapping. The MIPS fp register is used for this purpose. The high bits are -mapped to MIPS register r0. This saves us one CPU register, which is much -needed for temporaries, while still allowing us to treat the R10 (FP) -register just like any other eBPF register in the JIT. - -The MIPS gp (global pointer) and at (assembler temporary) registers are -used as internal temporary registers for constant blinding. CPU registers -t6-t9 are used internally by the JIT when constructing more complex 64-bit -operations. This is precisely what is needed - two registers to store an -operand value, and two more as scratch registers when performing the -operation. - -The register mapping is shown below. - - R0 - $v1, $v0 return value - R1 - $a1, $a0 argument 1, passed in registers - R2 - $a3, $a2 argument 2, passed in registers - R3 - $t1, $t0 argument 3, passed on stack - R4 - $t3, $t2 argument 4, passed on stack - R5 - $t4, $t3 argument 5, passed on stack - R6 - $s1, $s0 callee-saved - R7 - $s3, $s2 callee-saved - R8 - $s5, $s4 callee-saved - R9 - $s7, $s6 callee-saved - FP - $r0, $fp 32-bit frame pointer - AX - $gp, $at constant-blinding - $t6 - $t9 unallocated, JIT temporaries - -Jump offsets -============ -The JIT tries to map all conditional JMP operations to MIPS conditional -PC-relative branches. 
The MIPS branch offset field is 18 bits, in bytes, -which is equivalent to the eBPF 16-bit instruction offset. However, since -the JIT may emit more than one CPU instruction per eBPF instruction, the -field width may overflow. If that happens, the JIT converts the long -conditional jump to a short PC-relative branch with the condition -inverted, jumping over a long unconditional absolute jmp (j). - -This conversion will change the instruction offset mapping used for jumps, -and may in turn result in more branch offset overflows. The JIT therefore -dry-runs the translation until no more branches are converted and the -offsets do not change anymore. There is an upper bound on this of course, -and if the JIT hits that limit, the last two iterations are run with all -branches being converted. - -Tail call count -=============== -The current tail call count is stored in the 16-byte area of the caller's -stack frame that is reserved for the callee in the o32 ABI. The value is -initialized in the prologue, and propagated to the tail-callee by skipping -the initialization instructions when emitting the tail call. - -Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com> ---- - create mode 100644 arch/mips/net/bpf_jit_comp.c - create mode 100644 arch/mips/net/bpf_jit_comp.h - create mode 100644 arch/mips/net/bpf_jit_comp32.c - ---- a/arch/mips/net/Makefile -+++ b/arch/mips/net/Makefile -@@ -2,4 +2,9 @@ - # MIPS networking code - - obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o --obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o -+ -+ifeq ($(CONFIG_32BIT),y) -+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o -+else -+ obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o -+endif ---- /dev/null -+++ b/arch/mips/net/bpf_jit_comp.c -@@ -0,0 +1,1020 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Just-In-Time compiler for eBPF bytecode on MIPS. -+ * Implementation of JIT functions common to 32-bit and 64-bit CPUs. 
-+ * -+ * Copyright (c) 2021 Anyfi Networks AB. -+ * Author: Johan Almbladh <johan.almbladh@gmail.com> -+ * -+ * Based on code and ideas from -+ * Copyright (c) 2017 Cavium, Inc. -+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> -+ * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> -+ */ -+ -+/* -+ * Code overview -+ * ============= -+ * -+ * - bpf_jit_comp.h -+ * Common definitions and utilities. -+ * -+ * - bpf_jit_comp.c -+ * Implementation of JIT top-level logic and exported JIT API functions. -+ * Implementation of internal operations shared by 32-bit and 64-bit code. -+ * JMP and ALU JIT control code, register control code, shared ALU and -+ * JMP/JMP32 JIT operations. -+ * -+ * - bpf_jit_comp32.c -+ * Implementation of functions to JIT prologue, epilogue and a single eBPF -+ * instruction for 32-bit MIPS CPUs. The functions use shared operations -+ * where possible, and implement the rest for 32-bit MIPS such as ALU64 -+ * operations. -+ * -+ * - bpf_jit_comp64.c -+ * Ditto, for 64-bit MIPS CPUs. -+ * -+ * Zero and sign extension -+ * ======================== -+ * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension, -+ * but the eBPF instruction set mandates zero extension. We let the verifier -+ * insert explicit zero-extensions after 32-bit ALU operations, both for -+ * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPS -+ * are JITed with sign extensions inserted when so expected. -+ * -+ * ALU operations -+ * ============== -+ * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are -+ * JITed in the following steps. ALU64 operations on 32-bit MIPS are more -+ * complicated and therefore only processed by special implementations in -+ * step (3). -+ * -+ * 1) valid_alu_i: -+ * Determine if an immediate operation can be emitted as such, or if -+ * we must fall back to the register version.
-+ * -+ * 2) rewrite_alu_i: -+ * Convert BPF operation and immediate value to a canonical form for -+ * JITing. In some degenerate cases this form may be a no-op. -+ * -+ * 3) emit_alu_{i,i64,r,r64}: -+ * Emit instructions for an ALU or ALU64 immediate or register operation. -+ * -+ * JMP operations -+ * ============== -+ * JMP and JMP32 operations require a JIT instruction offset table for -+ * translating the jump offset. This table is computed by dry-running the -+ * JIT without actually emitting anything. However, the computed PC-relative -+ * offset may overflow the 18-bit offset field width of the native MIPS -+ * branch instruction. In such cases, the long jump is converted into the -+ * following sequence. -+ * -+ * <branch> !<cond> +2 Inverted PC-relative branch -+ * nop Delay slot -+ * j <offset> Unconditional absolute long jump -+ * nop Delay slot -+ * -+ * Since this converted sequence alters the offset table, all offsets must -+ * be re-calculated. This may in turn trigger new branch conversions, so -+ * the process is repeated until no further changes are made. Normally it -+ * completes in 1-2 iterations. If JIT_MAX_ITERATIONS is reached, we -+ * fall back to converting every remaining jump operation. The branch -+ * conversion is independent of how the JMP or JMP32 condition is JITed. -+ * -+ * JMP32 and JMP operations are JITed as follows. -+ * -+ * 1) setup_jmp_{i,r}: -+ * Convert jump conditional and offset into a form that can be JITed. -+ * This form may be a no-op, a canonical form, or an inverted PC-relative -+ * jump if branch conversion is necessary. -+ * -+ * 2) valid_jmp_i: -+ * Determine if an immediate operation can be emitted as such, or if -+ * we must fall back to the register version. Applies to JMP32 for 32-bit -+ * MIPS, and both JMP and JMP32 for 64-bit MIPS. -+ * -+ * 3) emit_jmp_{i,i64,r,r64}: -+ * Emit instructions for a JMP or JMP32 immediate or register operation.
-+ * -+ * 4) finish_jmp_{i,r}: -+ * Emit any instructions needed to finish the jump. This includes a nop -+ * for the delay slot if a branch was emitted, and a long absolute jump -+ * if the branch was converted. -+ */ -+ -+#include <linux/limits.h> -+#include <linux/bitops.h> -+#include <linux/errno.h> -+#include <linux/filter.h> -+#include <linux/bpf.h> -+#include <linux/slab.h> -+#include <asm/bitops.h> -+#include <asm/cacheflush.h> -+#include <asm/cpu-features.h> -+#include <asm/isa-rev.h> -+#include <asm/uasm.h> -+ -+#include "bpf_jit_comp.h" -+ -+/* Convenience macros for descriptor access */ -+#define CONVERTED(desc) ((desc) & JIT_DESC_CONVERT) -+#define INDEX(desc) ((desc) & ~JIT_DESC_CONVERT) -+ -+/* -+ * Push registers on the stack, starting at a given depth from the stack -+ * pointer and increasing. The next depth to be written is returned. -+ */ -+int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) -+{ -+ int reg; -+ -+ for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) -+ if (mask & BIT(reg)) { -+ if ((excl & BIT(reg)) == 0) { -+ if (sizeof(long) == 4) -+ emit(ctx, sw, reg, depth, MIPS_R_SP); -+ else /* sizeof(long) == 8 */ -+ emit(ctx, sd, reg, depth, MIPS_R_SP); -+ } -+ depth += sizeof(long); -+ } -+ -+ ctx->stack_used = max((int)ctx->stack_used, depth); -+ return depth; -+} -+ -+/* -+ * Pop registers from the stack, starting at a given depth from the stack -+ * pointer and increasing. The next depth to be read is returned. 
-+ */ -+int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) -+{ -+ int reg; -+ -+ for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) -+ if (mask & BIT(reg)) { -+ if ((excl & BIT(reg)) == 0) { -+ if (sizeof(long) == 4) -+ emit(ctx, lw, reg, depth, MIPS_R_SP); -+ else /* sizeof(long) == 8 */ -+ emit(ctx, ld, reg, depth, MIPS_R_SP); -+ } -+ depth += sizeof(long); -+ } -+ -+ return depth; -+} -+ -+/* Compute the 28-bit jump target address from a BPF program location */ -+int get_target(struct jit_context *ctx, u32 loc) -+{ -+ u32 index = INDEX(ctx->descriptors[loc]); -+ unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index]; -+ unsigned long addr = (unsigned long)&ctx->target[index]; -+ -+ if (!ctx->target) -+ return 0; -+ -+ if ((addr ^ pc) & ~MIPS_JMP_MASK) -+ return -1; -+ -+ return addr & MIPS_JMP_MASK; -+} -+ -+/* Compute the PC-relative offset to relative BPF program offset */ -+int get_offset(const struct jit_context *ctx, int off) -+{ -+ return (INDEX(ctx->descriptors[ctx->bpf_index + off]) - -+ ctx->jit_index - 1) * sizeof(u32); -+} -+ -+/* dst = imm (register width) */ -+void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm) -+{ -+ if (imm >= -0x8000 && imm <= 0x7fff) { -+ emit(ctx, addiu, dst, MIPS_R_ZERO, imm); -+ } else { -+ emit(ctx, lui, dst, (s16)((u32)imm >> 16)); -+ emit(ctx, ori, dst, dst, (u16)(imm & 0xffff)); -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* dst = src (register width) */ -+void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src) -+{ -+ emit(ctx, ori, dst, src, 0); -+ clobber_reg(ctx, dst); -+} -+ -+/* Validate ALU immediate range */ -+bool valid_alu_i(u8 op, s32 imm) -+{ -+ switch (BPF_OP(op)) { -+ case BPF_NEG: -+ case BPF_LSH: -+ case BPF_RSH: -+ case BPF_ARSH: -+ /* All legal eBPF values are valid */ -+ return true; -+ case BPF_ADD: -+ /* imm must be 16 bits */ -+ return imm >= -0x8000 && imm <= 0x7fff; -+ case BPF_SUB: -+ /* -imm must be 16 bits */ -+ return imm >= -0x7fff && imm <= 0x8000; -+ case 
BPF_AND: -+ case BPF_OR: -+ case BPF_XOR: -+ /* imm must be 16 bits unsigned */ -+ return imm >= 0 && imm <= 0xffff; -+ case BPF_MUL: -+ /* imm must be zero or a positive power of two */ -+ return imm == 0 || (imm > 0 && is_power_of_2(imm)); -+ case BPF_DIV: -+ case BPF_MOD: -+ /* imm must be an 17-bit power of two */ -+ return (u32)imm <= 0x10000 && is_power_of_2((u32)imm); -+ } -+ return false; -+} -+ -+/* Rewrite ALU immediate operation */ -+bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val) -+{ -+ bool act = true; -+ -+ switch (BPF_OP(op)) { -+ case BPF_LSH: -+ case BPF_RSH: -+ case BPF_ARSH: -+ case BPF_ADD: -+ case BPF_SUB: -+ case BPF_OR: -+ case BPF_XOR: -+ /* imm == 0 is a no-op */ -+ act = imm != 0; -+ break; -+ case BPF_MUL: -+ if (imm == 1) { -+ /* dst * 1 is a no-op */ -+ act = false; -+ } else if (imm == 0) { -+ /* dst * 0 is dst & 0 */ -+ op = BPF_AND; -+ } else { -+ /* dst * (1 << n) is dst << n */ -+ op = BPF_LSH; -+ imm = ilog2(abs(imm)); -+ } -+ break; -+ case BPF_DIV: -+ if (imm == 1) { -+ /* dst / 1 is a no-op */ -+ act = false; -+ } else { -+ /* dst / (1 << n) is dst >> n */ -+ op = BPF_RSH; -+ imm = ilog2(imm); -+ } -+ break; -+ case BPF_MOD: -+ /* dst % (1 << n) is dst & ((1 << n) - 1) */ -+ op = BPF_AND; -+ imm--; -+ break; -+ } -+ -+ *alu = op; -+ *val = imm; -+ return act; -+} -+ -+/* ALU immediate operation (32-bit) */ -+void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op) -+{ -+ switch (BPF_OP(op)) { -+ /* dst = -dst */ -+ case BPF_NEG: -+ emit(ctx, subu, dst, MIPS_R_ZERO, dst); -+ break; -+ /* dst = dst & imm */ -+ case BPF_AND: -+ emit(ctx, andi, dst, dst, (u16)imm); -+ break; -+ /* dst = dst | imm */ -+ case BPF_OR: -+ emit(ctx, ori, dst, dst, (u16)imm); -+ break; -+ /* dst = dst ^ imm */ -+ case BPF_XOR: -+ emit(ctx, xori, dst, dst, (u16)imm); -+ break; -+ /* dst = dst << imm */ -+ case BPF_LSH: -+ emit(ctx, sll, dst, dst, imm); -+ break; -+ /* dst = dst >> imm */ -+ case BPF_RSH: -+ emit(ctx, srl, dst, dst, imm); -+ 
break; -+ /* dst = dst >> imm (arithmetic) */ -+ case BPF_ARSH: -+ emit(ctx, sra, dst, dst, imm); -+ break; -+ /* dst = dst + imm */ -+ case BPF_ADD: -+ emit(ctx, addiu, dst, dst, imm); -+ break; -+ /* dst = dst - imm */ -+ case BPF_SUB: -+ emit(ctx, addiu, dst, dst, -imm); -+ break; -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* ALU register operation (32-bit) */ -+void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op) -+{ -+ switch (BPF_OP(op)) { -+ /* dst = dst & src */ -+ case BPF_AND: -+ emit(ctx, and, dst, dst, src); -+ break; -+ /* dst = dst | src */ -+ case BPF_OR: -+ emit(ctx, or, dst, dst, src); -+ break; -+ /* dst = dst ^ src */ -+ case BPF_XOR: -+ emit(ctx, xor, dst, dst, src); -+ break; -+ /* dst = dst << src */ -+ case BPF_LSH: -+ emit(ctx, sllv, dst, dst, src); -+ break; -+ /* dst = dst >> src */ -+ case BPF_RSH: -+ emit(ctx, srlv, dst, dst, src); -+ break; -+ /* dst = dst >> src (arithmetic) */ -+ case BPF_ARSH: -+ emit(ctx, srav, dst, dst, src); -+ break; -+ /* dst = dst + src */ -+ case BPF_ADD: -+ emit(ctx, addu, dst, dst, src); -+ break; -+ /* dst = dst - src */ -+ case BPF_SUB: -+ emit(ctx, subu, dst, dst, src); -+ break; -+ /* dst = dst * src */ -+ case BPF_MUL: -+ if (cpu_has_mips32r1 || cpu_has_mips32r6) { -+ emit(ctx, mul, dst, dst, src); -+ } else { -+ emit(ctx, multu, dst, src); -+ emit(ctx, mflo, dst); -+ } -+ break; -+ /* dst = dst / src */ -+ case BPF_DIV: -+ if (cpu_has_mips32r6) { -+ emit(ctx, divu_r6, dst, dst, src); -+ } else { -+ emit(ctx, divu, dst, src); -+ emit(ctx, mflo, dst); -+ } -+ break; -+ /* dst = dst % src */ -+ case BPF_MOD: -+ if (cpu_has_mips32r6) { -+ emit(ctx, modu, dst, dst, src); -+ } else { -+ emit(ctx, divu, dst, src); -+ emit(ctx, mfhi, dst); -+ } -+ break; -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* Atomic read-modify-write (32-bit) */ -+void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) -+{ -+ emit(ctx, ll, MIPS_R_T9, off, dst); -+ switch (code) { -+ case BPF_ADD: -+ emit(ctx, 
addu, MIPS_R_T8, MIPS_R_T9, src); -+ break; -+ case BPF_AND: -+ emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src); -+ break; -+ case BPF_OR: -+ emit(ctx, or, MIPS_R_T8, MIPS_R_T9, src); -+ break; -+ case BPF_XOR: -+ emit(ctx, xor, MIPS_R_T8, MIPS_R_T9, src); -+ break; -+ } -+ emit(ctx, sc, MIPS_R_T8, off, dst); -+ emit(ctx, beqz, MIPS_R_T8, -16); -+ emit(ctx, nop); /* Delay slot */ -+} -+ -+/* Atomic compare-and-exchange (32-bit) */ -+void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off) -+{ -+ emit(ctx, ll, MIPS_R_T9, off, dst); -+ emit(ctx, bne, MIPS_R_T9, res, 12); -+ emit(ctx, move, MIPS_R_T8, src); /* Delay slot */ -+ emit(ctx, sc, MIPS_R_T8, off, dst); -+ emit(ctx, beqz, MIPS_R_T8, -20); -+ emit(ctx, move, res, MIPS_R_T9); /* Delay slot */ -+ clobber_reg(ctx, res); -+} -+ -+/* Swap bytes and truncate a register word or half word */ -+void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width) -+{ -+ u8 tmp = MIPS_R_T8; -+ u8 msk = MIPS_R_T9; -+ -+ switch (width) { -+ /* Swap bytes in a word */ -+ case 32: -+ if (cpu_has_mips32r2 || cpu_has_mips32r6) { -+ emit(ctx, wsbh, dst, dst); -+ emit(ctx, rotr, dst, dst, 16); -+ } else { -+ emit(ctx, sll, tmp, dst, 16); /* tmp = dst << 16 */ -+ emit(ctx, srl, dst, dst, 16); /* dst = dst >> 16 */ -+ emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ -+ -+ emit(ctx, lui, msk, 0xff); /* msk = 0x00ff0000 */ -+ emit(ctx, ori, msk, msk, 0xff); /* msk = msk | 0xff */ -+ -+ emit(ctx, and, tmp, dst, msk); /* tmp = dst & msk */ -+ emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */ -+ emit(ctx, srl, dst, dst, 8); /* dst = dst >> 8 */ -+ emit(ctx, and, dst, dst, msk); /* dst = dst & msk */ -+ emit(ctx, or, dst, dst, tmp); /* reg = dst | tmp */ -+ } -+ break; -+ /* Swap bytes in a half word */ -+ case 16: -+ if (cpu_has_mips32r2 || cpu_has_mips32r6) { -+ emit(ctx, wsbh, dst, dst); -+ emit(ctx, andi, dst, dst, 0xffff); -+ } else { -+ emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */ -+ emit(ctx, srl, tmp, tmp, 
8); /* t = t >> 8 */ -+ emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */ -+ emit(ctx, sll, dst, dst, 8); /* d = d << 8 */ -+ emit(ctx, or, dst, dst, tmp); /* d = d | t */ -+ } -+ break; -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* Validate jump immediate range */ -+bool valid_jmp_i(u8 op, s32 imm) -+{ -+ switch (op) { -+ case JIT_JNOP: -+ /* Immediate value not used */ -+ return true; -+ case BPF_JEQ: -+ case BPF_JNE: -+ /* No immediate operation */ -+ return false; -+ case BPF_JSET: -+ case JIT_JNSET: -+ /* imm must be 16 bits unsigned */ -+ return imm >= 0 && imm <= 0xffff; -+ case BPF_JGE: -+ case BPF_JLT: -+ case BPF_JSGE: -+ case BPF_JSLT: -+ /* imm must be 16 bits */ -+ return imm >= -0x8000 && imm <= 0x7fff; -+ case BPF_JGT: -+ case BPF_JLE: -+ case BPF_JSGT: -+ case BPF_JSLE: -+ /* imm + 1 must be 16 bits */ -+ return imm >= -0x8001 && imm <= 0x7ffe; -+ } -+ return false; -+} -+ -+/* Invert a conditional jump operation */ -+static u8 invert_jmp(u8 op) -+{ -+ switch (op) { -+ case BPF_JA: return JIT_JNOP; -+ case BPF_JEQ: return BPF_JNE; -+ case BPF_JNE: return BPF_JEQ; -+ case BPF_JSET: return JIT_JNSET; -+ case BPF_JGT: return BPF_JLE; -+ case BPF_JGE: return BPF_JLT; -+ case BPF_JLT: return BPF_JGE; -+ case BPF_JLE: return BPF_JGT; -+ case BPF_JSGT: return BPF_JSLE; -+ case BPF_JSGE: return BPF_JSLT; -+ case BPF_JSLT: return BPF_JSGE; -+ case BPF_JSLE: return BPF_JSGT; -+ } -+ return 0; -+} -+ -+/* Prepare a PC-relative jump operation */ -+static void setup_jmp(struct jit_context *ctx, u8 bpf_op, -+ s16 bpf_off, u8 *jit_op, s32 *jit_off) -+{ -+ u32 *descp = &ctx->descriptors[ctx->bpf_index]; -+ int op = bpf_op; -+ int offset = 0; -+ -+ /* Do not compute offsets on the first pass */ -+ if (INDEX(*descp) == 0) -+ goto done; -+ -+ /* Skip jumps never taken */ -+ if (bpf_op == JIT_JNOP) -+ goto done; -+ -+ /* Convert jumps always taken */ -+ if (bpf_op == BPF_JA) -+ *descp |= JIT_DESC_CONVERT; -+ -+ /* -+ * Current ctx->jit_index points to the start of 
the branch preamble. -+ * Since the preamble differs among different branch conditionals, -+ * the current index cannot be used to compute the branch offset. -+ * Instead, we use the offset table value for the next instruction, -+ * which gives the index immediately after the branch delay slot. -+ */ -+ if (!CONVERTED(*descp)) { -+ int target = ctx->bpf_index + bpf_off + 1; -+ int origin = ctx->bpf_index + 1; -+ -+ offset = (INDEX(ctx->descriptors[target]) - -+ INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32); -+ } -+ -+ /* -+ * The PC-relative branch offset field on MIPS is 18 bits signed, -+ * so if the computed offset is larger than this we generate a an -+ * absolute jump that we skip with an inverted conditional branch. -+ */ -+ if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) { -+ offset = 3 * sizeof(u32); -+ op = invert_jmp(bpf_op); -+ ctx->changes += !CONVERTED(*descp); -+ *descp |= JIT_DESC_CONVERT; -+ } -+ -+done: -+ *jit_off = offset; -+ *jit_op = op; -+} -+ -+/* Prepare a PC-relative jump operation with immediate conditional */ -+void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width, -+ u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) -+{ -+ bool always = false; -+ bool never = false; -+ -+ switch (bpf_op) { -+ case BPF_JEQ: -+ case BPF_JNE: -+ break; -+ case BPF_JSET: -+ case BPF_JLT: -+ never = imm == 0; -+ break; -+ case BPF_JGE: -+ always = imm == 0; -+ break; -+ case BPF_JGT: -+ never = (u32)imm == U32_MAX; -+ break; -+ case BPF_JLE: -+ always = (u32)imm == U32_MAX; -+ break; -+ case BPF_JSGT: -+ never = imm == S32_MAX && width == 32; -+ break; -+ case BPF_JSGE: -+ always = imm == S32_MIN && width == 32; -+ break; -+ case BPF_JSLT: -+ never = imm == S32_MIN && width == 32; -+ break; -+ case BPF_JSLE: -+ always = imm == S32_MAX && width == 32; -+ break; -+ } -+ -+ if (never) -+ bpf_op = JIT_JNOP; -+ if (always) -+ bpf_op = BPF_JA; -+ setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); -+} -+ -+/* Prepare a PC-relative jump 
operation with register conditional */ -+void setup_jmp_r(struct jit_context *ctx, bool same_reg, -+ u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) -+{ -+ switch (bpf_op) { -+ case BPF_JSET: -+ break; -+ case BPF_JEQ: -+ case BPF_JGE: -+ case BPF_JLE: -+ case BPF_JSGE: -+ case BPF_JSLE: -+ if (same_reg) -+ bpf_op = BPF_JA; -+ break; -+ case BPF_JNE: -+ case BPF_JLT: -+ case BPF_JGT: -+ case BPF_JSGT: -+ case BPF_JSLT: -+ if (same_reg) -+ bpf_op = JIT_JNOP; -+ break; -+ } -+ setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); -+} -+ -+/* Finish a PC-relative jump operation */ -+int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off) -+{ -+ /* Emit conditional branch delay slot */ -+ if (jit_op != JIT_JNOP) -+ emit(ctx, nop); -+ /* -+ * Emit an absolute long jump with delay slot, -+ * if the PC-relative branch was converted. -+ */ -+ if (CONVERTED(ctx->descriptors[ctx->bpf_index])) { -+ int target = get_target(ctx, ctx->bpf_index + bpf_off + 1); -+ -+ if (target < 0) -+ return -1; -+ emit(ctx, j, target); -+ emit(ctx, nop); -+ } -+ return 0; -+} -+ -+/* Jump immediate (32-bit) */ -+void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op) -+{ -+ switch (op) { -+ /* No-op, used internally for branch optimization */ -+ case JIT_JNOP: -+ break; -+ /* PC += off if dst & imm */ -+ case BPF_JSET: -+ emit(ctx, andi, MIPS_R_T9, dst, (u16)imm); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ -+ case JIT_JNSET: -+ emit(ctx, andi, MIPS_R_T9, dst, (u16)imm); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst > imm */ -+ case BPF_JGT: -+ emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst >= imm */ -+ case BPF_JGE: -+ emit(ctx, sltiu, MIPS_R_T9, dst, imm); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst < imm */ -+ case BPF_JLT: -+ emit(ctx, sltiu, MIPS_R_T9, dst, imm); -+ emit(ctx, bnez, 
MIPS_R_T9, off); -+ break; -+ /* PC += off if dst <= imm */ -+ case BPF_JLE: -+ emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst > imm (signed) */ -+ case BPF_JSGT: -+ emit(ctx, slti, MIPS_R_T9, dst, imm + 1); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst >= imm (signed) */ -+ case BPF_JSGE: -+ emit(ctx, slti, MIPS_R_T9, dst, imm); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst < imm (signed) */ -+ case BPF_JSLT: -+ emit(ctx, slti, MIPS_R_T9, dst, imm); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst <= imm (signed) */ -+ case BPF_JSLE: -+ emit(ctx, slti, MIPS_R_T9, dst, imm + 1); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ } -+} -+ -+/* Jump register (32-bit) */ -+void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op) -+{ -+ switch (op) { -+ /* No-op, used internally for branch optimization */ -+ case JIT_JNOP: -+ break; -+ /* PC += off if dst == src */ -+ case BPF_JEQ: -+ emit(ctx, beq, dst, src, off); -+ break; -+ /* PC += off if dst != src */ -+ case BPF_JNE: -+ emit(ctx, bne, dst, src, off); -+ break; -+ /* PC += off if dst & src */ -+ case BPF_JSET: -+ emit(ctx, and, MIPS_R_T9, dst, src); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ -+ case JIT_JNSET: -+ emit(ctx, and, MIPS_R_T9, dst, src); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst > src */ -+ case BPF_JGT: -+ emit(ctx, sltu, MIPS_R_T9, src, dst); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst >= src */ -+ case BPF_JGE: -+ emit(ctx, sltu, MIPS_R_T9, dst, src); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst < src */ -+ case BPF_JLT: -+ emit(ctx, sltu, MIPS_R_T9, dst, src); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst <= src */ -+ case BPF_JLE: -+ emit(ctx, sltu, MIPS_R_T9, src, dst); -+ 
emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst > src (signed) */ -+ case BPF_JSGT: -+ emit(ctx, slt, MIPS_R_T9, src, dst); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst >= src (signed) */ -+ case BPF_JSGE: -+ emit(ctx, slt, MIPS_R_T9, dst, src); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst < src (signed) */ -+ case BPF_JSLT: -+ emit(ctx, slt, MIPS_R_T9, dst, src); -+ emit(ctx, bnez, MIPS_R_T9, off); -+ break; -+ /* PC += off if dst <= src (signed) */ -+ case BPF_JSLE: -+ emit(ctx, slt, MIPS_R_T9, src, dst); -+ emit(ctx, beqz, MIPS_R_T9, off); -+ break; -+ } -+} -+ -+/* Jump always */ -+int emit_ja(struct jit_context *ctx, s16 off) -+{ -+ int target = get_target(ctx, ctx->bpf_index + off + 1); -+ -+ if (target < 0) -+ return -1; -+ emit(ctx, j, target); -+ emit(ctx, nop); -+ return 0; -+} -+ -+/* Jump to epilogue */ -+int emit_exit(struct jit_context *ctx) -+{ -+ int target = get_target(ctx, ctx->program->len); -+ -+ if (target < 0) -+ return -1; -+ emit(ctx, j, target); -+ emit(ctx, nop); -+ return 0; -+} -+ -+/* Build the program body from eBPF bytecode */ -+static int build_body(struct jit_context *ctx) -+{ -+ const struct bpf_prog *prog = ctx->program; -+ unsigned int i; -+ -+ ctx->stack_used = 0; -+ for (i = 0; i < prog->len; i++) { -+ const struct bpf_insn *insn = &prog->insnsi[i]; -+ u32 *descp = &ctx->descriptors[i]; -+ int ret; -+ -+ access_reg(ctx, insn->src_reg); -+ access_reg(ctx, insn->dst_reg); -+ -+ ctx->bpf_index = i; -+ if (ctx->target == NULL) { -+ ctx->changes += INDEX(*descp) != ctx->jit_index; -+ *descp &= JIT_DESC_CONVERT; -+ *descp |= ctx->jit_index; -+ } -+ -+ ret = build_insn(insn, ctx); -+ if (ret < 0) -+ return ret; -+ -+ if (ret > 0) { -+ i++; -+ if (ctx->target == NULL) -+ descp[1] = ctx->jit_index; -+ } -+ } -+ -+ /* Store the end offset, where the epilogue begins */ -+ ctx->descriptors[prog->len] = ctx->jit_index; -+ return 0; -+} -+ -+/* Set the branch conversion flag 
on all instructions */ -+static void set_convert_flag(struct jit_context *ctx, bool enable) -+{ -+ const struct bpf_prog *prog = ctx->program; -+ u32 flag = enable ? JIT_DESC_CONVERT : 0; -+ unsigned int i; -+ -+ for (i = 0; i <= prog->len; i++) -+ ctx->descriptors[i] = INDEX(ctx->descriptors[i]) | flag; -+} -+ -+static void jit_fill_hole(void *area, unsigned int size) -+{ -+ u32 *p; -+ -+ /* We are guaranteed to have aligned memory. */ -+ for (p = area; size >= sizeof(u32); size -= sizeof(u32)) -+ uasm_i_break(&p, BRK_BUG); /* Increments p */ -+} -+ -+bool bpf_jit_needs_zext(void) -+{ -+ return true; -+} -+ -+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) -+{ -+ struct bpf_prog *tmp, *orig_prog = prog; -+ struct bpf_binary_header *header = NULL; -+ struct jit_context ctx; -+ bool tmp_blinded = false; -+ unsigned int tmp_idx; -+ unsigned int image_size; -+ u8 *image_ptr; -+ int tries; -+ -+ /* -+ * If BPF JIT was not enabled then we must fall back to -+ * the interpreter. -+ */ -+ if (!prog->jit_requested) -+ return orig_prog; -+ /* -+ * If constant blinding was enabled and we failed during blinding -+ * then we must fall back to the interpreter. Otherwise, we save -+ * the new JITed code. -+ */ -+ tmp = bpf_jit_blind_constants(prog); -+ if (IS_ERR(tmp)) -+ return orig_prog; -+ if (tmp != prog) { -+ tmp_blinded = true; -+ prog = tmp; -+ } -+ -+ memset(&ctx, 0, sizeof(ctx)); -+ ctx.program = prog; -+ -+ /* -+ * Not able to allocate memory for descriptors[], then -+ * we must fall back to the interpreter -+ */ -+ ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors), -+ GFP_KERNEL); -+ if (ctx.descriptors == NULL) -+ goto out_err; -+ -+ /* First pass discovers used resources */ -+ if (build_body(&ctx) < 0) -+ goto out_err; -+ /* -+ * Second pass computes instruction offsets. 
-+ * If any PC-relative branches are out of range, a sequence of -+ * a PC-relative branch + a jump is generated, and we have to -+ * try again from the beginning to generate the new offsets. -+ * This is done until no additional conversions are necessary. -+ * The last two iterations are done with all branches being -+ * converted, to guarantee offset table convergence within a -+ * fixed number of iterations. -+ */ -+ ctx.jit_index = 0; -+ build_prologue(&ctx); -+ tmp_idx = ctx.jit_index; -+ -+ tries = JIT_MAX_ITERATIONS; -+ do { -+ ctx.jit_index = tmp_idx; -+ ctx.changes = 0; -+ if (tries == 2) -+ set_convert_flag(&ctx, true); -+ if (build_body(&ctx) < 0) -+ goto out_err; -+ } while (ctx.changes > 0 && --tries > 0); -+ -+ if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge")) -+ goto out_err; -+ -+ build_epilogue(&ctx, MIPS_R_RA); -+ -+ /* Now we know the size of the structure to make */ -+ image_size = sizeof(u32) * ctx.jit_index; -+ header = bpf_jit_binary_alloc(image_size, &image_ptr, -+ sizeof(u32), jit_fill_hole); -+ /* -+ * Not able to allocate memory for the structure then -+ * we must fall back to the interpretation -+ */ -+ if (header == NULL) -+ goto out_err; -+ -+ /* Actual pass to generate final JIT code */ -+ ctx.target = (u32 *)image_ptr; -+ ctx.jit_index = 0; -+ -+ /* -+ * If building the JITed code fails somehow, -+ * we fall back to the interpretation. 
-+ */ -+ build_prologue(&ctx); -+ if (build_body(&ctx) < 0) -+ goto out_err; -+ build_epilogue(&ctx, MIPS_R_RA); -+ -+ /* Populate line info meta data */ -+ set_convert_flag(&ctx, false); -+ bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]); -+ -+ /* Set as read-only exec and flush instruction cache */ -+ bpf_jit_binary_lock_ro(header); -+ flush_icache_range((unsigned long)header, -+ (unsigned long)&ctx.target[ctx.jit_index]); -+ -+ if (bpf_jit_enable > 1) -+ bpf_jit_dump(prog->len, image_size, 2, ctx.target); -+ -+ prog->bpf_func = (void *)ctx.target; -+ prog->jited = 1; -+ prog->jited_len = image_size; -+ -+out: -+ if (tmp_blinded) -+ bpf_jit_prog_release_other(prog, prog == orig_prog ? -+ tmp : orig_prog); -+ kfree(ctx.descriptors); -+ return prog; -+ -+out_err: -+ prog = orig_prog; -+ if (header) -+ bpf_jit_binary_free(header); -+ goto out; -+} ---- /dev/null -+++ b/arch/mips/net/bpf_jit_comp.h -@@ -0,0 +1,211 @@ -+/* SPDX-License-Identifier: GPL-2.0-only */ -+/* -+ * Just-In-Time compiler for eBPF bytecode on 32-bit and 64-bit MIPS. -+ * -+ * Copyright (c) 2021 Anyfi Networks AB. -+ * Author: Johan Almbladh <johan.almbladh@gmail.com> -+ * -+ * Based on code and ideas from -+ * Copyright (c) 2017 Cavium, Inc. 
-+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> -+ * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> -+ */ -+ -+#ifndef _BPF_JIT_COMP_H -+#define _BPF_JIT_COMP_H -+ -+/* MIPS registers */ -+#define MIPS_R_ZERO 0 /* Const zero */ -+#define MIPS_R_AT 1 /* Asm temp */ -+#define MIPS_R_V0 2 /* Result */ -+#define MIPS_R_V1 3 /* Result */ -+#define MIPS_R_A0 4 /* Argument */ -+#define MIPS_R_A1 5 /* Argument */ -+#define MIPS_R_A2 6 /* Argument */ -+#define MIPS_R_A3 7 /* Argument */ -+#define MIPS_R_A4 8 /* Arg (n64) */ -+#define MIPS_R_A5 9 /* Arg (n64) */ -+#define MIPS_R_A6 10 /* Arg (n64) */ -+#define MIPS_R_A7 11 /* Arg (n64) */ -+#define MIPS_R_T0 8 /* Temp (o32) */ -+#define MIPS_R_T1 9 /* Temp (o32) */ -+#define MIPS_R_T2 10 /* Temp (o32) */ -+#define MIPS_R_T3 11 /* Temp (o32) */ -+#define MIPS_R_T4 12 /* Temporary */ -+#define MIPS_R_T5 13 /* Temporary */ -+#define MIPS_R_T6 14 /* Temporary */ -+#define MIPS_R_T7 15 /* Temporary */ -+#define MIPS_R_S0 16 /* Saved */ -+#define MIPS_R_S1 17 /* Saved */ -+#define MIPS_R_S2 18 /* Saved */ -+#define MIPS_R_S3 19 /* Saved */ -+#define MIPS_R_S4 20 /* Saved */ -+#define MIPS_R_S5 21 /* Saved */ -+#define MIPS_R_S6 22 /* Saved */ -+#define MIPS_R_S7 23 /* Saved */ -+#define MIPS_R_T8 24 /* Temporary */ -+#define MIPS_R_T9 25 /* Temporary */ -+/* MIPS_R_K0 26 Reserved */ -+/* MIPS_R_K1 27 Reserved */ -+#define MIPS_R_GP 28 /* Global ptr */ -+#define MIPS_R_SP 29 /* Stack ptr */ -+#define MIPS_R_FP 30 /* Frame ptr */ -+#define MIPS_R_RA 31 /* Return */ -+ -+/* -+ * Jump address mask for immediate jumps. The four most significant bits -+ * must be equal to PC. -+ */ -+#define MIPS_JMP_MASK 0x0fffffffUL -+ -+/* Maximum number of iterations in offset table computation */ -+#define JIT_MAX_ITERATIONS 8 -+ -+/* -+ * Jump pseudo-instructions used internally -+ * for branch conversion and branch optimization. 
-+ */ -+#define JIT_JNSET 0xe0 -+#define JIT_JNOP 0xf0 -+ -+/* Descriptor flag for PC-relative branch conversion */ -+#define JIT_DESC_CONVERT BIT(31) -+ -+/* JIT context for an eBPF program */ -+struct jit_context { -+ struct bpf_prog *program; /* The eBPF program being JITed */ -+ u32 *descriptors; /* eBPF to JITed CPU insn descriptors */ -+ u32 *target; /* JITed code buffer */ -+ u32 bpf_index; /* Index of current BPF program insn */ -+ u32 jit_index; /* Index of current JIT target insn */ -+ u32 changes; /* Number of PC-relative branch conv */ -+ u32 accessed; /* Bit mask of read eBPF registers */ -+ u32 clobbered; /* Bit mask of modified CPU registers */ -+ u32 stack_size; /* Total allocated stack size in bytes */ -+ u32 saved_size; /* Size of callee-saved registers */ -+ u32 stack_used; /* Stack size used for function calls */ -+}; -+ -+/* Emit the instruction if the JIT memory space has been allocated */ -+#define emit(ctx, func, ...) \ -+do { \ -+ if ((ctx)->target != NULL) { \ -+ u32 *p = &(ctx)->target[ctx->jit_index]; \ -+ uasm_i_##func(&p, ##__VA_ARGS__); \ -+ } \ -+ (ctx)->jit_index++; \ -+} while (0) -+ -+/* -+ * Mark a BPF register as accessed, it needs to be -+ * initialized by the program if expected, e.g. FP. -+ */ -+static inline void access_reg(struct jit_context *ctx, u8 reg) -+{ -+ ctx->accessed |= BIT(reg); -+} -+ -+/* -+ * Mark a CPU register as clobbered, it needs to be -+ * saved/restored by the program if callee-saved. -+ */ -+static inline void clobber_reg(struct jit_context *ctx, u8 reg) -+{ -+ ctx->clobbered |= BIT(reg); -+} -+ -+/* -+ * Push registers on the stack, starting at a given depth from the stack -+ * pointer and increasing. The next depth to be written is returned. -+ */ -+int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth); -+ -+/* -+ * Pop registers from the stack, starting at a given depth from the stack -+ * pointer and increasing. The next depth to be read is returned. 
-+ */ -+int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth); -+ -+/* Compute the 28-bit jump target address from a BPF program location */ -+int get_target(struct jit_context *ctx, u32 loc); -+ -+/* Compute the PC-relative offset to relative BPF program offset */ -+int get_offset(const struct jit_context *ctx, int off); -+ -+/* dst = imm (32-bit) */ -+void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm); -+ -+/* dst = src (32-bit) */ -+void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src); -+ -+/* Validate ALU/ALU64 immediate range */ -+bool valid_alu_i(u8 op, s32 imm); -+ -+/* Rewrite ALU/ALU64 immediate operation */ -+bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val); -+ -+/* ALU immediate operation (32-bit) */ -+void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op); -+ -+/* ALU register operation (32-bit) */ -+void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op); -+ -+/* Atomic read-modify-write (32-bit) */ -+void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code); -+ -+/* Atomic compare-and-exchange (32-bit) */ -+void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off); -+ -+/* Swap bytes and truncate a register word or half word */ -+void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width); -+ -+/* Validate JMP/JMP32 immediate range */ -+bool valid_jmp_i(u8 op, s32 imm); -+ -+/* Prepare a PC-relative jump operation with immediate conditional */ -+void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width, -+ u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off); -+ -+/* Prepare a PC-relative jump operation with register conditional */ -+void setup_jmp_r(struct jit_context *ctx, bool same_reg, -+ u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off); -+ -+/* Finish a PC-relative jump operation */ -+int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off); -+ -+/* Conditional JMP/JMP32 immediate */ -+void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 
off, u8 op); -+ -+/* Conditional JMP/JMP32 register */ -+void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op); -+ -+/* Jump always */ -+int emit_ja(struct jit_context *ctx, s16 off); -+ -+/* Jump to epilogue */ -+int emit_exit(struct jit_context *ctx); -+ -+/* -+ * Build program prologue to set up the stack and registers. -+ * This function is implemented separately for 32-bit and 64-bit JITs. -+ */ -+void build_prologue(struct jit_context *ctx); -+ -+/* -+ * Build the program epilogue to restore the stack and registers. -+ * This function is implemented separately for 32-bit and 64-bit JITs. -+ */ -+void build_epilogue(struct jit_context *ctx, int dest_reg); -+ -+/* -+ * Convert an eBPF instruction to native instruction, i.e -+ * JITs an eBPF instruction. -+ * Returns : -+ * 0 - Successfully JITed an 8-byte eBPF instruction -+ * >0 - Successfully JITed a 16-byte eBPF instruction -+ * <0 - Failed to JIT. -+ * This function is implemented separately for 32-bit and 64-bit JITs. -+ */ -+int build_insn(const struct bpf_insn *insn, struct jit_context *ctx); -+ -+#endif /* _BPF_JIT_COMP_H */ ---- /dev/null -+++ b/arch/mips/net/bpf_jit_comp32.c -@@ -0,0 +1,1741 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Just-In-Time compiler for eBPF bytecode on MIPS. -+ * Implementation of JIT functions for 32-bit CPUs. -+ * -+ * Copyright (c) 2021 Anyfi Networks AB. -+ * Author: Johan Almbladh <johan.almbladh@gmail.com> -+ * -+ * Based on code and ideas from -+ * Copyright (c) 2017 Cavium, Inc. 
-+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> -+ * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> -+ */ -+ -+#include <linux/math64.h> -+#include <linux/errno.h> -+#include <linux/filter.h> -+#include <linux/bpf.h> -+#include <asm/cpu-features.h> -+#include <asm/isa-rev.h> -+#include <asm/uasm.h> -+ -+#include "bpf_jit_comp.h" -+ -+/* MIPS a4-a7 are not available in the o32 ABI */ -+#undef MIPS_R_A4 -+#undef MIPS_R_A5 -+#undef MIPS_R_A6 -+#undef MIPS_R_A7 -+ -+/* Stack is 8-byte aligned in o32 ABI */ -+#define MIPS_STACK_ALIGNMENT 8 -+ -+/* -+ * The top 16 bytes of a stack frame is reserved for the callee in O32 ABI. -+ * This corresponds to stack space for register arguments a0-a3. -+ */ -+#define JIT_RESERVED_STACK 16 -+ -+/* Temporary 64-bit register used by JIT */ -+#define JIT_REG_TMP MAX_BPF_JIT_REG -+ -+/* -+ * Number of prologue bytes to skip when doing a tail call. -+ * Tail call count (TCC) initialization (8 bytes) always, plus -+ * R0-to-v0 assignment (4 bytes) if big endian. 
-+ */ -+#ifdef __BIG_ENDIAN -+#define JIT_TCALL_SKIP 12 -+#else -+#define JIT_TCALL_SKIP 8 -+#endif -+ -+/* CPU registers holding the callee return value */ -+#define JIT_RETURN_REGS \ -+ (BIT(MIPS_R_V0) | \ -+ BIT(MIPS_R_V1)) -+ -+/* CPU register arguments passed to callee directly */ -+#define JIT_ARG_REGS \ -+ (BIT(MIPS_R_A0) | \ -+ BIT(MIPS_R_A1) | \ -+ BIT(MIPS_R_A2) | \ -+ BIT(MIPS_R_A3)) -+ -+/* CPU register arguments passed to callee on stack */ -+#define JIT_STACK_REGS \ -+ (BIT(MIPS_R_T0) | \ -+ BIT(MIPS_R_T1) | \ -+ BIT(MIPS_R_T2) | \ -+ BIT(MIPS_R_T3) | \ -+ BIT(MIPS_R_T4) | \ -+ BIT(MIPS_R_T5)) -+ -+/* Caller-saved CPU registers */ -+#define JIT_CALLER_REGS \ -+ (JIT_RETURN_REGS | \ -+ JIT_ARG_REGS | \ -+ JIT_STACK_REGS) -+ -+/* Callee-saved CPU registers */ -+#define JIT_CALLEE_REGS \ -+ (BIT(MIPS_R_S0) | \ -+ BIT(MIPS_R_S1) | \ -+ BIT(MIPS_R_S2) | \ -+ BIT(MIPS_R_S3) | \ -+ BIT(MIPS_R_S4) | \ -+ BIT(MIPS_R_S5) | \ -+ BIT(MIPS_R_S6) | \ -+ BIT(MIPS_R_S7) | \ -+ BIT(MIPS_R_GP) | \ -+ BIT(MIPS_R_FP) | \ -+ BIT(MIPS_R_RA)) -+ -+/* -+ * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers. -+ * -+ * 1) Native register pairs are ordered according to CPU endianness, following -+ * the MIPS convention for passing 64-bit arguments and return values. -+ * 2) The eBPF return value, arguments and callee-saved registers are mapped -+ * to their native MIPS equivalents. -+ * 3) Since the 32 highest bits in the eBPF FP register are always zero, -+ * only one general-purpose register is actually needed for the mapping. -+ * We use the fp register for this purpose, and map the highest bits to -+ * the MIPS register r0 (zero). -+ * 4) We use the MIPS gp and at registers as internal temporary registers -+ * for constant blinding. The gp register is callee-saved. -+ * 5) One 64-bit temporary register is mapped for use when sign-extending -+ * immediate operands.
MIPS registers t6-t9 are available to the JIT -+ * for use as temporaries when implementing complex 64-bit operations. -+ * -+ * With this scheme all eBPF registers are being mapped to native MIPS -+ * registers without having to use any stack scratch space. The direct -+ * register mapping (2) simplifies the handling of function calls. -+ */ -+static const u8 bpf2mips32[][2] = { -+ /* Return value from in-kernel function, and exit value from eBPF */ -+ [BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0}, -+ /* Arguments from eBPF program to in-kernel function */ -+ [BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0}, -+ [BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2}, -+ /* Remaining arguments, to be passed on the stack per O32 ABI */ -+ [BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0}, -+ [BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2}, -+ [BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4}, -+ /* Callee-saved registers that in-kernel function will preserve */ -+ [BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0}, -+ [BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2}, -+ [BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4}, -+ [BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6}, -+ /* Read-only frame pointer to access the eBPF stack */ -+#ifdef __BIG_ENDIAN -+ [BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO}, -+#else -+ [BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP}, -+#endif -+ /* Temporary register for blinding constants */ -+ [BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT}, -+ /* Temporary register for internal JIT use */ -+ [JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6}, -+}; -+ -+/* Get low CPU register for a 64-bit eBPF register mapping */ -+static inline u8 lo(const u8 reg[]) -+{ -+#ifdef __BIG_ENDIAN -+ return reg[0]; -+#else -+ return reg[1]; -+#endif -+} -+ -+/* Get high CPU register for a 64-bit eBPF register mapping */ -+static inline u8 hi(const u8 reg[]) -+{ -+#ifdef __BIG_ENDIAN -+ return reg[1]; -+#else -+ return reg[0]; -+#endif -+} -+ -+/* -+ * Mark a 64-bit CPU register pair as clobbered, it needs to be -+ * saved/restored by the program if callee-saved.
-+ */ -+static void clobber_reg64(struct jit_context *ctx, const u8 reg[]) -+{ -+ clobber_reg(ctx, reg[0]); -+ clobber_reg(ctx, reg[1]); -+} -+ -+/* dst = imm (sign-extended) */ -+static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm) -+{ -+ emit_mov_i(ctx, lo(dst), imm); -+ if (imm < 0) -+ emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1); -+ else -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ clobber_reg64(ctx, dst); -+} -+ -+/* Zero extension, if verifier does not do it for us */ -+static void emit_zext_ver(struct jit_context *ctx, const u8 dst[]) -+{ -+ if (!ctx->program->aux->verifier_zext) { -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ clobber_reg(ctx, hi(dst)); -+ } -+} -+ -+/* Load delay slot, if ISA mandates it */ -+static void emit_load_delay(struct jit_context *ctx) -+{ -+ if (!cpu_has_mips_2_3_4_5_r) -+ emit(ctx, nop); -+} -+ -+/* ALU immediate operation (64-bit) */ -+static void emit_alu_i64(struct jit_context *ctx, -+ const u8 dst[], s32 imm, u8 op) -+{ -+ u8 src = MIPS_R_T6; -+ -+ /* -+ * ADD/SUB with all but the max negative imm can be handled by -+ * inverting the operation and the imm value, saving one insn. 
-+ */ -+ if (imm > S32_MIN && imm < 0) -+ switch (op) { -+ case BPF_ADD: -+ op = BPF_SUB; -+ imm = -imm; -+ break; -+ case BPF_SUB: -+ op = BPF_ADD; -+ imm = -imm; -+ break; -+ } -+ -+ /* Move immediate to temporary register */ -+ emit_mov_i(ctx, src, imm); -+ -+ switch (op) { -+ /* dst = dst + imm */ -+ case BPF_ADD: -+ emit(ctx, addu, lo(dst), lo(dst), src); -+ emit(ctx, sltu, MIPS_R_T9, lo(dst), src); -+ emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9); -+ if (imm < 0) -+ emit(ctx, addiu, hi(dst), hi(dst), -1); -+ break; -+ /* dst = dst - imm */ -+ case BPF_SUB: -+ emit(ctx, sltu, MIPS_R_T9, lo(dst), src); -+ emit(ctx, subu, lo(dst), lo(dst), src); -+ emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); -+ if (imm < 0) -+ emit(ctx, addiu, hi(dst), hi(dst), 1); -+ break; -+ /* dst = dst | imm */ -+ case BPF_OR: -+ emit(ctx, or, lo(dst), lo(dst), src); -+ if (imm < 0) -+ emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1); -+ break; -+ /* dst = dst & imm */ -+ case BPF_AND: -+ emit(ctx, and, lo(dst), lo(dst), src); -+ if (imm >= 0) -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ break; -+ /* dst = dst ^ imm */ -+ case BPF_XOR: -+ emit(ctx, xor, lo(dst), lo(dst), src); -+ if (imm < 0) { -+ emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst)); -+ emit(ctx, addiu, hi(dst), hi(dst), -1); -+ } -+ break; -+ } -+ clobber_reg64(ctx, dst); -+} -+ -+/* ALU register operation (64-bit) */ -+static void emit_alu_r64(struct jit_context *ctx, -+ const u8 dst[], const u8 src[], u8 op) -+{ -+ switch (BPF_OP(op)) { -+ /* dst = dst + src */ -+ case BPF_ADD: -+ if (src == dst) { -+ emit(ctx, srl, MIPS_R_T9, lo(dst), 31); -+ emit(ctx, addu, lo(dst), lo(dst), lo(dst)); -+ } else { -+ emit(ctx, addu, lo(dst), lo(dst), lo(src)); -+ emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src)); -+ } -+ emit(ctx, addu, hi(dst), hi(dst), hi(src)); -+ emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9); -+ break; -+ /* dst = dst - src */ -+ case BPF_SUB: -+ emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src)); -+ emit(ctx, subu, lo(dst), lo(dst), 
lo(src)); -+ emit(ctx, subu, hi(dst), hi(dst), hi(src)); -+ emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); -+ break; -+ /* dst = dst | src */ -+ case BPF_OR: -+ emit(ctx, or, lo(dst), lo(dst), lo(src)); -+ emit(ctx, or, hi(dst), hi(dst), hi(src)); -+ break; -+ /* dst = dst & src */ -+ case BPF_AND: -+ emit(ctx, and, lo(dst), lo(dst), lo(src)); -+ emit(ctx, and, hi(dst), hi(dst), hi(src)); -+ break; -+ /* dst = dst ^ src */ -+ case BPF_XOR: -+ emit(ctx, xor, lo(dst), lo(dst), lo(src)); -+ emit(ctx, xor, hi(dst), hi(dst), hi(src)); -+ break; -+ } -+ clobber_reg64(ctx, dst); -+} -+ -+/* ALU invert (64-bit) */ -+static void emit_neg_i64(struct jit_context *ctx, const u8 dst[]) -+{ -+ emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst)); -+ emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst)); -+ emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst)); -+ emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); -+ -+ clobber_reg64(ctx, dst); -+} -+ -+/* ALU shift immediate (64-bit) */ -+static void emit_shift_i64(struct jit_context *ctx, -+ const u8 dst[], u32 imm, u8 op) -+{ -+ switch (BPF_OP(op)) { -+ /* dst = dst << imm */ -+ case BPF_LSH: -+ if (imm < 32) { -+ emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm); -+ emit(ctx, sll, lo(dst), lo(dst), imm); -+ emit(ctx, sll, hi(dst), hi(dst), imm); -+ emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9); -+ } else { -+ emit(ctx, sll, hi(dst), lo(dst), imm - 32); -+ emit(ctx, move, lo(dst), MIPS_R_ZERO); -+ } -+ break; -+ /* dst = dst >> imm */ -+ case BPF_RSH: -+ if (imm < 32) { -+ emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm); -+ emit(ctx, srl, lo(dst), lo(dst), imm); -+ emit(ctx, srl, hi(dst), hi(dst), imm); -+ emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9); -+ } else { -+ emit(ctx, srl, lo(dst), hi(dst), imm - 32); -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ } -+ break; -+ /* dst = dst >> imm (arithmetic) */ -+ case BPF_ARSH: -+ if (imm < 32) { -+ emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm); -+ emit(ctx, srl, lo(dst), lo(dst), imm); -+ emit(ctx, sra, hi(dst), 
hi(dst), imm); -+ emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9); -+ } else { -+ emit(ctx, sra, lo(dst), hi(dst), imm - 32); -+ emit(ctx, sra, hi(dst), hi(dst), 31); -+ } -+ break; -+ } -+ clobber_reg64(ctx, dst); -+} -+ -+/* ALU shift register (64-bit) */ -+static void emit_shift_r64(struct jit_context *ctx, -+ const u8 dst[], u8 src, u8 op) -+{ -+ u8 t1 = MIPS_R_T8; -+ u8 t2 = MIPS_R_T9; -+ -+ emit(ctx, andi, t1, src, 32); /* t1 = src & 32 */ -+ emit(ctx, beqz, t1, 16); /* PC += 16 if t1 == 0 */ -+ emit(ctx, nor, t2, src, MIPS_R_ZERO); /* t2 = ~src (delay slot) */ -+ -+ switch (BPF_OP(op)) { -+ /* dst = dst << src */ -+ case BPF_LSH: -+ /* Next: shift >= 32 */ -+ emit(ctx, sllv, hi(dst), lo(dst), src); /* dh = dl << src */ -+ emit(ctx, move, lo(dst), MIPS_R_ZERO); /* dl = 0 */ -+ emit(ctx, b, 20); /* PC += 20 */ -+ /* +16: shift < 32 */ -+ emit(ctx, srl, t1, lo(dst), 1); /* t1 = dl >> 1 */ -+ emit(ctx, srlv, t1, t1, t2); /* t1 = t1 >> t2 */ -+ emit(ctx, sllv, lo(dst), lo(dst), src); /* dl = dl << src */ -+ emit(ctx, sllv, hi(dst), hi(dst), src); /* dh = dh << src */ -+ emit(ctx, or, hi(dst), hi(dst), t1); /* dh = dh | t1 */ -+ break; -+ /* dst = dst >> src */ -+ case BPF_RSH: -+ /* Next: shift >= 32 */ -+ emit(ctx, srlv, lo(dst), hi(dst), src); /* dl = dh >> src */ -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); /* dh = 0 */ -+ emit(ctx, b, 20); /* PC += 20 */ -+ /* +16: shift < 32 */ -+ emit(ctx, sll, t1, hi(dst), 1); /* t1 = dh << 1 */ -+ emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */ -+ emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src */ -+ emit(ctx, srlv, hi(dst), hi(dst), src); /* dh = dh >> src */ -+ emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */ -+ break; -+ /* dst = dst >> src (arithmetic) */ -+ case BPF_ARSH: -+ /* Next: shift >= 32 */ -+ emit(ctx, srav, lo(dst), hi(dst), src); /* dl = dh >>a src */ -+ emit(ctx, sra, hi(dst), hi(dst), 31); /* dh = dh >>a 31 */ -+ emit(ctx, b, 20); /* PC += 20 */ -+ /* +16: shift < 32 */ -+ emit(ctx, sll, t1,
hi(dst), 1); /* t1 = dh << 1 */ -+ emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */ -+ emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src */ -+ emit(ctx, srav, hi(dst), hi(dst), src); /* dh = dh >>a src */ -+ emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */ -+ break; -+ } -+ -+ /* +20: Done */ -+ clobber_reg64(ctx, dst); -+} -+ -+/* ALU mul immediate (64x32-bit) */ -+static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm) -+{ -+ u8 src = MIPS_R_T6; -+ u8 tmp = MIPS_R_T9; -+ -+ switch (imm) { -+ /* dst = dst * 1 is a no-op */ -+ case 1: -+ break; -+ /* dst = dst * -1 */ -+ case -1: -+ emit_neg_i64(ctx, dst); -+ break; -+ /* dst = dst * 0 */ -+ case 0: -+ emit_mov_r(ctx, lo(dst), MIPS_R_ZERO); -+ emit_mov_r(ctx, hi(dst), MIPS_R_ZERO); -+ break; -+ /* Full 64x32 multiply */ -+ default: -+ /* hi(dst) = hi(dst) * src(imm) */ -+ emit_mov_i(ctx, src, imm); -+ if (cpu_has_mips32r1 || cpu_has_mips32r6) { -+ emit(ctx, mul, hi(dst), hi(dst), src); -+ } else { -+ emit(ctx, multu, hi(dst), src); -+ emit(ctx, mflo, hi(dst)); -+ } -+ -+ /* hi(dst) = hi(dst) - lo(dst) */ -+ if (imm < 0) -+ emit(ctx, subu, hi(dst), hi(dst), lo(dst)); -+ -+ /* tmp = lo(dst) * src(imm) >> 32 */ -+ /* lo(dst) = lo(dst) * src(imm) */ -+ if (cpu_has_mips32r6) { -+ emit(ctx, muhu, tmp, lo(dst), src); -+ emit(ctx, mulu, lo(dst), lo(dst), src); -+ } else { -+ emit(ctx, multu, lo(dst), src); -+ emit(ctx, mflo, lo(dst)); -+ emit(ctx, mfhi, tmp); -+ } -+ -+ /* hi(dst) += tmp */ -+ emit(ctx, addu, hi(dst), hi(dst), tmp); -+ clobber_reg64(ctx, dst); -+ break; -+ } -+} -+ -+/* ALU mul register (64x64-bit) */ -+static void emit_mul_r64(struct jit_context *ctx, -+ const u8 dst[], const u8 src[]) -+{ -+ u8 acc = MIPS_R_T8; -+ u8 tmp = MIPS_R_T9; -+ -+ /* acc = hi(dst) * lo(src) */ -+ if (cpu_has_mips32r1 || cpu_has_mips32r6) { -+ emit(ctx, mul, acc, hi(dst), lo(src)); -+ } else { -+ emit(ctx, multu, hi(dst), lo(src)); -+ emit(ctx, mflo, acc); -+ } -+ -+ /* tmp = lo(dst) * hi(src) */ -+ if
(cpu_has_mips32r1 || cpu_has_mips32r6) { -+ emit(ctx, mul, tmp, lo(dst), hi(src)); -+ } else { -+ emit(ctx, multu, lo(dst), hi(src)); -+ emit(ctx, mflo, tmp); -+ } -+ -+ /* acc += tmp */ -+ emit(ctx, addu, acc, acc, tmp); -+ -+ /* tmp = lo(dst) * lo(src) >> 32 */ -+ /* lo(dst) = lo(dst) * lo(src) */ -+ if (cpu_has_mips32r6) { -+ emit(ctx, muhu, tmp, lo(dst), lo(src)); -+ emit(ctx, mulu, lo(dst), lo(dst), lo(src)); -+ } else { -+ emit(ctx, multu, lo(dst), lo(src)); -+ emit(ctx, mflo, lo(dst)); -+ emit(ctx, mfhi, tmp); -+ } -+ -+ /* hi(dst) = acc + tmp */ -+ emit(ctx, addu, hi(dst), acc, tmp); -+ clobber_reg64(ctx, dst); -+} -+ -+/* Helper function for 64-bit modulo */ -+static u64 jit_mod64(u64 a, u64 b) -+{ -+ u64 rem; -+ -+ div64_u64_rem(a, b, &rem); -+ return rem; -+} -+ -+/* ALU div/mod register (64-bit) */ -+static void emit_divmod_r64(struct jit_context *ctx, -+ const u8 dst[], const u8 src[], u8 op) -+{ -+ const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */ -+ const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */ -+ const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */ -+ int exclude, k; -+ u32 addr = 0; -+ -+ /* Push caller-saved registers on stack */ -+ push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, -+ 0, JIT_RESERVED_STACK); -+ -+ /* Put 64-bit arguments 1 and 2 in registers a0-a3 */ -+ for (k = 0; k < 2; k++) { -+ emit(ctx, move, MIPS_R_T9, src[k]); -+ emit(ctx, move, r1[k], dst[k]); -+ emit(ctx, move, r2[k], MIPS_R_T9); -+ } -+ -+ /* Emit function call */ -+ switch (BPF_OP(op)) { -+ /* dst = dst / src */ -+ case BPF_DIV: -+ addr = (u32)&div64_u64; -+ break; -+ /* dst = dst % src */ -+ case BPF_MOD: -+ addr = (u32)&jit_mod64; -+ break; -+ } -+ emit_mov_i(ctx, MIPS_R_T9, addr); -+ emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); -+ emit(ctx, nop); /* Delay slot */ -+ -+ /* Store the 64-bit result in dst */ -+ emit(ctx, move, dst[0], r0[0]); -+ emit(ctx, move, dst[1], r0[1]); -+ -+ /* Restore caller-saved registers, excluding the computed result 
*/ -+ exclude = BIT(lo(dst)) | BIT(hi(dst)); -+ pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, -+ exclude, JIT_RESERVED_STACK); -+ emit_load_delay(ctx); -+ -+ clobber_reg64(ctx, dst); -+ clobber_reg(ctx, MIPS_R_V0); -+ clobber_reg(ctx, MIPS_R_V1); -+ clobber_reg(ctx, MIPS_R_RA); -+} -+ -+/* Swap bytes in a register word */ -+static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask) -+{ -+ u8 tmp = MIPS_R_T9; -+ -+ emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */ -+ emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */ -+ emit(ctx, srl, dst, src, 8); /* dst = src >> 8 */ -+ emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */ -+ emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ -+} -+ -+/* Swap half words in a register word */ -+static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src) -+{ -+ u8 tmp = MIPS_R_T9; -+ -+ emit(ctx, sll, tmp, src, 16); /* tmp = src << 16 */ -+ emit(ctx, srl, dst, src, 16); /* dst = src >> 16 */ -+ emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ -+} -+ -+/* Swap bytes and truncate a register double word, word or half word */ -+static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width) -+{ -+ u8 tmp = MIPS_R_T8; -+ -+ switch (width) { -+ /* Swap bytes in a double word */ -+ case 64: -+ if (cpu_has_mips32r2 || cpu_has_mips32r6) { -+ emit(ctx, rotr, tmp, hi(dst), 16); -+ emit(ctx, rotr, hi(dst), lo(dst), 16); -+ emit(ctx, wsbh, lo(dst), tmp); -+ emit(ctx, wsbh, hi(dst), hi(dst)); -+ } else { -+ emit_swap16_r(ctx, tmp, lo(dst)); -+ emit_swap16_r(ctx, lo(dst), hi(dst)); -+ emit(ctx, move, hi(dst), tmp); -+ -+ emit(ctx, lui, tmp, 0xff); /* tmp = 0x00ff0000 */ -+ emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */ -+ emit_swap8_r(ctx, lo(dst), lo(dst), tmp); -+ emit_swap8_r(ctx, hi(dst), hi(dst), tmp); -+ } -+ break; -+ /* Swap bytes in a word */ -+ /* Swap bytes in a half word */ -+ case 32: -+ case 16: -+ emit_bswap_r(ctx, lo(dst), width); -+ emit(ctx, move, hi(dst), 
MIPS_R_ZERO); -+ break; -+ } -+ clobber_reg64(ctx, dst); -+} -+ -+/* Truncate a register double word, word or half word */ -+static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width) -+{ -+ switch (width) { -+ case 64: -+ break; -+ /* Zero-extend a word */ -+ case 32: -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ clobber_reg(ctx, hi(dst)); -+ break; -+ /* Zero-extend a half word */ -+ case 16: -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ emit(ctx, andi, lo(dst), lo(dst), 0xffff); -+ clobber_reg64(ctx, dst); -+ break; -+ } -+} -+ -+/* Load operation: dst = *(size*)(src + off) */ -+static void emit_ldx(struct jit_context *ctx, -+ const u8 dst[], u8 src, s16 off, u8 size) -+{ -+ switch (size) { -+ /* Load a byte */ -+ case BPF_B: -+ emit(ctx, lbu, lo(dst), off, src); -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ break; -+ /* Load a half word */ -+ case BPF_H: -+ emit(ctx, lhu, lo(dst), off, src); -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ break; -+ /* Load a word */ -+ case BPF_W: -+ emit(ctx, lw, lo(dst), off, src); -+ emit(ctx, move, hi(dst), MIPS_R_ZERO); -+ break; -+ /* Load a double word */ -+ case BPF_DW: -+ if (dst[1] == src) { -+ emit(ctx, lw, dst[0], off + 4, src); -+ emit(ctx, lw, dst[1], off, src); -+ } else { -+ emit(ctx, lw, dst[1], off, src); -+ emit(ctx, lw, dst[0], off + 4, src); -+ } -+ emit_load_delay(ctx); -+ break; -+ } -+ clobber_reg64(ctx, dst); -+} -+ -+/* Store operation: *(size *)(dst + off) = src */ -+static void emit_stx(struct jit_context *ctx, -+ const u8 dst, const u8 src[], s16 off, u8 size) -+{ -+ switch (size) { -+ /* Store a byte */ -+ case BPF_B: -+ emit(ctx, sb, lo(src), off, dst); -+ break; -+ /* Store a half word */ -+ case BPF_H: -+ emit(ctx, sh, lo(src), off, dst); -+ break; -+ /* Store a word */ -+ case BPF_W: -+ emit(ctx, sw, lo(src), off, dst); -+ break; -+ /* Store a double word */ -+ case BPF_DW: -+ emit(ctx, sw, src[1], off, dst); -+ emit(ctx, sw, src[0], off + 4, dst); -+ break; -+ } -+} -+ -+/* 
Atomic read-modify-write (32-bit, non-ll/sc fallback) */ -+static void emit_atomic_r32(struct jit_context *ctx, -+ u8 dst, u8 src, s16 off, u8 code) -+{ -+ u32 exclude = 0; -+ u32 addr = 0; -+ -+ /* Push caller-saved registers on stack */ -+ push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, -+ 0, JIT_RESERVED_STACK); -+ /* -+ * Argument 1: src, passed in register a0 -+ * Argument 2: dst+off, passed in register a1 -+ * dst is copied to t9 first so that writing a0 cannot destroy it. -+ */ -+ emit(ctx, move, MIPS_R_T9, dst); -+ emit(ctx, move, MIPS_R_A0, src); -+ emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off); -+ -+ /* Emit function call */ -+ switch (code) { -+ case BPF_ADD: -+ addr = (u32)&atomic_add; -+ break; -+ case BPF_SUB: -+ addr = (u32)&atomic_sub; -+ break; -+ case BPF_OR: -+ addr = (u32)&atomic_or; -+ break; -+ case BPF_AND: -+ addr = (u32)&atomic_and; -+ break; -+ case BPF_XOR: -+ addr = (u32)&atomic_xor; -+ break; -+ } -+ emit_mov_i(ctx, MIPS_R_T9, addr); -+ emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); -+ emit(ctx, nop); /* Delay slot */ -+ -+ /* Restore caller-saved registers, except any fetched value */ -+ pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, -+ exclude, JIT_RESERVED_STACK); -+ emit_load_delay(ctx); -+ clobber_reg(ctx, MIPS_R_RA); -+} -+ -+/* Atomic read-modify-write (64-bit) */ -+static void emit_atomic_r64(struct jit_context *ctx, -+ u8 dst, const u8 src[], s16 off, u8 code) -+{ -+ const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */ -+ u32 exclude = 0; -+ u32 addr = 0; -+ -+ /* Push caller-saved registers on stack */ -+ push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, -+ 0, JIT_RESERVED_STACK); -+ /* -+ * Argument 1: 64-bit src, passed in registers a0-a1 -+ * Argument 2: 32-bit dst+off, passed in register a2 -+ */ -+ emit(ctx, move, MIPS_R_T9, dst); -+ emit(ctx, move, r1[0], src[0]); -+ emit(ctx, move, r1[1], src[1]); -+ emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off); -+ -+ /* Emit function call */ -+ switch (code) { -+ case BPF_ADD: -+ addr = (u32)&atomic64_add;
-+ break; -+ case BPF_SUB: -+ addr = (u32)&atomic64_sub; -+ break; -+ case BPF_OR: -+ addr = (u32)&atomic64_or; -+ break; -+ case BPF_AND: -+ addr = (u32)&atomic64_and; -+ break; -+ case BPF_XOR: -+ addr = (u32)&atomic64_xor; -+ break; -+ } -+ emit_mov_i(ctx, MIPS_R_T9, addr); -+ emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); -+ emit(ctx, nop); /* Delay slot */ -+ -+ /* Restore caller-saved registers, except any fetched value */ -+ pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, -+ exclude, JIT_RESERVED_STACK); -+ emit_load_delay(ctx); -+ clobber_reg(ctx, MIPS_R_RA); -+} -+ -+/* -+ * Conditional movz or an emulated equivalent. -+ * Note that the rs register may be modified. -+ */ -+static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt) -+{ -+ if (cpu_has_mips_2) { -+ emit(ctx, movz, rd, rs, rt); /* rd = rt ? rd : rs */ -+ } else if (cpu_has_mips32r6) { -+ if (rs != MIPS_R_ZERO) -+ emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt != 0 */ -+ emit(ctx, selnez, rd, rd, rt); /* rd = 0 if rt == 0 */ -+ if (rs != MIPS_R_ZERO) -+ emit(ctx, or, rd, rd, rs); /* rd = rd | rs */ -+ } else { -+ emit(ctx, bnez, rt, 8); /* PC += 8 if rt != 0 */ -+ emit(ctx, nop); /* +0: delay slot */ -+ emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */ -+ } -+ clobber_reg(ctx, rd); -+ clobber_reg(ctx, rs); -+} -+ -+/* -+ * Conditional movn or an emulated equivalent. -+ * Note that the rs register may be modified. -+ */ -+static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt) -+{ -+ if (cpu_has_mips_2) { -+ emit(ctx, movn, rd, rs, rt); /* rd = rt ?
rs : rd */ -+ } else if (cpu_has_mips32r6) { -+ if (rs != MIPS_R_ZERO) -+ emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0 */ -+ emit(ctx, seleqz, rd, rd, rt); /* rd = 0 if rt != 0 */ -+ if (rs != MIPS_R_ZERO) -+ emit(ctx, or, rd, rd, rs); /* rd = rd | rs */ -+ } else { -+ emit(ctx, beqz, rt, 8); /* PC += 8 if rt == 0 */ -+ emit(ctx, nop); /* +0: delay slot */ -+ emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */ -+ } -+ clobber_reg(ctx, rd); -+ clobber_reg(ctx, rs); -+} -+ -+/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */ -+static void emit_sltiu_r64(struct jit_context *ctx, u8 rd, -+ const u8 rs[], s64 imm) -+{ -+ u8 tmp = MIPS_R_T9; -+ -+ if (imm < 0) { -+ emit_mov_i(ctx, rd, imm); /* rd = imm */ -+ emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */ -+ emit(ctx, sltiu, tmp, hi(rs), -1); /* tmp = rsh < ~0U */ -+ emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */ -+ } else { /* imm >= 0 */ -+ if (imm > 0x7fff) { -+ emit_mov_i(ctx, rd, (s32)imm); /* rd = imm */ -+ emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */ -+ } else { -+ emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */ -+ } -+ emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh */ -+ } -+} -+ -+/* Emulation of 64-bit sltu rd, rs, rt */ -+static void emit_sltu_r64(struct jit_context *ctx, u8 rd, -+ const u8 rs[], const u8 rt[]) -+{ -+ u8 tmp = MIPS_R_T9; -+ -+ emit(ctx, sltu, rd, lo(rs), lo(rt)); /* rd = rsl < rtl */ -+ emit(ctx, subu, tmp, hi(rs), hi(rt)); /* tmp = rsh - rth */ -+ emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp); /* rd = 0 if tmp != 0 */ -+ emit(ctx, sltu, tmp, hi(rs), hi(rt)); /* tmp = rsh < rth */ -+ emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */ -+} -+ -+/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */ -+static void emit_slti_r64(struct jit_context *ctx, u8 rd, -+ const u8 rs[], s64 imm) -+{ -+ u8 t1 = MIPS_R_T8; -+ u8 t2 = MIPS_R_T9; -+ u8 cmp; -+ -+ /* -+ * if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl -+ * else t1 = rsl <u imm
-+ */ -+ emit_mov_i(ctx, rd, (s32)imm); -+ emit(ctx, sltu, t1, lo(rs), rd); /* t1 = rsl <u imm */ -+ emit(ctx, sltu, t2, rd, lo(rs)); /* t2 = imm <u rsl */ -+ emit(ctx, srl, rd, hi(rs), 31); /* rd = rsh >> 31 */ -+ if (imm < 0) -+ emit_movz_r(ctx, t1, t2, rd); /* t1 = rd ? t1 : t2 */ -+ else -+ emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */ -+ /* -+ * if ((imm < 0 && rsh != 0xffffffff) || -+ * (imm >= 0 && rsh != 0)) -+ * t1 = 0 -+ */ -+ if (imm < 0) { -+ emit(ctx, addiu, rd, hi(rs), 1); /* rd = rsh + 1 */ -+ cmp = rd; -+ } else { /* imm >= 0 */ -+ cmp = hi(rs); -+ } -+ emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp); /* t1 = 0 if cmp != 0 */ -+ -+ /* -+ * if (imm < 0) rd = rsh < -1 -+ * else rd = rsh < 0 -+ * rd = rd | t1 -+ */ -+ emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */ -+ emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */ -+} -+ -+/* Emulation of 64-bit slt rd, rs, rt */ -+static void emit_slt_r64(struct jit_context *ctx, u8 rd, -+ const u8 rs[], const u8 rt[]) -+{ -+ u8 t1 = MIPS_R_T7; -+ u8 t2 = MIPS_R_T8; -+ u8 t3 = MIPS_R_T9; -+ -+ /* -+ * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl -+ * else t1 = rsl <u rtl -+ * if (rsh != rth) t1 = 0 -+ */ -+ emit(ctx, sltu, t1, lo(rs), lo(rt)); /* t1 = rsl <u rtl */ -+ emit(ctx, sltu, t2, lo(rt), lo(rs)); /* t2 = rtl <u rsl */ -+ emit(ctx, xor, t3, hi(rs), hi(rt)); /* t3 = rsh ^ rth */ -+ emit(ctx, srl, rd, t3, 31); /* rd = t3 >> 31 */ -+ emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ?
t2 : t1 */ -+ emit_movn_r(ctx, t1, MIPS_R_ZERO, t3); /* t1 = 0 if t3 != 0 */ -+ -+ /* rd = (rsh < rth) | t1 */ -+ emit(ctx, slt, rd, hi(rs), hi(rt)); /* rd = rsh <s rth */ -+ emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */ -+} -+ -+/* -+ * Jump immediate (64-bit) -+ * Compares the 64-bit register pair dst with the sign-extended 32-bit imm -+ * and emits a branch with PC-relative offset off for the given jump op. -+ * Uses t6 as result register; some cases also use t9. -+ */ -+static void emit_jmp_i64(struct jit_context *ctx, -+ const u8 dst[], s32 imm, s32 off, u8 op) -+{ -+ u8 tmp = MIPS_R_T6; -+ -+ switch (op) { -+ /* No-op, used internally for branch optimization */ -+ case JIT_JNOP: -+ break; -+ /* PC += off if dst == imm */ -+ /* PC += off if dst != imm */ -+ case BPF_JEQ: -+ case BPF_JNE: -+ if (imm >= -0x7fff && imm <= 0x8000) { -+ emit(ctx, addiu, tmp, lo(dst), -imm); -+ } else if ((u32)imm <= 0xffff) { -+ emit(ctx, xori, tmp, lo(dst), imm); -+ } else { /* Register fallback */ -+ emit_mov_i(ctx, tmp, imm); -+ emit(ctx, xor, tmp, lo(dst), tmp); -+ } -+ if (imm < 0) { /* Compare sign extension */ -+ /* t9 = dh + 1, zero iff dh == ~0U; needs addiu, addu would add register $1 */ -+ emit(ctx, addiu, MIPS_R_T9, hi(dst), 1); -+ emit(ctx, or, tmp, tmp, MIPS_R_T9); -+ } else { /* Compare zero extension */ -+ emit(ctx, or, tmp, tmp, hi(dst)); -+ } -+ if (op == BPF_JEQ) -+ emit(ctx, beqz, tmp, off); -+ else /* BPF_JNE */ -+ emit(ctx, bnez, tmp, off); -+ break; -+ /* PC += off if dst & imm */ -+ /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ -+ case BPF_JSET: -+ case JIT_JNSET: -+ if ((u32)imm <= 0xffff) { -+ emit(ctx, andi, tmp, lo(dst), imm); -+ } else { /* Register fallback */ -+ emit_mov_i(ctx, tmp, imm); -+ emit(ctx, and, tmp, lo(dst), tmp); -+ } -+ if (imm < 0) /* Sign-extension pulls in high word */ -+ emit(ctx, or, tmp, tmp, hi(dst)); -+ if (op == BPF_JSET) -+ emit(ctx, bnez, tmp, off); -+ else /* JIT_JNSET */ -+ emit(ctx, beqz, tmp, off); -+ break; -+ /* PC += off if dst > imm */ -+ case BPF_JGT: -+ emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1); -+ emit(ctx, beqz, tmp, off); -+ break; -+ /* PC += off if dst >= imm */ -+ case BPF_JGE: -+ emit_sltiu_r64(ctx, tmp, dst, imm); -+ emit(ctx, beqz, tmp, off); -+ break; -+ /* PC += off if dst <
imm */ -+ case BPF_JLT: -+ emit_sltiu_r64(ctx, tmp, dst, imm); -+ emit(ctx, bnez, tmp, off); -+ break; -+ /* PC += off if dst <= imm */ -+ case BPF_JLE: -+ emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1); -+ emit(ctx, bnez, tmp, off); -+ break; -+ /* PC += off if dst > imm (signed) */ -+ case BPF_JSGT: -+ emit_slti_r64(ctx, tmp, dst, (s64)imm + 1); -+ emit(ctx, beqz, tmp, off); -+ break; -+ /* PC += off if dst >= imm (signed) */ -+ case BPF_JSGE: -+ emit_slti_r64(ctx, tmp, dst, imm); -+ emit(ctx, beqz, tmp, off); -+ break; -+ /* PC += off if dst < imm (signed) */ -+ case BPF_JSLT: -+ emit_slti_r64(ctx, tmp, dst, imm); -+ emit(ctx, bnez, tmp, off); -+ break; -+ /* PC += off if dst <= imm (signed) */ -+ case BPF_JSLE: -+ emit_slti_r64(ctx, tmp, dst, (s64)imm + 1); -+ emit(ctx, bnez, tmp, off); -+ break; -+ } -+} -+ -+/* Jump register (64-bit) */ -+static void emit_jmp_r64(struct jit_context *ctx, -+ const u8 dst[], const u8 src[], s32 off, u8 op) -+{ -+ u8 t1 = MIPS_R_T6; -+ u8 t2 = MIPS_R_T7; -+ -+ switch (op) { -+ /* No-op, used internally for branch optimization */ -+ case JIT_JNOP: -+ break; -+ /* PC += off if dst == src */ -+ /* PC += off if dst != src */ -+ case BPF_JEQ: -+ case BPF_JNE: -+ emit(ctx, subu, t1, lo(dst), lo(src)); -+ emit(ctx, subu, t2, hi(dst), hi(src)); -+ emit(ctx, or, t1, t1, t2); -+ if (op == BPF_JEQ) -+ emit(ctx, beqz, t1, off); -+ else /* BPF_JNE */ -+ emit(ctx, bnez, t1, off); -+ break; -+ /* PC += off if dst & src */ -+ /* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */ -+ case BPF_JSET: -+ case JIT_JNSET: -+ emit(ctx, and, t1, lo(dst), lo(src)); -+ emit(ctx, and, t2, hi(dst), hi(src)); -+ emit(ctx, or, t1, t1, t2); -+ if (op == BPF_JSET) -+ emit(ctx, bnez, t1, off); -+ else /* JIT_JNSET */ -+ emit(ctx, beqz, t1, off); -+ break; -+ /* PC += off if dst > src */ -+ case BPF_JGT: -+ emit_sltu_r64(ctx, t1, src, dst); -+ emit(ctx, bnez, t1, off); -+ break; -+ /* PC += off if dst >= src */ -+ case BPF_JGE: -+ emit_sltu_r64(ctx,
t1, dst, src); -+ emit(ctx, beqz, t1, off); -+ break; -+ /* PC += off if dst < src */ -+ case BPF_JLT: -+ emit_sltu_r64(ctx, t1, dst, src); -+ emit(ctx, bnez, t1, off); -+ break; -+ /* PC += off if dst <= src */ -+ case BPF_JLE: -+ emit_sltu_r64(ctx, t1, src, dst); -+ emit(ctx, beqz, t1, off); -+ break; -+ /* PC += off if dst > src (signed) */ -+ case BPF_JSGT: -+ emit_slt_r64(ctx, t1, src, dst); -+ emit(ctx, bnez, t1, off); -+ break; -+ /* PC += off if dst >= src (signed) */ -+ case BPF_JSGE: -+ emit_slt_r64(ctx, t1, dst, src); -+ emit(ctx, beqz, t1, off); -+ break; -+ /* PC += off if dst < src (signed) */ -+ case BPF_JSLT: -+ emit_slt_r64(ctx, t1, dst, src); -+ emit(ctx, bnez, t1, off); -+ break; -+ /* PC += off if dst <= src (signed) */ -+ case BPF_JSLE: -+ emit_slt_r64(ctx, t1, src, dst); -+ emit(ctx, beqz, t1, off); -+ break; -+ } -+} -+ -+/* Function call */ -+static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn) -+{ -+ bool fixed; -+ u64 addr; -+ -+ /* Decode the call address */ -+ if (bpf_jit_get_func_addr(ctx->program, insn, false, -+ &addr, &fixed) < 0) -+ return -1; -+ if (!fixed) -+ return -1; -+ -+ /* Push stack arguments */ -+ push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK); -+ -+ /* Emit function call */ -+ emit_mov_i(ctx, MIPS_R_T9, addr); -+ emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); -+ emit(ctx, nop); /* Delay slot */ -+ -+ clobber_reg(ctx, MIPS_R_RA); -+ clobber_reg(ctx, MIPS_R_V0); -+ clobber_reg(ctx, MIPS_R_V1); -+ return 0; -+} -+ -+/* Function tail call */ -+static int emit_tail_call(struct jit_context *ctx) -+{ -+ u8 ary = lo(bpf2mips32[BPF_REG_2]); -+ u8 ind = lo(bpf2mips32[BPF_REG_3]); -+ u8 t1 = MIPS_R_T8; -+ u8 t2 = MIPS_R_T9; -+ int off; -+ -+ /* -+ * Tail call: -+ * eBPF R1 - function argument (context ptr), passed in a0-a1 -+ * eBPF R2 - ptr to object with array of function entry points -+ * eBPF R3 - array index of function to be called -+ * stack[sz] - remaining tail call count, initialized in prologue -+ */ 
-+ -+ /* if (ind >= ary->map.max_entries) goto out */ -+ off = offsetof(struct bpf_array, map.max_entries); -+ if (off > 0x7fff) -+ return -1; -+ emit(ctx, lw, t1, off, ary); /* t1 = ary->map.max_entries*/ -+ emit_load_delay(ctx); /* Load delay slot */ -+ emit(ctx, sltu, t1, ind, t1); /* t1 = ind < t1 */ -+ emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0 */ -+ /* (next insn delay slot) */ -+ /* if (TCC-- <= 0) goto out */ -+ emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP); /* t2 = *(SP + size) */ -+ emit_load_delay(ctx); /* Load delay slot */ -+ emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 <= 0 */ -+ emit(ctx, addiu, t2, t2, -1); /* t2-- (delay slot) */ -+ emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP); /* *(SP + size) = t2 */ -+ -+ /* prog = ary->ptrs[ind] */ -+ off = offsetof(struct bpf_array, ptrs); -+ if (off > 0x7fff) -+ return -1; -+ emit(ctx, sll, t1, ind, 2); /* t1 = ind << 2 */ -+ emit(ctx, addu, t1, t1, ary); /* t1 += ary */ -+ emit(ctx, lw, t2, off, t1); /* t2 = *(t1 + off) */ -+ emit_load_delay(ctx); /* Load delay slot */ -+ -+ /* if (prog == 0) goto out */ -+ emit(ctx, beqz, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 == 0 */ -+ emit(ctx, nop); /* Delay slot */ -+ -+ /* func = prog->bpf_func + 8 (prologue skip offset) */ -+ off = offsetof(struct bpf_prog, bpf_func); -+ if (off > 0x7fff) -+ return -1; -+ emit(ctx, lw, t1, off, t2); /* t1 = *(t2 + off) */ -+ emit_load_delay(ctx); /* Load delay slot */ -+ emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP); /* t1 += skip (8 or 12) */ -+ -+ /* goto func */ -+ build_epilogue(ctx, t1); -+ return 0; -+} -+ -+/* -+ * Stack frame layout for a JITed program (stack grows down). 
-+ * -+ * Higher address : Caller's stack frame : -+ * :----------------------------: -+ * : 64-bit eBPF args r3-r5 : -+ * :----------------------------: -+ * : Reserved / tail call count : -+ * +============================+ <--- MIPS sp before call -+ * | Callee-saved registers, | -+ * | including RA and FP | -+ * +----------------------------+ <--- eBPF FP (MIPS zero,fp) -+ * | Local eBPF variables | -+ * | allocated by program | -+ * +----------------------------+ -+ * | Reserved for caller-saved | -+ * | registers | -+ * +----------------------------+ -+ * | Reserved for 64-bit eBPF | -+ * | args r3-r5 & args passed | -+ * | on stack in kernel calls | -+ * Lower address +============================+ <--- MIPS sp -+ */ -+ -+/* Build program prologue to set up the stack and registers */ -+void build_prologue(struct jit_context *ctx) -+{ -+ const u8 *r1 = bpf2mips32[BPF_REG_1]; -+ const u8 *fp = bpf2mips32[BPF_REG_FP]; -+ int stack, saved, locals, reserved; -+ -+ /* -+ * The first two instructions initialize TCC in the reserved (for us) -+ * 16-byte area in the parent's stack frame. On a tail call, the -+ * calling function jumps into the prologue after these instructions. -+ */ -+ emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, -+ min(MAX_TAIL_CALL_CNT + 1, 0xffff)); -+ emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP); -+ -+ /* -+ * Register eBPF R1 contains the 32-bit context pointer argument. -+ * A 32-bit argument is always passed in MIPS register a0, regardless -+ * of CPU endianness. Initialize R1 accordingly and zero-extend. 
-+ */ -+#ifdef __BIG_ENDIAN -+ emit(ctx, move, lo(r1), MIPS_R_A0); -+#endif -+ -+ /* === Entry-point for tail calls === */ -+ -+ /* Zero-extend the 32-bit argument */ -+ emit(ctx, move, hi(r1), MIPS_R_ZERO); -+ -+ /* If the eBPF frame pointer was accessed it must be saved */ -+ if (ctx->accessed & BIT(BPF_REG_FP)) -+ clobber_reg64(ctx, fp); -+ -+ /* Compute the stack space needed for callee-saved registers */ -+ saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32); -+ saved = ALIGN(saved, MIPS_STACK_ALIGNMENT); -+ -+ /* Stack space used by eBPF program local data */ -+ locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT); -+ -+ /* -+ * If we are emitting function calls, reserve extra stack space for -+ * caller-saved registers and function arguments passed on the stack. -+ * The required space is computed automatically during resource -+ * usage discovery (pass 1). -+ */ -+ reserved = ctx->stack_used; -+ -+ /* Allocate the stack frame */ -+ stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT); -+ emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack); -+ -+ /* Store callee-saved registers on stack */ -+ push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved); -+ -+ /* Initialize the eBPF frame pointer if accessed */ -+ if (ctx->accessed & BIT(BPF_REG_FP)) -+ emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved); -+ -+ ctx->saved_size = saved; -+ ctx->stack_size = stack; -+} -+ -+/* Build the program epilogue to restore the stack and registers */ -+void build_epilogue(struct jit_context *ctx, int dest_reg) -+{ -+ /* Restore callee-saved registers from stack */ -+ pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, -+ ctx->stack_size - ctx->saved_size); -+ /* -+ * A 32-bit return value is always passed in MIPS register v0, -+ * but on big-endian targets the low part of R0 is mapped to v1. 
-+ */ -+#ifdef __BIG_ENDIAN -+ emit(ctx, move, MIPS_R_V0, MIPS_R_V1); -+#endif -+ -+ /* Jump to the return address and adjust the stack pointer */ -+ emit(ctx, jr, dest_reg); -+ emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size); -+} -+ -+/* Build one eBPF instruction */ -+int build_insn(const struct bpf_insn *insn, struct jit_context *ctx) -+{ -+ const u8 *dst = bpf2mips32[insn->dst_reg]; -+ const u8 *src = bpf2mips32[insn->src_reg]; -+ const u8 *tmp = bpf2mips32[JIT_REG_TMP]; -+ u8 code = insn->code; -+ s16 off = insn->off; -+ s32 imm = insn->imm; -+ s32 val, rel; -+ u8 alu, jmp; -+ -+ switch (code) { -+ /* ALU operations */ -+ /* dst = imm */ -+ case BPF_ALU | BPF_MOV | BPF_K: -+ emit_mov_i(ctx, lo(dst), imm); -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = src */ -+ case BPF_ALU | BPF_MOV | BPF_X: -+ if (imm == 1) { -+ /* Special mov32 for zext */ -+ emit_mov_i(ctx, hi(dst), 0); -+ } else { -+ emit_mov_r(ctx, lo(dst), lo(src)); -+ emit_zext_ver(ctx, dst); -+ } -+ break; -+ /* dst = -dst */ -+ case BPF_ALU | BPF_NEG: -+ emit_alu_i(ctx, lo(dst), 0, BPF_NEG); -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = dst & imm */ -+ /* dst = dst | imm */ -+ /* dst = dst ^ imm */ -+ /* dst = dst << imm */ -+ /* dst = dst >> imm */ -+ /* dst = dst >> imm (arithmetic) */ -+ /* dst = dst + imm */ -+ /* dst = dst - imm */ -+ /* dst = dst * imm */ -+ /* dst = dst / imm */ -+ /* dst = dst % imm */ -+ case BPF_ALU | BPF_OR | BPF_K: -+ case BPF_ALU | BPF_AND | BPF_K: -+ case BPF_ALU | BPF_XOR | BPF_K: -+ case BPF_ALU | BPF_LSH | BPF_K: -+ case BPF_ALU | BPF_RSH | BPF_K: -+ case BPF_ALU | BPF_ARSH | BPF_K: -+ case BPF_ALU | BPF_ADD | BPF_K: -+ case BPF_ALU | BPF_SUB | BPF_K: -+ case BPF_ALU | BPF_MUL | BPF_K: -+ case BPF_ALU | BPF_DIV | BPF_K: -+ case BPF_ALU | BPF_MOD | BPF_K: -+ if (!valid_alu_i(BPF_OP(code), imm)) { -+ emit_mov_i(ctx, MIPS_R_T6, imm); -+ emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code)); -+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { -+ 
emit_alu_i(ctx, lo(dst), val, alu); -+ } -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = dst & src */ -+ /* dst = dst | src */ -+ /* dst = dst ^ src */ -+ /* dst = dst << src */ -+ /* dst = dst >> src */ -+ /* dst = dst >> src (arithmetic) */ -+ /* dst = dst + src */ -+ /* dst = dst - src */ -+ /* dst = dst * src */ -+ /* dst = dst / src */ -+ /* dst = dst % src */ -+ case BPF_ALU | BPF_AND | BPF_X: -+ case BPF_ALU | BPF_OR | BPF_X: -+ case BPF_ALU | BPF_XOR | BPF_X: -+ case BPF_ALU | BPF_LSH | BPF_X: -+ case BPF_ALU | BPF_RSH | BPF_X: -+ case BPF_ALU | BPF_ARSH | BPF_X: -+ case BPF_ALU | BPF_ADD | BPF_X: -+ case BPF_ALU | BPF_SUB | BPF_X: -+ case BPF_ALU | BPF_MUL | BPF_X: -+ case BPF_ALU | BPF_DIV | BPF_X: -+ case BPF_ALU | BPF_MOD | BPF_X: -+ emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code)); -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = imm (64-bit) */ -+ case BPF_ALU64 | BPF_MOV | BPF_K: -+ emit_mov_se_i64(ctx, dst, imm); -+ break; -+ /* dst = src (64-bit) */ -+ case BPF_ALU64 | BPF_MOV | BPF_X: -+ emit_mov_r(ctx, lo(dst), lo(src)); -+ emit_mov_r(ctx, hi(dst), hi(src)); -+ break; -+ /* dst = -dst (64-bit) */ -+ case BPF_ALU64 | BPF_NEG: -+ emit_neg_i64(ctx, dst); -+ break; -+ /* dst = dst & imm (64-bit) */ -+ case BPF_ALU64 | BPF_AND | BPF_K: -+ emit_alu_i64(ctx, dst, imm, BPF_OP(code)); -+ break; -+ /* dst = dst | imm (64-bit) */ -+ /* dst = dst ^ imm (64-bit) */ -+ /* dst = dst + imm (64-bit) */ -+ /* dst = dst - imm (64-bit) */ -+ case BPF_ALU64 | BPF_OR | BPF_K: -+ case BPF_ALU64 | BPF_XOR | BPF_K: -+ case BPF_ALU64 | BPF_ADD | BPF_K: -+ case BPF_ALU64 | BPF_SUB | BPF_K: -+ if (imm) -+ emit_alu_i64(ctx, dst, imm, BPF_OP(code)); -+ break; -+ /* dst = dst << imm (64-bit) */ -+ /* dst = dst >> imm (64-bit) */ -+ /* dst = dst >> imm (64-bit, arithmetic) */ -+ case BPF_ALU64 | BPF_LSH | BPF_K: -+ case BPF_ALU64 | BPF_RSH | BPF_K: -+ case BPF_ALU64 | BPF_ARSH | BPF_K: -+ if (imm) -+ emit_shift_i64(ctx, dst, imm, BPF_OP(code)); -+ break; -+ /* dst = dst * imm 
(64-bit) */ -+ case BPF_ALU64 | BPF_MUL | BPF_K: -+ emit_mul_i64(ctx, dst, imm); -+ break; -+ /* dst = dst / imm (64-bit) */ -+ /* dst = dst % imm (64-bit) */ -+ case BPF_ALU64 | BPF_DIV | BPF_K: -+ case BPF_ALU64 | BPF_MOD | BPF_K: -+ /* -+ * Sign-extend the immediate value into a temporary register, -+ * and then do the operation on this register. -+ */ -+ emit_mov_se_i64(ctx, tmp, imm); -+ emit_divmod_r64(ctx, dst, tmp, BPF_OP(code)); -+ break; -+ /* dst = dst & src (64-bit) */ -+ /* dst = dst | src (64-bit) */ -+ /* dst = dst ^ src (64-bit) */ -+ /* dst = dst + src (64-bit) */ -+ /* dst = dst - src (64-bit) */ -+ case BPF_ALU64 | BPF_AND | BPF_X: -+ case BPF_ALU64 | BPF_OR | BPF_X: -+ case BPF_ALU64 | BPF_XOR | BPF_X: -+ case BPF_ALU64 | BPF_ADD | BPF_X: -+ case BPF_ALU64 | BPF_SUB | BPF_X: -+ emit_alu_r64(ctx, dst, src, BPF_OP(code)); -+ break; -+ /* dst = dst << src (64-bit) */ -+ /* dst = dst >> src (64-bit) */ -+ /* dst = dst >> src (64-bit, arithmetic) */ -+ case BPF_ALU64 | BPF_LSH | BPF_X: -+ case BPF_ALU64 | BPF_RSH | BPF_X: -+ case BPF_ALU64 | BPF_ARSH | BPF_X: -+ emit_shift_r64(ctx, dst, lo(src), BPF_OP(code)); -+ break; -+ /* dst = dst * src (64-bit) */ -+ case BPF_ALU64 | BPF_MUL | BPF_X: -+ emit_mul_r64(ctx, dst, src); -+ break; -+ /* dst = dst / src (64-bit) */ -+ /* dst = dst % src (64-bit) */ -+ case BPF_ALU64 | BPF_DIV | BPF_X: -+ case BPF_ALU64 | BPF_MOD | BPF_X: -+ emit_divmod_r64(ctx, dst, src, BPF_OP(code)); -+ break; -+ /* dst = htole(dst) */ -+ /* dst = htobe(dst) */ -+ case BPF_ALU | BPF_END | BPF_FROM_LE: -+ case BPF_ALU | BPF_END | BPF_FROM_BE: -+ if (BPF_SRC(code) == -+#ifdef __BIG_ENDIAN -+ BPF_FROM_LE -+#else -+ BPF_FROM_BE -+#endif -+ ) -+ emit_bswap_r64(ctx, dst, imm); -+ else -+ emit_trunc_r64(ctx, dst, imm); -+ break; -+ /* dst = imm64 */ -+ case BPF_LD | BPF_IMM | BPF_DW: -+ emit_mov_i(ctx, lo(dst), imm); -+ emit_mov_i(ctx, hi(dst), insn[1].imm); -+ return 1; -+ /* LDX: dst = *(size *)(src + off) */ -+ case BPF_LDX | BPF_MEM | 
BPF_W: -+ case BPF_LDX | BPF_MEM | BPF_H: -+ case BPF_LDX | BPF_MEM | BPF_B: -+ case BPF_LDX | BPF_MEM | BPF_DW: -+ emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code)); -+ break; -+ /* ST: *(size *)(dst + off) = imm */ -+ case BPF_ST | BPF_MEM | BPF_W: -+ case BPF_ST | BPF_MEM | BPF_H: -+ case BPF_ST | BPF_MEM | BPF_B: -+ case BPF_ST | BPF_MEM | BPF_DW: -+ switch (BPF_SIZE(code)) { -+ case BPF_DW: -+ /* Sign-extend immediate value into temporary reg */ -+ emit_mov_se_i64(ctx, tmp, imm); -+ break; -+ case BPF_W: -+ case BPF_H: -+ case BPF_B: -+ emit_mov_i(ctx, lo(tmp), imm); -+ break; -+ } -+ emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code)); -+ break; -+ /* STX: *(size *)(dst + off) = src */ -+ case BPF_STX | BPF_MEM | BPF_W: -+ case BPF_STX | BPF_MEM | BPF_H: -+ case BPF_STX | BPF_MEM | BPF_B: -+ case BPF_STX | BPF_MEM | BPF_DW: -+ emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code)); -+ break; -+ /* Speculation barrier */ -+ case BPF_ST | BPF_NOSPEC: -+ break; -+ /* Atomics */ -+ case BPF_STX | BPF_XADD | BPF_W: -+ switch (imm) { -+ case BPF_ADD: -+ case BPF_AND: -+ case BPF_OR: -+ case BPF_XOR: -+ if (cpu_has_llsc) -+ emit_atomic_r(ctx, lo(dst), lo(src), off, imm); -+ else /* Non-ll/sc fallback */ -+ emit_atomic_r32(ctx, lo(dst), lo(src), -+ off, imm); -+ break; -+ default: -+ goto notyet; -+ } -+ break; -+ /* Atomics (64-bit) */ -+ case BPF_STX | BPF_XADD | BPF_DW: -+ switch (imm) { -+ case BPF_ADD: -+ case BPF_AND: -+ case BPF_OR: -+ case BPF_XOR: -+ emit_atomic_r64(ctx, lo(dst), src, off, imm); -+ break; -+ default: -+ goto notyet; -+ } -+ break; -+ /* PC += off if dst == src */ -+ /* PC += off if dst != src */ -+ /* PC += off if dst & src */ -+ /* PC += off if dst > src */ -+ /* PC += off if dst >= src */ -+ /* PC += off if dst < src */ -+ /* PC += off if dst <= src */ -+ /* PC += off if dst > src (signed) */ -+ /* PC += off if dst >= src (signed) */ -+ /* PC += off if dst < src (signed) */ -+ /* PC += off if dst <= src (signed) */ -+ case BPF_JMP32 | BPF_JEQ | 
BPF_X: -+ case BPF_JMP32 | BPF_JNE | BPF_X: -+ case BPF_JMP32 | BPF_JSET | BPF_X: -+ case BPF_JMP32 | BPF_JGT | BPF_X: -+ case BPF_JMP32 | BPF_JGE | BPF_X: -+ case BPF_JMP32 | BPF_JLT | BPF_X: -+ case BPF_JMP32 | BPF_JLE | BPF_X: -+ case BPF_JMP32 | BPF_JSGT | BPF_X: -+ case BPF_JMP32 | BPF_JSGE | BPF_X: -+ case BPF_JMP32 | BPF_JSLT | BPF_X: -+ case BPF_JMP32 | BPF_JSLE | BPF_X: -+ if (off == 0) -+ break; -+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); -+ emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp); -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off if dst == imm */ -+ /* PC += off if dst != imm */ -+ /* PC += off if dst & imm */ -+ /* PC += off if dst > imm */ -+ /* PC += off if dst >= imm */ -+ /* PC += off if dst < imm */ -+ /* PC += off if dst <= imm */ -+ /* PC += off if dst > imm (signed) */ -+ /* PC += off if dst >= imm (signed) */ -+ /* PC += off if dst < imm (signed) */ -+ /* PC += off if dst <= imm (signed) */ -+ case BPF_JMP32 | BPF_JEQ | BPF_K: -+ case BPF_JMP32 | BPF_JNE | BPF_K: -+ case BPF_JMP32 | BPF_JSET | BPF_K: -+ case BPF_JMP32 | BPF_JGT | BPF_K: -+ case BPF_JMP32 | BPF_JGE | BPF_K: -+ case BPF_JMP32 | BPF_JLT | BPF_K: -+ case BPF_JMP32 | BPF_JLE | BPF_K: -+ case BPF_JMP32 | BPF_JSGT | BPF_K: -+ case BPF_JMP32 | BPF_JSGE | BPF_K: -+ case BPF_JMP32 | BPF_JSLT | BPF_K: -+ case BPF_JMP32 | BPF_JSLE | BPF_K: -+ if (off == 0) -+ break; -+ setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel); -+ if (valid_jmp_i(jmp, imm)) { -+ emit_jmp_i(ctx, lo(dst), imm, rel, jmp); -+ } else { -+ /* Move large immediate to register */ -+ emit_mov_i(ctx, MIPS_R_T6, imm); -+ emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp); -+ } -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off if dst == src */ -+ /* PC += off if dst != src */ -+ /* PC += off if dst & src */ -+ /* PC += off if dst > src */ -+ /* PC += off if dst >= src */ -+ /* PC += off if dst < src */ -+ /* PC += off if dst <= src */ -+ /* 
PC += off if dst > src (signed) */ -+ /* PC += off if dst >= src (signed) */ -+ /* PC += off if dst < src (signed) */ -+ /* PC += off if dst <= src (signed) */ -+ case BPF_JMP | BPF_JEQ | BPF_X: -+ case BPF_JMP | BPF_JNE | BPF_X: -+ case BPF_JMP | BPF_JSET | BPF_X: -+ case BPF_JMP | BPF_JGT | BPF_X: -+ case BPF_JMP | BPF_JGE | BPF_X: -+ case BPF_JMP | BPF_JLT | BPF_X: -+ case BPF_JMP | BPF_JLE | BPF_X: -+ case BPF_JMP | BPF_JSGT | BPF_X: -+ case BPF_JMP | BPF_JSGE | BPF_X: -+ case BPF_JMP | BPF_JSLT | BPF_X: -+ case BPF_JMP | BPF_JSLE | BPF_X: -+ if (off == 0) -+ break; -+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); -+ emit_jmp_r64(ctx, dst, src, rel, jmp); -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off if dst == imm */ -+ /* PC += off if dst != imm */ -+ /* PC += off if dst & imm */ -+ /* PC += off if dst > imm */ -+ /* PC += off if dst >= imm */ -+ /* PC += off if dst < imm */ -+ /* PC += off if dst <= imm */ -+ /* PC += off if dst > imm (signed) */ -+ /* PC += off if dst >= imm (signed) */ -+ /* PC += off if dst < imm (signed) */ -+ /* PC += off if dst <= imm (signed) */ -+ case BPF_JMP | BPF_JEQ | BPF_K: -+ case BPF_JMP | BPF_JNE | BPF_K: -+ case BPF_JMP | BPF_JSET | BPF_K: -+ case BPF_JMP | BPF_JGT | BPF_K: -+ case BPF_JMP | BPF_JGE | BPF_K: -+ case BPF_JMP | BPF_JLT | BPF_K: -+ case BPF_JMP | BPF_JLE | BPF_K: -+ case BPF_JMP | BPF_JSGT | BPF_K: -+ case BPF_JMP | BPF_JSGE | BPF_K: -+ case BPF_JMP | BPF_JSLT | BPF_K: -+ case BPF_JMP | BPF_JSLE | BPF_K: -+ if (off == 0) -+ break; -+ setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel); -+ emit_jmp_i64(ctx, dst, imm, rel, jmp); -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off */ -+ case BPF_JMP | BPF_JA: -+ if (off == 0) -+ break; -+ if (emit_ja(ctx, off) < 0) -+ goto toofar; -+ break; -+ /* Tail call */ -+ case BPF_JMP | BPF_TAIL_CALL: -+ if (emit_tail_call(ctx) < 0) -+ goto invalid; -+ break; -+ /* Function call */ -+ case 
BPF_JMP | BPF_CALL: -+ if (emit_call(ctx, insn) < 0) -+ goto invalid; -+ break; -+ /* Function return */ -+ case BPF_JMP | BPF_EXIT: -+ /* -+ * Optimization: when last instruction is EXIT -+ * simply continue to epilogue. -+ */ -+ if (ctx->bpf_index == ctx->program->len - 1) -+ break; -+ if (emit_exit(ctx) < 0) -+ goto toofar; -+ break; -+ -+ default: -+invalid: -+ pr_err_once("unknown opcode %02x\n", code); -+ return -EINVAL; -+notyet: -+ pr_info_once("*** NOT YET: opcode %02x ***\n", code); -+ return -EFAULT; -+toofar: -+ pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n", -+ ctx->bpf_index, code); -+ return -E2BIG; -+ } -+ return 0; -+} diff --git a/target/linux/generic/backport-5.4/071-v5.16-03-mips-bpf-Add-new-eBPF-JIT-for-64-bit-MIPS.patch b/target/linux/generic/backport-5.4/071-v5.16-03-mips-bpf-Add-new-eBPF-JIT-for-64-bit-MIPS.patch deleted file mode 100644 index 38b46c0b76..0000000000 --- a/target/linux/generic/backport-5.4/071-v5.16-03-mips-bpf-Add-new-eBPF-JIT-for-64-bit-MIPS.patch +++ /dev/null @@ -1,1005 +0,0 @@ -From: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Date: Tue, 5 Oct 2021 18:54:05 +0200 -Subject: [PATCH] mips: bpf: Add new eBPF JIT for 64-bit MIPS - -This is an implementation of an eBPF JIT for 64-bit MIPS III-V and -MIPS64r1-r6. It uses the same framework introduced by the 32-bit JIT. - -Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com> ---- - create mode 100644 arch/mips/net/bpf_jit_comp64.c - ---- /dev/null -+++ b/arch/mips/net/bpf_jit_comp64.c -@@ -0,0 +1,991 @@ -+// SPDX-License-Identifier: GPL-2.0-only -+/* -+ * Just-In-Time compiler for eBPF bytecode on MIPS. -+ * Implementation of JIT functions for 64-bit CPUs. -+ * -+ * Copyright (c) 2021 Anyfi Networks AB. -+ * Author: Johan Almbladh <johan.almbladh@gmail.com> -+ * -+ * Based on code and ideas from -+ * Copyright (c) 2017 Cavium, Inc. 
-+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> -+ * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> -+ */ -+ -+#include <linux/errno.h> -+#include <linux/filter.h> -+#include <linux/bpf.h> -+#include <asm/cpu-features.h> -+#include <asm/isa-rev.h> -+#include <asm/uasm.h> -+ -+#include "bpf_jit_comp.h" -+ -+/* MIPS t0-t3 are not available in the n64 ABI */ -+#undef MIPS_R_T0 -+#undef MIPS_R_T1 -+#undef MIPS_R_T2 -+#undef MIPS_R_T3 -+ -+/* Stack is 16-byte aligned in n64 ABI */ -+#define MIPS_STACK_ALIGNMENT 16 -+ -+/* Extra 64-bit eBPF registers used by JIT */ -+#define JIT_REG_TC (MAX_BPF_JIT_REG + 0) -+#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1) -+ -+/* Number of prologue bytes to skip when doing a tail call */ -+#define JIT_TCALL_SKIP 4 -+ -+/* Callee-saved CPU registers that the JIT must preserve */ -+#define JIT_CALLEE_REGS \ -+ (BIT(MIPS_R_S0) | \ -+ BIT(MIPS_R_S1) | \ -+ BIT(MIPS_R_S2) | \ -+ BIT(MIPS_R_S3) | \ -+ BIT(MIPS_R_S4) | \ -+ BIT(MIPS_R_S5) | \ -+ BIT(MIPS_R_S6) | \ -+ BIT(MIPS_R_S7) | \ -+ BIT(MIPS_R_GP) | \ -+ BIT(MIPS_R_FP) | \ -+ BIT(MIPS_R_RA)) -+ -+/* Caller-saved CPU registers available for JIT use */ -+#define JIT_CALLER_REGS \ -+ (BIT(MIPS_R_A5) | \ -+ BIT(MIPS_R_A6) | \ -+ BIT(MIPS_R_A7)) -+/* -+ * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers. -+ * MIPS registers t4 - t7 may be used by the JIT as temporary registers. -+ * MIPS registers t8 - t9 are reserved for single-register common functions. 
-+ */ -+static const u8 bpf2mips64[] = { -+ /* Return value from in-kernel function, and exit value from eBPF */ -+ [BPF_REG_0] = MIPS_R_V0, -+ /* Arguments from eBPF program to in-kernel function */ -+ [BPF_REG_1] = MIPS_R_A0, -+ [BPF_REG_2] = MIPS_R_A1, -+ [BPF_REG_3] = MIPS_R_A2, -+ [BPF_REG_4] = MIPS_R_A3, -+ [BPF_REG_5] = MIPS_R_A4, -+ /* Callee-saved registers that in-kernel function will preserve */ -+ [BPF_REG_6] = MIPS_R_S0, -+ [BPF_REG_7] = MIPS_R_S1, -+ [BPF_REG_8] = MIPS_R_S2, -+ [BPF_REG_9] = MIPS_R_S3, -+ /* Read-only frame pointer to access the eBPF stack */ -+ [BPF_REG_FP] = MIPS_R_FP, -+ /* Temporary register for blinding constants */ -+ [BPF_REG_AX] = MIPS_R_AT, -+ /* Tail call count register, caller-saved */ -+ [JIT_REG_TC] = MIPS_R_A5, -+ /* Constant for register zero-extension */ -+ [JIT_REG_ZX] = MIPS_R_V1, -+}; -+ -+/* -+ * MIPS 32-bit operations on 64-bit registers generate a sign-extended -+ * result. However, the eBPF ISA mandates zero-extension, so we rely on the -+ * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic -+ * operations, right shift and byte swap require properly sign-extended -+ * operands or the result is unpredictable. We emit explicit sign-extensions -+ * in those cases. 
-+ */ -+ -+/* Sign extension */ -+static void emit_sext(struct jit_context *ctx, u8 dst, u8 src) -+{ -+ emit(ctx, sll, dst, src, 0); -+ clobber_reg(ctx, dst); -+} -+ -+/* Zero extension */ -+static void emit_zext(struct jit_context *ctx, u8 dst) -+{ -+ if (cpu_has_mips64r2 || cpu_has_mips64r6) { -+ emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); -+ } else { -+ emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]); -+ access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */ -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* Zero extension, if verifier does not do it for us */ -+static void emit_zext_ver(struct jit_context *ctx, u8 dst) -+{ -+ if (!ctx->program->aux->verifier_zext) -+ emit_zext(ctx, dst); -+} -+ -+/* dst = imm (64-bit) */ -+static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64) -+{ -+ if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) { -+ emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64); -+ } else if (imm64 >= 0xffffffff80000000ULL || -+ (imm64 < 0x80000000 && imm64 > 0xffff)) { -+ emit(ctx, lui, dst, (s16)(imm64 >> 16)); -+ emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff); -+ } else { -+ u8 acc = MIPS_R_ZERO; -+ int k; -+ -+ for (k = 0; k < 4; k++) { -+ u16 half = imm64 >> (48 - 16 * k); -+ -+ if (acc == dst) -+ emit(ctx, dsll, dst, dst, 16); -+ -+ if (half) { -+ emit(ctx, ori, dst, acc, half); -+ acc = dst; -+ } -+ } -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* ALU immediate operation (64-bit) */ -+static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op) -+{ -+ switch (BPF_OP(op)) { -+ /* dst = dst | imm */ -+ case BPF_OR: -+ emit(ctx, ori, dst, dst, (u16)imm); -+ break; -+ /* dst = dst ^ imm */ -+ case BPF_XOR: -+ emit(ctx, xori, dst, dst, (u16)imm); -+ break; -+ /* dst = -dst */ -+ case BPF_NEG: -+ emit(ctx, dsubu, dst, MIPS_R_ZERO, dst); -+ break; -+ /* dst = dst << imm */ -+ case BPF_LSH: -+ emit(ctx, dsll_safe, dst, dst, imm); -+ break; -+ /* dst = dst >> imm */ -+ case BPF_RSH: -+ emit(ctx, dsrl_safe, dst, dst, imm); -+ 
break; -+ /* dst = dst >> imm (arithmetic) */ -+ case BPF_ARSH: -+ emit(ctx, dsra_safe, dst, dst, imm); -+ break; -+ /* dst = dst + imm */ -+ case BPF_ADD: -+ emit(ctx, daddiu, dst, dst, imm); -+ break; -+ /* dst = dst - imm */ -+ case BPF_SUB: -+ emit(ctx, daddiu, dst, dst, -imm); -+ break; -+ default: -+ /* Width-generic operations */ -+ emit_alu_i(ctx, dst, imm, op); -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* ALU register operation (64-bit) */ -+static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op) -+{ -+ switch (BPF_OP(op)) { -+ /* dst = dst << src */ -+ case BPF_LSH: -+ emit(ctx, dsllv, dst, dst, src); -+ break; -+ /* dst = dst >> src */ -+ case BPF_RSH: -+ emit(ctx, dsrlv, dst, dst, src); -+ break; -+ /* dst = dst >> src (arithmetic) */ -+ case BPF_ARSH: -+ emit(ctx, dsrav, dst, dst, src); -+ break; -+ /* dst = dst + src */ -+ case BPF_ADD: -+ emit(ctx, daddu, dst, dst, src); -+ break; -+ /* dst = dst - src */ -+ case BPF_SUB: -+ emit(ctx, dsubu, dst, dst, src); -+ break; -+ /* dst = dst * src */ -+ case BPF_MUL: -+ if (cpu_has_mips64r6) { -+ emit(ctx, dmulu, dst, dst, src); -+ } else { -+ emit(ctx, dmultu, dst, src); -+ emit(ctx, mflo, dst); -+ } -+ break; -+ /* dst = dst / src */ -+ case BPF_DIV: -+ if (cpu_has_mips64r6) { -+ emit(ctx, ddivu_r6, dst, dst, src); -+ } else { -+ emit(ctx, ddivu, dst, src); -+ emit(ctx, mflo, dst); -+ } -+ break; -+ /* dst = dst % src */ -+ case BPF_MOD: -+ if (cpu_has_mips64r6) { -+ emit(ctx, dmodu, dst, dst, src); -+ } else { -+ emit(ctx, ddivu, dst, src); -+ emit(ctx, mfhi, dst); -+ } -+ break; -+ default: -+ /* Width-generic operations */ -+ emit_alu_r(ctx, dst, src, op); -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* Swap sub words in a register double word */ -+static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits) -+{ -+ u8 tmp = MIPS_R_T9; -+ -+ emit(ctx, and, tmp, dst, mask); /* tmp = dst & mask */ -+ emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */ -+ emit(ctx, dsrl, dst, 
dst, bits); /* dst = dst >> bits */ -+ emit(ctx, and, dst, dst, mask); /* dst = dst & mask */ -+ emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ -+} -+ -+/* Swap bytes and truncate a register double word, word or half word */ -+static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width) -+{ -+ switch (width) { -+ /* Swap bytes in a double word */ -+ case 64: -+ if (cpu_has_mips64r2 || cpu_has_mips64r6) { -+ emit(ctx, dsbh, dst, dst); -+ emit(ctx, dshd, dst, dst); -+ } else { -+ u8 t1 = MIPS_R_T6; -+ u8 t2 = MIPS_R_T7; -+ -+ emit(ctx, dsll32, t2, dst, 0); /* t2 = dst << 32 */ -+ emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32 */ -+ emit(ctx, or, dst, dst, t2); /* dst = dst | t2 */ -+ -+ emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff); -+ emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */ -+ emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */ -+ emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */ -+ -+ emit(ctx, lui, t2, 0xff); /* t2 = 0x00ff0000 */ -+ emit(ctx, ori, t2, t2, 0xff); /* t2 = t2 | 0x00ff */ -+ emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */ -+ emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */ -+ emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst) */ -+ } -+ break; -+ /* Swap bytes in a half word */ -+ /* Swap bytes in a word */ -+ case 32: -+ case 16: -+ emit_sext(ctx, dst, dst); -+ emit_bswap_r(ctx, dst, width); -+ if (cpu_has_mips64r2 || cpu_has_mips64r6) -+ emit_zext(ctx, dst); -+ break; -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* Truncate a register double word, word or half word */ -+static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width) -+{ -+ switch (width) { -+ case 64: -+ break; -+ /* Zero-extend a word */ -+ case 32: -+ emit_zext(ctx, dst); -+ break; -+ /* Zero-extend a half word */ -+ case 16: -+ emit(ctx, andi, dst, dst, 0xffff); -+ break; -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* Load operation: dst = *(size*)(src + off) */ -+static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size) -+{ -+ switch 
(size) { -+ /* Load a byte */ -+ case BPF_B: -+ emit(ctx, lbu, dst, off, src); -+ break; -+ /* Load a half word */ -+ case BPF_H: -+ emit(ctx, lhu, dst, off, src); -+ break; -+ /* Load a word */ -+ case BPF_W: -+ emit(ctx, lwu, dst, off, src); -+ break; -+ /* Load a double word */ -+ case BPF_DW: -+ emit(ctx, ld, dst, off, src); -+ break; -+ } -+ clobber_reg(ctx, dst); -+} -+ -+/* Store operation: *(size *)(dst + off) = src */ -+static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size) -+{ -+ switch (size) { -+ /* Store a byte */ -+ case BPF_B: -+ emit(ctx, sb, src, off, dst); -+ break; -+ /* Store a half word */ -+ case BPF_H: -+ emit(ctx, sh, src, off, dst); -+ break; -+ /* Store a word */ -+ case BPF_W: -+ emit(ctx, sw, src, off, dst); -+ break; -+ /* Store a double word */ -+ case BPF_DW: -+ emit(ctx, sd, src, off, dst); -+ break; -+ } -+} -+ -+/* Atomic read-modify-write */ -+static void emit_atomic_r64(struct jit_context *ctx, -+ u8 dst, u8 src, s16 off, u8 code) -+{ -+ u8 t1 = MIPS_R_T6; -+ u8 t2 = MIPS_R_T7; -+ -+ emit(ctx, lld, t1, off, dst); -+ switch (code) { -+ case BPF_ADD: -+ emit(ctx, daddu, t2, t1, src); -+ break; -+ case BPF_AND: -+ emit(ctx, and, t2, t1, src); -+ break; -+ case BPF_OR: -+ emit(ctx, or, t2, t1, src); -+ break; -+ case BPF_XOR: -+ emit(ctx, xor, t2, t1, src); -+ break; -+ } -+ emit(ctx, scd, t2, off, dst); -+ emit(ctx, beqz, t2, -16); -+ emit(ctx, nop); /* Delay slot */ -+} -+ -+/* Function call */ -+static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn) -+{ -+ u8 zx = bpf2mips64[JIT_REG_ZX]; -+ u8 tmp = MIPS_R_T6; -+ bool fixed; -+ u64 addr; -+ -+ /* Decode the call address */ -+ if (bpf_jit_get_func_addr(ctx->program, insn, false, -+ &addr, &fixed) < 0) -+ return -1; -+ if (!fixed) -+ return -1; -+ -+ /* Push caller-saved registers on stack */ -+ push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); -+ -+ /* Emit function call */ -+ emit_mov_i64(ctx, tmp, addr); -+ emit(ctx, jalr, 
MIPS_R_RA, tmp); -+ emit(ctx, nop); /* Delay slot */ -+ -+ /* Restore caller-saved registers */ -+ pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); -+ -+ /* Re-initialize the JIT zero-extension register if accessed */ -+ if (ctx->accessed & BIT(JIT_REG_ZX)) { -+ emit(ctx, daddiu, zx, MIPS_R_ZERO, -1); -+ emit(ctx, dsrl32, zx, zx, 0); -+ } -+ -+ clobber_reg(ctx, MIPS_R_RA); -+ clobber_reg(ctx, MIPS_R_V0); -+ clobber_reg(ctx, MIPS_R_V1); -+ return 0; -+} -+ -+/* Function tail call */ -+static int emit_tail_call(struct jit_context *ctx) -+{ -+ u8 ary = bpf2mips64[BPF_REG_2]; -+ u8 ind = bpf2mips64[BPF_REG_3]; -+ u8 tcc = bpf2mips64[JIT_REG_TC]; -+ u8 tmp = MIPS_R_T6; -+ int off; -+ -+ /* -+ * Tail call: -+ * eBPF R1 - function argument (context ptr), passed in a0 -+ * eBPF R2 - ptr to object with array of function entry points -+ * eBPF R3 - array index of function to be called -+ */ -+ -+ /* if (ind >= ary->map.max_entries) goto out */ -+ off = offsetof(struct bpf_array, map.max_entries); -+ if (off > 0x7fff) -+ return -1; -+ emit(ctx, lwu, tmp, off, ary); /* tmp = ary->map.max_entries */ -+ emit(ctx, sltu, tmp, ind, tmp); /* tmp = ind < tmp */ -+ emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/ -+ -+ /* if (--TCC < 0) goto out */ -+ emit(ctx, daddiu, tcc, tcc, -1); /* tcc-- (delay slot) */ -+ emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */ -+ /* (next insn delay slot) */ -+ /* prog = ary->ptrs[ind] */ -+ off = offsetof(struct bpf_array, ptrs); -+ if (off > 0x7fff) -+ return -1; -+ emit(ctx, dsll, tmp, ind, 3); /* tmp = ind << 3 */ -+ emit(ctx, daddu, tmp, tmp, ary); /* tmp += ary */ -+ emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */ -+ -+ /* if (prog == 0) goto out */ -+ emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0*/ -+ emit(ctx, nop); /* Delay slot */ -+ -+ /* func = prog->bpf_func + 4 (prologue skip offset) */ -+ off = offsetof(struct bpf_prog, bpf_func); -+ if (off > 0x7fff) -+ 
return -1; -+ emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */ -+ emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4) */ -+ -+ /* goto func */ -+ build_epilogue(ctx, tmp); -+ access_reg(ctx, JIT_REG_TC); -+ return 0; -+} -+ -+/* -+ * Stack frame layout for a JITed program (stack grows down). -+ * -+ * Higher address : Previous stack frame : -+ * +===========================+ <--- MIPS sp before call -+ * | Callee-saved registers, | -+ * | including RA and FP | -+ * +---------------------------+ <--- eBPF FP (MIPS fp) -+ * | Local eBPF variables | -+ * | allocated by program | -+ * +---------------------------+ -+ * | Reserved for caller-saved | -+ * | registers | -+ * Lower address +===========================+ <--- MIPS sp -+ */ -+ -+/* Build program prologue to set up the stack and registers */ -+void build_prologue(struct jit_context *ctx) -+{ -+ u8 fp = bpf2mips64[BPF_REG_FP]; -+ u8 tc = bpf2mips64[JIT_REG_TC]; -+ u8 zx = bpf2mips64[JIT_REG_ZX]; -+ int stack, saved, locals, reserved; -+ -+ /* -+ * The first instruction initializes the tail call count register. -+ * On a tail call, the calling function jumps into the prologue -+ * after this instruction. -+ */ -+ emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff)); -+ -+ /* === Entry-point for tail calls === */ -+ -+ /* -+ * If the eBPF frame pointer and tail call count registers were -+ * accessed they must be preserved. Mark them as clobbered here -+ * to save and restore them on the stack as needed. 
-+ */ -+ if (ctx->accessed & BIT(BPF_REG_FP)) -+ clobber_reg(ctx, fp); -+ if (ctx->accessed & BIT(JIT_REG_TC)) -+ clobber_reg(ctx, tc); -+ if (ctx->accessed & BIT(JIT_REG_ZX)) -+ clobber_reg(ctx, zx); -+ -+ /* Compute the stack space needed for callee-saved registers */ -+ saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64); -+ saved = ALIGN(saved, MIPS_STACK_ALIGNMENT); -+ -+ /* Stack space used by eBPF program local data */ -+ locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT); -+ -+ /* -+ * If we are emitting function calls, reserve extra stack space for -+ * caller-saved registers needed by the JIT. The required space is -+ * computed automatically during resource usage discovery (pass 1). -+ */ -+ reserved = ctx->stack_used; -+ -+ /* Allocate the stack frame */ -+ stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT); -+ if (stack) -+ emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack); -+ -+ /* Store callee-saved registers on stack */ -+ push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved); -+ -+ /* Initialize the eBPF frame pointer if accessed */ -+ if (ctx->accessed & BIT(BPF_REG_FP)) -+ emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved); -+ -+ /* Initialize the ePF JIT zero-extension register if accessed */ -+ if (ctx->accessed & BIT(JIT_REG_ZX)) { -+ emit(ctx, daddiu, zx, MIPS_R_ZERO, -1); -+ emit(ctx, dsrl32, zx, zx, 0); -+ } -+ -+ ctx->saved_size = saved; -+ ctx->stack_size = stack; -+} -+ -+/* Build the program epilogue to restore the stack and registers */ -+void build_epilogue(struct jit_context *ctx, int dest_reg) -+{ -+ /* Restore callee-saved registers from stack */ -+ pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, -+ ctx->stack_size - ctx->saved_size); -+ -+ /* Release the stack frame */ -+ if (ctx->stack_size) -+ emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size); -+ -+ /* Jump to return address and sign-extend the 32-bit return value */ -+ emit(ctx, jr, dest_reg); -+ emit(ctx, sll, 
MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */ -+} -+ -+/* Build one eBPF instruction */ -+int build_insn(const struct bpf_insn *insn, struct jit_context *ctx) -+{ -+ u8 dst = bpf2mips64[insn->dst_reg]; -+ u8 src = bpf2mips64[insn->src_reg]; -+ u8 code = insn->code; -+ s16 off = insn->off; -+ s32 imm = insn->imm; -+ s32 val, rel; -+ u8 alu, jmp; -+ -+ switch (code) { -+ /* ALU operations */ -+ /* dst = imm */ -+ case BPF_ALU | BPF_MOV | BPF_K: -+ emit_mov_i(ctx, dst, imm); -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = src */ -+ case BPF_ALU | BPF_MOV | BPF_X: -+ if (imm == 1) { -+ /* Special mov32 for zext */ -+ emit_zext(ctx, dst); -+ } else { -+ emit_mov_r(ctx, dst, src); -+ emit_zext_ver(ctx, dst); -+ } -+ break; -+ /* dst = -dst */ -+ case BPF_ALU | BPF_NEG: -+ emit_sext(ctx, dst, dst); -+ emit_alu_i(ctx, dst, 0, BPF_NEG); -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = dst & imm */ -+ /* dst = dst | imm */ -+ /* dst = dst ^ imm */ -+ /* dst = dst << imm */ -+ case BPF_ALU | BPF_OR | BPF_K: -+ case BPF_ALU | BPF_AND | BPF_K: -+ case BPF_ALU | BPF_XOR | BPF_K: -+ case BPF_ALU | BPF_LSH | BPF_K: -+ if (!valid_alu_i(BPF_OP(code), imm)) { -+ emit_mov_i(ctx, MIPS_R_T4, imm); -+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); -+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { -+ emit_alu_i(ctx, dst, val, alu); -+ } -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = dst >> imm */ -+ /* dst = dst >> imm (arithmetic) */ -+ /* dst = dst + imm */ -+ /* dst = dst - imm */ -+ /* dst = dst * imm */ -+ /* dst = dst / imm */ -+ /* dst = dst % imm */ -+ case BPF_ALU | BPF_RSH | BPF_K: -+ case BPF_ALU | BPF_ARSH | BPF_K: -+ case BPF_ALU | BPF_ADD | BPF_K: -+ case BPF_ALU | BPF_SUB | BPF_K: -+ case BPF_ALU | BPF_MUL | BPF_K: -+ case BPF_ALU | BPF_DIV | BPF_K: -+ case BPF_ALU | BPF_MOD | BPF_K: -+ if (!valid_alu_i(BPF_OP(code), imm)) { -+ emit_sext(ctx, dst, dst); -+ emit_mov_i(ctx, MIPS_R_T4, imm); -+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); -+ } else if 
(rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { -+ emit_sext(ctx, dst, dst); -+ emit_alu_i(ctx, dst, val, alu); -+ } -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = dst & src */ -+ /* dst = dst | src */ -+ /* dst = dst ^ src */ -+ /* dst = dst << src */ -+ case BPF_ALU | BPF_AND | BPF_X: -+ case BPF_ALU | BPF_OR | BPF_X: -+ case BPF_ALU | BPF_XOR | BPF_X: -+ case BPF_ALU | BPF_LSH | BPF_X: -+ emit_alu_r(ctx, dst, src, BPF_OP(code)); -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = dst >> src */ -+ /* dst = dst >> src (arithmetic) */ -+ /* dst = dst + src */ -+ /* dst = dst - src */ -+ /* dst = dst * src */ -+ /* dst = dst / src */ -+ /* dst = dst % src */ -+ case BPF_ALU | BPF_RSH | BPF_X: -+ case BPF_ALU | BPF_ARSH | BPF_X: -+ case BPF_ALU | BPF_ADD | BPF_X: -+ case BPF_ALU | BPF_SUB | BPF_X: -+ case BPF_ALU | BPF_MUL | BPF_X: -+ case BPF_ALU | BPF_DIV | BPF_X: -+ case BPF_ALU | BPF_MOD | BPF_X: -+ emit_sext(ctx, dst, dst); -+ emit_sext(ctx, MIPS_R_T4, src); -+ emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); -+ emit_zext_ver(ctx, dst); -+ break; -+ /* dst = imm (64-bit) */ -+ case BPF_ALU64 | BPF_MOV | BPF_K: -+ emit_mov_i(ctx, dst, imm); -+ break; -+ /* dst = src (64-bit) */ -+ case BPF_ALU64 | BPF_MOV | BPF_X: -+ emit_mov_r(ctx, dst, src); -+ break; -+ /* dst = -dst (64-bit) */ -+ case BPF_ALU64 | BPF_NEG: -+ emit_alu_i64(ctx, dst, 0, BPF_NEG); -+ break; -+ /* dst = dst & imm (64-bit) */ -+ /* dst = dst | imm (64-bit) */ -+ /* dst = dst ^ imm (64-bit) */ -+ /* dst = dst << imm (64-bit) */ -+ /* dst = dst >> imm (64-bit) */ -+ /* dst = dst >> imm ((64-bit, arithmetic) */ -+ /* dst = dst + imm (64-bit) */ -+ /* dst = dst - imm (64-bit) */ -+ /* dst = dst * imm (64-bit) */ -+ /* dst = dst / imm (64-bit) */ -+ /* dst = dst % imm (64-bit) */ -+ case BPF_ALU64 | BPF_AND | BPF_K: -+ case BPF_ALU64 | BPF_OR | BPF_K: -+ case BPF_ALU64 | BPF_XOR | BPF_K: -+ case BPF_ALU64 | BPF_LSH | BPF_K: -+ case BPF_ALU64 | BPF_RSH | BPF_K: -+ case BPF_ALU64 | BPF_ARSH | BPF_K: -+ 
case BPF_ALU64 | BPF_ADD | BPF_K: -+ case BPF_ALU64 | BPF_SUB | BPF_K: -+ case BPF_ALU64 | BPF_MUL | BPF_K: -+ case BPF_ALU64 | BPF_DIV | BPF_K: -+ case BPF_ALU64 | BPF_MOD | BPF_K: -+ if (!valid_alu_i(BPF_OP(code), imm)) { -+ emit_mov_i(ctx, MIPS_R_T4, imm); -+ emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code)); -+ } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { -+ emit_alu_i64(ctx, dst, val, alu); -+ } -+ break; -+ /* dst = dst & src (64-bit) */ -+ /* dst = dst | src (64-bit) */ -+ /* dst = dst ^ src (64-bit) */ -+ /* dst = dst << src (64-bit) */ -+ /* dst = dst >> src (64-bit) */ -+ /* dst = dst >> src (64-bit, arithmetic) */ -+ /* dst = dst + src (64-bit) */ -+ /* dst = dst - src (64-bit) */ -+ /* dst = dst * src (64-bit) */ -+ /* dst = dst / src (64-bit) */ -+ /* dst = dst % src (64-bit) */ -+ case BPF_ALU64 | BPF_AND | BPF_X: -+ case BPF_ALU64 | BPF_OR | BPF_X: -+ case BPF_ALU64 | BPF_XOR | BPF_X: -+ case BPF_ALU64 | BPF_LSH | BPF_X: -+ case BPF_ALU64 | BPF_RSH | BPF_X: -+ case BPF_ALU64 | BPF_ARSH | BPF_X: -+ case BPF_ALU64 | BPF_ADD | BPF_X: -+ case BPF_ALU64 | BPF_SUB | BPF_X: -+ case BPF_ALU64 | BPF_MUL | BPF_X: -+ case BPF_ALU64 | BPF_DIV | BPF_X: -+ case BPF_ALU64 | BPF_MOD | BPF_X: -+ emit_alu_r64(ctx, dst, src, BPF_OP(code)); -+ break; -+ /* dst = htole(dst) */ -+ /* dst = htobe(dst) */ -+ case BPF_ALU | BPF_END | BPF_FROM_LE: -+ case BPF_ALU | BPF_END | BPF_FROM_BE: -+ if (BPF_SRC(code) == -+#ifdef __BIG_ENDIAN -+ BPF_FROM_LE -+#else -+ BPF_FROM_BE -+#endif -+ ) -+ emit_bswap_r64(ctx, dst, imm); -+ else -+ emit_trunc_r64(ctx, dst, imm); -+ break; -+ /* dst = imm64 */ -+ case BPF_LD | BPF_IMM | BPF_DW: -+ emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32)); -+ return 1; -+ /* LDX: dst = *(size *)(src + off) */ -+ case BPF_LDX | BPF_MEM | BPF_W: -+ case BPF_LDX | BPF_MEM | BPF_H: -+ case BPF_LDX | BPF_MEM | BPF_B: -+ case BPF_LDX | BPF_MEM | BPF_DW: -+ emit_ldx(ctx, dst, src, off, BPF_SIZE(code)); -+ break; -+ /* ST: *(size *)(dst + 
off) = imm */ -+ case BPF_ST | BPF_MEM | BPF_W: -+ case BPF_ST | BPF_MEM | BPF_H: -+ case BPF_ST | BPF_MEM | BPF_B: -+ case BPF_ST | BPF_MEM | BPF_DW: -+ emit_mov_i(ctx, MIPS_R_T4, imm); -+ emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code)); -+ break; -+ /* STX: *(size *)(dst + off) = src */ -+ case BPF_STX | BPF_MEM | BPF_W: -+ case BPF_STX | BPF_MEM | BPF_H: -+ case BPF_STX | BPF_MEM | BPF_B: -+ case BPF_STX | BPF_MEM | BPF_DW: -+ emit_stx(ctx, dst, src, off, BPF_SIZE(code)); -+ break; -+ /* Speculation barrier */ -+ case BPF_ST | BPF_NOSPEC: -+ break; -+ /* Atomics */ -+ case BPF_STX | BPF_XADD | BPF_W: -+ case BPF_STX | BPF_XADD | BPF_DW: -+ switch (imm) { -+ case BPF_ADD: -+ case BPF_AND: -+ case BPF_OR: -+ case BPF_XOR: -+ if (BPF_SIZE(code) == BPF_DW) { -+ emit_atomic_r64(ctx, dst, src, off, imm); -+ } else { /* 32-bit, no fetch */ -+ emit_sext(ctx, MIPS_R_T4, src); -+ emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm); -+ } -+ break; -+ default: -+ goto notyet; -+ } -+ break; -+ /* PC += off if dst == src */ -+ /* PC += off if dst != src */ -+ /* PC += off if dst & src */ -+ /* PC += off if dst > src */ -+ /* PC += off if dst >= src */ -+ /* PC += off if dst < src */ -+ /* PC += off if dst <= src */ -+ /* PC += off if dst > src (signed) */ -+ /* PC += off if dst >= src (signed) */ -+ /* PC += off if dst < src (signed) */ -+ /* PC += off if dst <= src (signed) */ -+ case BPF_JMP32 | BPF_JEQ | BPF_X: -+ case BPF_JMP32 | BPF_JNE | BPF_X: -+ case BPF_JMP32 | BPF_JSET | BPF_X: -+ case BPF_JMP32 | BPF_JGT | BPF_X: -+ case BPF_JMP32 | BPF_JGE | BPF_X: -+ case BPF_JMP32 | BPF_JLT | BPF_X: -+ case BPF_JMP32 | BPF_JLE | BPF_X: -+ case BPF_JMP32 | BPF_JSGT | BPF_X: -+ case BPF_JMP32 | BPF_JSGE | BPF_X: -+ case BPF_JMP32 | BPF_JSLT | BPF_X: -+ case BPF_JMP32 | BPF_JSLE | BPF_X: -+ if (off == 0) -+ break; -+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); -+ emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */ -+ emit_sext(ctx, MIPS_R_T5, src); /* 
Sign-extended src */ -+ emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp); -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off if dst == imm */ -+ /* PC += off if dst != imm */ -+ /* PC += off if dst & imm */ -+ /* PC += off if dst > imm */ -+ /* PC += off if dst >= imm */ -+ /* PC += off if dst < imm */ -+ /* PC += off if dst <= imm */ -+ /* PC += off if dst > imm (signed) */ -+ /* PC += off if dst >= imm (signed) */ -+ /* PC += off if dst < imm (signed) */ -+ /* PC += off if dst <= imm (signed) */ -+ case BPF_JMP32 | BPF_JEQ | BPF_K: -+ case BPF_JMP32 | BPF_JNE | BPF_K: -+ case BPF_JMP32 | BPF_JSET | BPF_K: -+ case BPF_JMP32 | BPF_JGT | BPF_K: -+ case BPF_JMP32 | BPF_JGE | BPF_K: -+ case BPF_JMP32 | BPF_JLT | BPF_K: -+ case BPF_JMP32 | BPF_JLE | BPF_K: -+ case BPF_JMP32 | BPF_JSGT | BPF_K: -+ case BPF_JMP32 | BPF_JSGE | BPF_K: -+ case BPF_JMP32 | BPF_JSLT | BPF_K: -+ case BPF_JMP32 | BPF_JSLE | BPF_K: -+ if (off == 0) -+ break; -+ setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel); -+ emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */ -+ if (valid_jmp_i(jmp, imm)) { -+ emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp); -+ } else { -+ /* Move large immediate to register, sign-extended */ -+ emit_mov_i(ctx, MIPS_R_T5, imm); -+ emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp); -+ } -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off if dst == src */ -+ /* PC += off if dst != src */ -+ /* PC += off if dst & src */ -+ /* PC += off if dst > src */ -+ /* PC += off if dst >= src */ -+ /* PC += off if dst < src */ -+ /* PC += off if dst <= src */ -+ /* PC += off if dst > src (signed) */ -+ /* PC += off if dst >= src (signed) */ -+ /* PC += off if dst < src (signed) */ -+ /* PC += off if dst <= src (signed) */ -+ case BPF_JMP | BPF_JEQ | BPF_X: -+ case BPF_JMP | BPF_JNE | BPF_X: -+ case BPF_JMP | BPF_JSET | BPF_X: -+ case BPF_JMP | BPF_JGT | BPF_X: -+ case BPF_JMP | BPF_JGE | BPF_X: -+ case BPF_JMP | BPF_JLT | BPF_X: 
-+ case BPF_JMP | BPF_JLE | BPF_X: -+ case BPF_JMP | BPF_JSGT | BPF_X: -+ case BPF_JMP | BPF_JSGE | BPF_X: -+ case BPF_JMP | BPF_JSLT | BPF_X: -+ case BPF_JMP | BPF_JSLE | BPF_X: -+ if (off == 0) -+ break; -+ setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); -+ emit_jmp_r(ctx, dst, src, rel, jmp); -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off if dst == imm */ -+ /* PC += off if dst != imm */ -+ /* PC += off if dst & imm */ -+ /* PC += off if dst > imm */ -+ /* PC += off if dst >= imm */ -+ /* PC += off if dst < imm */ -+ /* PC += off if dst <= imm */ -+ /* PC += off if dst > imm (signed) */ -+ /* PC += off if dst >= imm (signed) */ -+ /* PC += off if dst < imm (signed) */ -+ /* PC += off if dst <= imm (signed) */ -+ case BPF_JMP | BPF_JEQ | BPF_K: -+ case BPF_JMP | BPF_JNE | BPF_K: -+ case BPF_JMP | BPF_JSET | BPF_K: -+ case BPF_JMP | BPF_JGT | BPF_K: -+ case BPF_JMP | BPF_JGE | BPF_K: -+ case BPF_JMP | BPF_JLT | BPF_K: -+ case BPF_JMP | BPF_JLE | BPF_K: -+ case BPF_JMP | BPF_JSGT | BPF_K: -+ case BPF_JMP | BPF_JSGE | BPF_K: -+ case BPF_JMP | BPF_JSLT | BPF_K: -+ case BPF_JMP | BPF_JSLE | BPF_K: -+ if (off == 0) -+ break; -+ setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel); -+ if (valid_jmp_i(jmp, imm)) { -+ emit_jmp_i(ctx, dst, imm, rel, jmp); -+ } else { -+ /* Move large immediate to register */ -+ emit_mov_i(ctx, MIPS_R_T4, imm); -+ emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp); -+ } -+ if (finish_jmp(ctx, jmp, off) < 0) -+ goto toofar; -+ break; -+ /* PC += off */ -+ case BPF_JMP | BPF_JA: -+ if (off == 0) -+ break; -+ if (emit_ja(ctx, off) < 0) -+ goto toofar; -+ break; -+ /* Tail call */ -+ case BPF_JMP | BPF_TAIL_CALL: -+ if (emit_tail_call(ctx) < 0) -+ goto invalid; -+ break; -+ /* Function call */ -+ case BPF_JMP | BPF_CALL: -+ if (emit_call(ctx, insn) < 0) -+ goto invalid; -+ break; -+ /* Function return */ -+ case BPF_JMP | BPF_EXIT: -+ /* -+ * Optimization: when last instruction is EXIT -+ * simply 
continue to epilogue. -+ */ -+ if (ctx->bpf_index == ctx->program->len - 1) -+ break; -+ if (emit_exit(ctx) < 0) -+ goto toofar; -+ break; -+ -+ default: -+invalid: -+ pr_err_once("unknown opcode %02x\n", code); -+ return -EINVAL; -+notyet: -+ pr_info_once("*** NOT YET: opcode %02x ***\n", code); -+ return -EFAULT; -+toofar: -+ pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n", -+ ctx->bpf_index, code); -+ return -E2BIG; -+ } -+ return 0; -+} diff --git a/target/linux/generic/backport-5.4/071-v5.16-04-mips-bpf-Add-JIT-workarounds-for-CPU-errata.patch b/target/linux/generic/backport-5.4/071-v5.16-04-mips-bpf-Add-JIT-workarounds-for-CPU-errata.patch deleted file mode 100644 index 63553ebe58..0000000000 --- a/target/linux/generic/backport-5.4/071-v5.16-04-mips-bpf-Add-JIT-workarounds-for-CPU-errata.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Date: Tue, 5 Oct 2021 18:54:06 +0200 -Subject: [PATCH] mips: bpf: Add JIT workarounds for CPU errata - -This patch adds workarounds for the following CPU errata to the MIPS -eBPF JIT, if enabled in the kernel configuration. - - - R10000 ll/sc weak ordering - - Loongson-3 ll/sc weak ordering - - Loongson-2F jump hang - -The Loongson-2F nop errata is implemented in uasm, which the JIT uses, -so no additional mitigations are needed for that. 
- -Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com> ---- - ---- a/arch/mips/net/bpf_jit_comp.c -+++ b/arch/mips/net/bpf_jit_comp.c -@@ -404,6 +404,7 @@ void emit_alu_r(struct jit_context *ctx, - /* Atomic read-modify-write (32-bit) */ - void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) - { -+ LLSC_sync(ctx); - emit(ctx, ll, MIPS_R_T9, off, dst); - switch (code) { - case BPF_ADD: -@@ -420,18 +421,19 @@ void emit_atomic_r(struct jit_context *c - break; - } - emit(ctx, sc, MIPS_R_T8, off, dst); -- emit(ctx, beqz, MIPS_R_T8, -16); -+ emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset); - emit(ctx, nop); /* Delay slot */ - } - - /* Atomic compare-and-exchange (32-bit) */ - void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off) - { -+ LLSC_sync(ctx); - emit(ctx, ll, MIPS_R_T9, off, dst); - emit(ctx, bne, MIPS_R_T9, res, 12); - emit(ctx, move, MIPS_R_T8, src); /* Delay slot */ - emit(ctx, sc, MIPS_R_T8, off, dst); -- emit(ctx, beqz, MIPS_R_T8, -20); -+ emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset); - emit(ctx, move, res, MIPS_R_T9); /* Delay slot */ - clobber_reg(ctx, res); - } ---- a/arch/mips/net/bpf_jit_comp.h -+++ b/arch/mips/net/bpf_jit_comp.h -@@ -87,7 +87,7 @@ struct jit_context { - }; - - /* Emit the instruction if the JIT memory space has been allocated */ --#define emit(ctx, func, ...) \ -+#define __emit(ctx, func, ...) \ - do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->jit_index]; \ -@@ -95,6 +95,30 @@ do { \ - } \ - (ctx)->jit_index++; \ - } while (0) -+#define emit(...) 
__emit(__VA_ARGS__) -+ -+/* Workaround for R10000 ll/sc errata */ -+#ifdef CONFIG_WAR_R10000 -+#define LLSC_beqz beqzl -+#else -+#define LLSC_beqz beqz -+#endif -+ -+/* Workaround for Loongson-3 ll/sc errata */ -+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS -+#define LLSC_sync(ctx) emit(ctx, sync, 0) -+#define LLSC_offset 4 -+#else -+#define LLSC_sync(ctx) -+#define LLSC_offset 0 -+#endif -+ -+/* Workaround for Loongson-2F jump errata */ -+#ifdef CONFIG_CPU_JUMP_WORKAROUNDS -+#define JALR_MASK 0xffffffffcfffffffULL -+#else -+#define JALR_MASK (~0ULL) -+#endif - - /* - * Mark a BPF register as accessed, it needs to be ---- a/arch/mips/net/bpf_jit_comp64.c -+++ b/arch/mips/net/bpf_jit_comp64.c -@@ -375,6 +375,7 @@ static void emit_atomic_r64(struct jit_c - u8 t1 = MIPS_R_T6; - u8 t2 = MIPS_R_T7; - -+ LLSC_sync(ctx); - emit(ctx, lld, t1, off, dst); - switch (code) { - case BPF_ADD: -@@ -391,7 +392,7 @@ static void emit_atomic_r64(struct jit_c - break; - } - emit(ctx, scd, t2, off, dst); -- emit(ctx, beqz, t2, -16); -+ emit(ctx, LLSC_beqz, t2, -16 - LLSC_offset); - emit(ctx, nop); /* Delay slot */ - } - -@@ -414,7 +415,7 @@ static int emit_call(struct jit_context - push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); - - /* Emit function call */ -- emit_mov_i64(ctx, tmp, addr); -+ emit_mov_i64(ctx, tmp, addr & JALR_MASK); - emit(ctx, jalr, MIPS_R_RA, tmp); - emit(ctx, nop); /* Delay slot */ - diff --git a/target/linux/generic/backport-5.4/071-v5.16-05-mips-bpf-Enable-eBPF-JITs.patch b/target/linux/generic/backport-5.4/071-v5.16-05-mips-bpf-Enable-eBPF-JITs.patch deleted file mode 100644 index c8ce0becf9..0000000000 --- a/target/linux/generic/backport-5.4/071-v5.16-05-mips-bpf-Enable-eBPF-JITs.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Date: Tue, 5 Oct 2021 18:54:07 +0200 -Subject: [PATCH] mips: bpf: Enable eBPF JITs - -This patch enables the new eBPF JITs for 32-bit and 64-bit MIPS. 
It also -disables the old cBPF JIT to so cBPF programs are converted to use the -new JIT. - -Workarounds for R4000 CPU errata are not implemented by the JIT, so the -JIT is disabled if any of those workarounds are configured. - -Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com> ---- - ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -3099,6 +3099,7 @@ S: Supported - F: arch/arm64/net/ - - BPF JIT for MIPS (32-BIT AND 64-BIT) -+M: Johan Almbladh <johan.almbladh@anyfinetworks.com> - M: Paul Burton <paulburton@kernel.org> - L: netdev@vger.kernel.org - L: bpf@vger.kernel.org ---- a/arch/mips/Kconfig -+++ b/arch/mips/Kconfig -@@ -46,8 +46,10 @@ config MIPS - select HAVE_ARCH_TRACEHOOK - select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES - select HAVE_ASM_MODVERSIONS -- select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS -- select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 -+ select HAVE_EBPF_JIT if !CPU_MICROMIPS && \ -+ !CPU_DADDI_WORKAROUNDS && \ -+ !CPU_R4000_WORKAROUNDS && \ -+ !CPU_R4400_WORKAROUNDS - select HAVE_CONTEXT_TRACKING - select HAVE_COPY_THREAD_TLS - select HAVE_C_RECORDMCOUNT ---- a/arch/mips/net/Makefile -+++ b/arch/mips/net/Makefile -@@ -2,9 +2,10 @@ - # MIPS networking code - - obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o -+obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o - - ifeq ($(CONFIG_32BIT),y) -- obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp.o bpf_jit_comp32.o -+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp32.o - else -- obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o -+ obj-$(CONFIG_MIPS_EBPF_JIT) += bpf_jit_comp64.o - endif diff --git a/target/linux/generic/backport-5.4/071-v5.16-06-mips-bpf-Remove-old-BPF-JIT-implementations.patch b/target/linux/generic/backport-5.4/071-v5.16-06-mips-bpf-Remove-old-BPF-JIT-implementations.patch deleted file mode 100644 index e25c336831..0000000000 --- a/target/linux/generic/backport-5.4/071-v5.16-06-mips-bpf-Remove-old-BPF-JIT-implementations.patch +++ /dev/null @@ 
-1,387 +0,0 @@ -From: Johan Almbladh <johan.almbladh@anyfinetworks.com> -Date: Tue, 5 Oct 2021 18:54:08 +0200 -Subject: [PATCH] mips: bpf: Remove old BPF JIT implementations - -This patch removes the old 32-bit cBPF and 64-bit eBPF JIT implementations. -They are replaced by a new eBPF implementation that supports both 32-bit -and 64-bit MIPS CPUs. - -Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com> ---- - delete mode 100644 arch/mips/net/bpf_jit.c - delete mode 100644 arch/mips/net/bpf_jit.h - delete mode 100644 arch/mips/net/bpf_jit_asm.S - delete mode 100644 arch/mips/net/ebpf_jit.c - ---- a/arch/mips/net/bpf_jit.h -+++ /dev/null -@@ -1,81 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0-only */ --/* -- * Just-In-Time compiler for BPF filters on MIPS -- * -- * Copyright (c) 2014 Imagination Technologies Ltd. -- * Author: Markos Chandras <markos.chandras@imgtec.com> -- */ -- --#ifndef BPF_JIT_MIPS_OP_H --#define BPF_JIT_MIPS_OP_H -- --/* Registers used by JIT */ --#define MIPS_R_ZERO 0 --#define MIPS_R_V0 2 --#define MIPS_R_A0 4 --#define MIPS_R_A1 5 --#define MIPS_R_T4 12 --#define MIPS_R_T5 13 --#define MIPS_R_T6 14 --#define MIPS_R_T7 15 --#define MIPS_R_S0 16 --#define MIPS_R_S1 17 --#define MIPS_R_S2 18 --#define MIPS_R_S3 19 --#define MIPS_R_S4 20 --#define MIPS_R_S5 21 --#define MIPS_R_S6 22 --#define MIPS_R_S7 23 --#define MIPS_R_SP 29 --#define MIPS_R_RA 31 -- --/* Conditional codes */ --#define MIPS_COND_EQ 0x1 --#define MIPS_COND_GE (0x1 << 1) --#define MIPS_COND_GT (0x1 << 2) --#define MIPS_COND_NE (0x1 << 3) --#define MIPS_COND_ALL (0x1 << 4) --/* Conditionals on X register or K immediate */ --#define MIPS_COND_X (0x1 << 5) --#define MIPS_COND_K (0x1 << 6) -- --#define r_ret MIPS_R_V0 -- --/* -- * Use 2 scratch registers to avoid pipeline interlocks. -- * There is no overhead during epilogue and prologue since -- * any of the $s0-$s6 registers will only be preserved if -- * they are going to actually be used. 
-- */ --#define r_skb_hl MIPS_R_S0 /* skb header length */ --#define r_skb_data MIPS_R_S1 /* skb actual data */ --#define r_off MIPS_R_S2 --#define r_A MIPS_R_S3 --#define r_X MIPS_R_S4 --#define r_skb MIPS_R_S5 --#define r_M MIPS_R_S6 --#define r_skb_len MIPS_R_S7 --#define r_s0 MIPS_R_T4 /* scratch reg 1 */ --#define r_s1 MIPS_R_T5 /* scratch reg 2 */ --#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ --#define r_tmp MIPS_R_T7 /* No need to preserve this */ --#define r_zero MIPS_R_ZERO --#define r_sp MIPS_R_SP --#define r_ra MIPS_R_RA -- --#ifndef __ASSEMBLY__ -- --/* Declare ASM helpers */ -- --#define DECLARE_LOAD_FUNC(func) \ -- extern u8 func(unsigned long *skb, int offset); \ -- extern u8 func##_negative(unsigned long *skb, int offset); \ -- extern u8 func##_positive(unsigned long *skb, int offset) -- --DECLARE_LOAD_FUNC(sk_load_word); --DECLARE_LOAD_FUNC(sk_load_half); --DECLARE_LOAD_FUNC(sk_load_byte); -- --#endif -- --#endif /* BPF_JIT_MIPS_OP_H */ ---- a/arch/mips/net/bpf_jit_asm.S -+++ /dev/null -@@ -1,285 +0,0 @@ --/* -- * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF -- * compiler. -- * -- * Copyright (C) 2015 Imagination Technologies Ltd. -- * Author: Markos Chandras <markos.chandras@imgtec.com> -- * -- * This program is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License as published by the -- * Free Software Foundation; version 2 of the License. -- */ -- --#include <asm/asm.h> --#include <asm/isa-rev.h> --#include <asm/regdef.h> --#include "bpf_jit.h" -- --/* ABI -- * -- * r_skb_hl skb header length -- * r_skb_data skb data -- * r_off(a1) offset register -- * r_A BPF register A -- * r_X PF register X -- * r_skb(a0) *skb -- * r_M *scratch memory -- * r_skb_le skb length -- * r_s0 Scratch register 0 -- * r_s1 Scratch register 1 -- * -- * On entry: -- * a0: *skb -- * a1: offset (imm or imm + X) -- * -- * All non-BPF-ABI registers are free for use. 
On return, we only -- * care about r_ret. The BPF-ABI registers are assumed to remain -- * unmodified during the entire filter operation. -- */ -- --#define skb a0 --#define offset a1 --#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ -- -- /* We know better :) so prevent assembler reordering etc */ -- .set noreorder -- --#define is_offset_negative(TYPE) \ -- /* If offset is negative we have more work to do */ \ -- slti t0, offset, 0; \ -- bgtz t0, bpf_slow_path_##TYPE##_neg; \ -- /* Be careful what follows in DS. */ -- --#define is_offset_in_header(SIZE, TYPE) \ -- /* Reading from header? */ \ -- addiu $r_s0, $r_skb_hl, -SIZE; \ -- slt t0, $r_s0, offset; \ -- bgtz t0, bpf_slow_path_##TYPE; \ -- --LEAF(sk_load_word) -- is_offset_negative(word) --FEXPORT(sk_load_word_positive) -- is_offset_in_header(4, word) -- /* Offset within header boundaries */ -- PTR_ADDU t1, $r_skb_data, offset -- .set reorder -- lw $r_A, 0(t1) -- .set noreorder --#ifdef CONFIG_CPU_LITTLE_ENDIAN --# if MIPS_ISA_REV >= 2 -- wsbh t0, $r_A -- rotr $r_A, t0, 16 --# else -- sll t0, $r_A, 24 -- srl t1, $r_A, 24 -- srl t2, $r_A, 8 -- or t0, t0, t1 -- andi t2, t2, 0xff00 -- andi t1, $r_A, 0xff00 -- or t0, t0, t2 -- sll t1, t1, 8 -- or $r_A, t0, t1 --# endif --#endif -- jr $r_ra -- move $r_ret, zero -- END(sk_load_word) -- --LEAF(sk_load_half) -- is_offset_negative(half) --FEXPORT(sk_load_half_positive) -- is_offset_in_header(2, half) -- /* Offset within header boundaries */ -- PTR_ADDU t1, $r_skb_data, offset -- lhu $r_A, 0(t1) --#ifdef CONFIG_CPU_LITTLE_ENDIAN --# if MIPS_ISA_REV >= 2 -- wsbh $r_A, $r_A --# else -- sll t0, $r_A, 8 -- srl t1, $r_A, 8 -- andi t0, t0, 0xff00 -- or $r_A, t0, t1 --# endif --#endif -- jr $r_ra -- move $r_ret, zero -- END(sk_load_half) -- --LEAF(sk_load_byte) -- is_offset_negative(byte) --FEXPORT(sk_load_byte_positive) -- is_offset_in_header(1, byte) -- /* Offset within header boundaries */ -- PTR_ADDU t1, $r_skb_data, offset -- lbu $r_A, 0(t1) 
-- jr $r_ra -- move $r_ret, zero -- END(sk_load_byte) -- --/* -- * call skb_copy_bits: -- * (prototype in linux/skbuff.h) -- * -- * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) -- * -- * o32 mandates we leave 4 spaces for argument registers in case -- * the callee needs to use them. Even though we don't care about -- * the argument registers ourselves, we need to allocate that space -- * to remain ABI compliant since the callee may want to use that space. -- * We also allocate 2 more spaces for $r_ra and our return register (*to). -- * -- * n64 is a bit different. The *caller* will allocate the space to preserve -- * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no -- * good reason but it does not matter that much really. -- * -- * (void *to) is returned in r_s0 -- * -- */ --#ifdef CONFIG_CPU_LITTLE_ENDIAN --#define DS_OFFSET(SIZE) (4 * SZREG) --#else --#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) --#endif --#define bpf_slow_path_common(SIZE) \ -- /* Quick check. Are we within reasonable boundaries? */ \ -- LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ -- sltu $r_s0, offset, $r_s1; \ -- beqz $r_s0, fault; \ -- /* Load 4th argument in DS */ \ -- LONG_ADDIU a3, zero, SIZE; \ -- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ -- PTR_LA t0, skb_copy_bits; \ -- PTR_S $r_ra, (5 * SZREG)($r_sp); \ -- /* Assign low slot to a2 */ \ -- PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ -- jalr t0; \ -- /* Reset our destination slot (DS but it's ok) */ \ -- INT_S zero, (4 * SZREG)($r_sp); \ -- /* \ -- * skb_copy_bits returns 0 on success and -EFAULT \ -- * on error. Our data live in a2. Do not bother with \ -- * our data if an error has been returned. 
\ -- */ \ -- /* Restore our frame */ \ -- PTR_L $r_ra, (5 * SZREG)($r_sp); \ -- INT_L $r_s0, (4 * SZREG)($r_sp); \ -- bltz v0, fault; \ -- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ -- move $r_ret, zero; \ -- --NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) -- bpf_slow_path_common(4) --#ifdef CONFIG_CPU_LITTLE_ENDIAN --# if MIPS_ISA_REV >= 2 -- wsbh t0, $r_s0 -- jr $r_ra -- rotr $r_A, t0, 16 --# else -- sll t0, $r_s0, 24 -- srl t1, $r_s0, 24 -- srl t2, $r_s0, 8 -- or t0, t0, t1 -- andi t2, t2, 0xff00 -- andi t1, $r_s0, 0xff00 -- or t0, t0, t2 -- sll t1, t1, 8 -- jr $r_ra -- or $r_A, t0, t1 --# endif --#else -- jr $r_ra -- move $r_A, $r_s0 --#endif -- -- END(bpf_slow_path_word) -- --NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) -- bpf_slow_path_common(2) --#ifdef CONFIG_CPU_LITTLE_ENDIAN --# if MIPS_ISA_REV >= 2 -- jr $r_ra -- wsbh $r_A, $r_s0 --# else -- sll t0, $r_s0, 8 -- andi t1, $r_s0, 0xff00 -- andi t0, t0, 0xff00 -- srl t1, t1, 8 -- jr $r_ra -- or $r_A, t0, t1 --# endif --#else -- jr $r_ra -- move $r_A, $r_s0 --#endif -- -- END(bpf_slow_path_half) -- --NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) -- bpf_slow_path_common(1) -- jr $r_ra -- move $r_A, $r_s0 -- -- END(bpf_slow_path_byte) -- --/* -- * Negative entry points -- */ -- .macro bpf_is_end_of_data -- li t0, SKF_LL_OFF -- /* Reading link layer data? */ -- slt t1, offset, t0 -- bgtz t1, fault -- /* Be careful what follows in DS. 
*/ -- .endm --/* -- * call skb_copy_bits: -- * (prototype in linux/filter.h) -- * -- * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, -- * int k, unsigned int size) -- * -- * see above (bpf_slow_path_common) for ABI restrictions -- */ --#define bpf_negative_common(SIZE) \ -- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ -- PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ -- PTR_S $r_ra, (5 * SZREG)($r_sp); \ -- jalr t0; \ -- li a2, SIZE; \ -- PTR_L $r_ra, (5 * SZREG)($r_sp); \ -- /* Check return pointer */ \ -- beqz v0, fault; \ -- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ -- /* Preserve our pointer */ \ -- move $r_s0, v0; \ -- /* Set return value */ \ -- move $r_ret, zero; \ -- --bpf_slow_path_word_neg: -- bpf_is_end_of_data --NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) -- bpf_negative_common(4) -- jr $r_ra -- lw $r_A, 0($r_s0) -- END(sk_load_word_negative) -- --bpf_slow_path_half_neg: -- bpf_is_end_of_data --NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) -- bpf_negative_common(2) -- jr $r_ra -- lhu $r_A, 0($r_s0) -- END(sk_load_half_negative) -- --bpf_slow_path_byte_neg: -- bpf_is_end_of_data --NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) -- bpf_negative_common(1) -- jr $r_ra -- lbu $r_A, 0($r_s0) -- END(sk_load_byte_negative) -- --fault: -- jr $r_ra -- addiu $r_ret, zero, 1 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0001-crypto-lib-tidy-up-lib-crypto-Kconfig-and-Makefile.patch b/target/linux/generic/backport-5.4/080-wireguard-0001-crypto-lib-tidy-up-lib-crypto-Kconfig-and-Makefile.patch deleted file mode 100644 index e32e18a357..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0001-crypto-lib-tidy-up-lib-crypto-Kconfig-and-Makefile.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:07 +0100 -Subject: [PATCH] crypto: lib - tidy up lib/crypto Kconfig and Makefile - -commit 
746b2e024c67aa605ac12d135cd7085a49cf9dc4 upstream. - -In preparation of introducing a set of crypto library interfaces, tidy -up the Makefile and split off the Kconfig symbols into a separate file. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - crypto/Kconfig | 13 +------------ - lib/crypto/Kconfig | 15 +++++++++++++++ - lib/crypto/Makefile | 16 ++++++++-------- - 3 files changed, 24 insertions(+), 20 deletions(-) - create mode 100644 lib/crypto/Kconfig - ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -878,9 +878,6 @@ config CRYPTO_SHA1_PPC_SPE - SHA-1 secure hash standard (DFIPS 180-4) implemented - using powerpc SPE SIMD instruction set. - --config CRYPTO_LIB_SHA256 -- tristate -- - config CRYPTO_SHA256 - tristate "SHA224 and SHA256 digest algorithm" - select CRYPTO_HASH -@@ -1019,9 +1016,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL - - comment "Ciphers" - --config CRYPTO_LIB_AES -- tristate -- - config CRYPTO_AES - tristate "AES cipher algorithms" - select CRYPTO_ALGAPI -@@ -1150,9 +1144,6 @@ config CRYPTO_ANUBIS - <https://www.cosic.esat.kuleuven.be/nessie/reports/> - <http://www.larc.usp.br/~pbarreto/AnubisPage.html> - --config CRYPTO_LIB_ARC4 -- tristate -- - config CRYPTO_ARC4 - tristate "ARC4 cipher algorithm" - select CRYPTO_BLKCIPHER -@@ -1339,9 +1330,6 @@ config CRYPTO_CAST6_AVX_X86_64 - This module provides the Cast6 cipher algorithm that processes - eight blocks parallel using the AVX instruction set. 
- --config CRYPTO_LIB_DES -- tristate -- - config CRYPTO_DES - tristate "DES and Triple DES EDE cipher algorithms" - select CRYPTO_ALGAPI -@@ -1845,6 +1833,7 @@ config CRYPTO_STATS - config CRYPTO_HASH_INFO - bool - -+source "lib/crypto/Kconfig" - source "drivers/crypto/Kconfig" - source "crypto/asymmetric_keys/Kconfig" - source "certs/Kconfig" ---- /dev/null -+++ b/lib/crypto/Kconfig -@@ -0,0 +1,15 @@ -+# SPDX-License-Identifier: GPL-2.0 -+ -+comment "Crypto library routines" -+ -+config CRYPTO_LIB_AES -+ tristate -+ -+config CRYPTO_LIB_ARC4 -+ tristate -+ -+config CRYPTO_LIB_DES -+ tristate -+ -+config CRYPTO_LIB_SHA256 -+ tristate ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -1,13 +1,13 @@ - # SPDX-License-Identifier: GPL-2.0 - --obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o --libaes-y := aes.o -+obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o -+libaes-y := aes.o - --obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o --libarc4-y := arc4.o -+obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o -+libarc4-y := arc4.o - --obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o --libdes-y := des.o -+obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o -+libdes-y := des.o - --obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o --libsha256-y := sha256.o -+obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o -+libsha256-y := sha256.o diff --git a/target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch b/target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch deleted file mode 100644 index 177b5840d5..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch +++ /dev/null @@ -1,668 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:08 +0100 -Subject: [PATCH] crypto: chacha - move existing library code into lib/crypto - -commit 
5fb8ef25803ef33e2eb60b626435828b937bed75 upstream. - -Currently, our generic ChaCha implementation consists of a permute -function in lib/chacha.c that operates on the 64-byte ChaCha state -directly [and which is always included into the core kernel since it -is used by the /dev/random driver], and the crypto API plumbing to -expose it as a skcipher. - -In order to support in-kernel users that need the ChaCha streamcipher -but have no need [or tolerance] for going through the abstractions of -the crypto API, let's expose the streamcipher bits via a library API -as well, in a way that permits the implementation to be superseded by -an architecture specific one if provided. - -So move the streamcipher code into a separate module in lib/crypto, -and expose the init() and crypt() routines to users of the library. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-neon-glue.c | 2 +- - arch/arm64/crypto/chacha-neon-glue.c | 2 +- - arch/x86/crypto/chacha_glue.c | 2 +- - crypto/Kconfig | 1 + - crypto/chacha_generic.c | 60 ++-------------------- - include/crypto/chacha.h | 77 ++++++++++++++++++++++------ - include/crypto/internal/chacha.h | 53 +++++++++++++++++++ - lib/Makefile | 3 +- - lib/crypto/Kconfig | 26 ++++++++++ - lib/crypto/Makefile | 4 ++ - lib/{ => crypto}/chacha.c | 20 ++++---- - lib/crypto/libchacha.c | 35 +++++++++++++ - 12 files changed, 199 insertions(+), 86 deletions(-) - create mode 100644 include/crypto/internal/chacha.h - rename lib/{ => crypto}/chacha.c (88%) - create mode 100644 lib/crypto/libchacha.c - ---- a/arch/arm/crypto/chacha-neon-glue.c -+++ b/arch/arm/crypto/chacha-neon-glue.c -@@ -20,7 +20,7 @@ - */ - - #include <crypto/algapi.h> --#include <crypto/chacha.h> -+#include <crypto/internal/chacha.h> - #include <crypto/internal/simd.h> - #include <crypto/internal/skcipher.h> - #include <linux/kernel.h> ---- 
a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -20,7 +20,7 @@ - */ - - #include <crypto/algapi.h> --#include <crypto/chacha.h> -+#include <crypto/internal/chacha.h> - #include <crypto/internal/simd.h> - #include <crypto/internal/skcipher.h> - #include <linux/kernel.h> ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -7,7 +7,7 @@ - */ - - #include <crypto/algapi.h> --#include <crypto/chacha.h> -+#include <crypto/internal/chacha.h> - #include <crypto/internal/simd.h> - #include <crypto/internal/skcipher.h> - #include <linux/kernel.h> ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -1393,6 +1393,7 @@ config CRYPTO_SALSA20 - - config CRYPTO_CHACHA20 - tristate "ChaCha stream cipher algorithms" -+ select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_BLKCIPHER - help - The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms. ---- a/crypto/chacha_generic.c -+++ b/crypto/chacha_generic.c -@@ -8,29 +8,10 @@ - - #include <asm/unaligned.h> - #include <crypto/algapi.h> --#include <crypto/chacha.h> -+#include <crypto/internal/chacha.h> - #include <crypto/internal/skcipher.h> - #include <linux/module.h> - --static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src, -- unsigned int bytes, int nrounds) --{ -- /* aligned to potentially speed up crypto_xor() */ -- u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); -- -- while (bytes >= CHACHA_BLOCK_SIZE) { -- chacha_block(state, stream, nrounds); -- crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); -- bytes -= CHACHA_BLOCK_SIZE; -- dst += CHACHA_BLOCK_SIZE; -- src += CHACHA_BLOCK_SIZE; -- } -- if (bytes) { -- chacha_block(state, stream, nrounds); -- crypto_xor_cpy(dst, src, stream, bytes); -- } --} -- - static int chacha_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) - { -@@ -48,8 +29,8 @@ static int chacha_stream_xor(struct skci - if (nbytes < walk.total) - nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); - -- 
chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, -- nbytes, ctx->nrounds); -+ chacha_crypt_generic(state, walk.dst.virt.addr, -+ walk.src.virt.addr, nbytes, ctx->nrounds); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - -@@ -58,41 +39,10 @@ static int chacha_stream_xor(struct skci - - void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv) - { -- state[0] = 0x61707865; /* "expa" */ -- state[1] = 0x3320646e; /* "nd 3" */ -- state[2] = 0x79622d32; /* "2-by" */ -- state[3] = 0x6b206574; /* "te k" */ -- state[4] = ctx->key[0]; -- state[5] = ctx->key[1]; -- state[6] = ctx->key[2]; -- state[7] = ctx->key[3]; -- state[8] = ctx->key[4]; -- state[9] = ctx->key[5]; -- state[10] = ctx->key[6]; -- state[11] = ctx->key[7]; -- state[12] = get_unaligned_le32(iv + 0); -- state[13] = get_unaligned_le32(iv + 4); -- state[14] = get_unaligned_le32(iv + 8); -- state[15] = get_unaligned_le32(iv + 12); -+ chacha_init_generic(state, ctx->key, iv); - } - EXPORT_SYMBOL_GPL(crypto_chacha_init); - --static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize, int nrounds) --{ -- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -- int i; -- -- if (keysize != CHACHA_KEY_SIZE) -- return -EINVAL; -- -- for (i = 0; i < ARRAY_SIZE(ctx->key); i++) -- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); -- -- ctx->nrounds = nrounds; -- return 0; --} -- - int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) - { -@@ -126,7 +76,7 @@ int crypto_xchacha_crypt(struct skcipher - - /* Compute the subkey given the original key and first 128 nonce bits */ - crypto_chacha_init(state, ctx, req->iv); -- hchacha_block(state, subctx.key, ctx->nrounds); -+ hchacha_block_generic(state, subctx.key, ctx->nrounds); - subctx.nrounds = ctx->nrounds; - - /* Build the real IV */ ---- a/include/crypto/chacha.h -+++ b/include/crypto/chacha.h -@@ -15,9 +15,8 @@ - #ifndef _CRYPTO_CHACHA_H - 
#define _CRYPTO_CHACHA_H - --#include <crypto/skcipher.h> -+#include <asm/unaligned.h> - #include <linux/types.h> --#include <linux/crypto.h> - - /* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */ - #define CHACHA_IV_SIZE 16 -@@ -29,26 +28,70 @@ - /* 192-bit nonce, then 64-bit stream position */ - #define XCHACHA_IV_SIZE 32 - --struct chacha_ctx { -- u32 key[8]; -- int nrounds; --}; -- --void chacha_block(u32 *state, u8 *stream, int nrounds); -+void chacha_block_generic(u32 *state, u8 *stream, int nrounds); - static inline void chacha20_block(u32 *state, u8 *stream) - { -- chacha_block(state, stream, 20); -+ chacha_block_generic(state, stream, 20); - } --void hchacha_block(const u32 *in, u32 *out, int nrounds); - --void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); -+void hchacha_block_arch(const u32 *state, u32 *out, int nrounds); -+void hchacha_block_generic(const u32 *state, u32 *out, int nrounds); -+ -+static inline void hchacha_block(const u32 *state, u32 *out, int nrounds) -+{ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) -+ hchacha_block_arch(state, out, nrounds); -+ else -+ hchacha_block_generic(state, out, nrounds); -+} - --int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize); --int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize); -+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv); -+static inline void chacha_init_generic(u32 *state, const u32 *key, const u8 *iv) -+{ -+ state[0] = 0x61707865; /* "expa" */ -+ state[1] = 0x3320646e; /* "nd 3" */ -+ state[2] = 0x79622d32; /* "2-by" */ -+ state[3] = 0x6b206574; /* "te k" */ -+ state[4] = key[0]; -+ state[5] = key[1]; -+ state[6] = key[2]; -+ state[7] = key[3]; -+ state[8] = key[4]; -+ state[9] = key[5]; -+ state[10] = key[6]; -+ state[11] = key[7]; -+ state[12] = get_unaligned_le32(iv + 0); -+ state[13] = get_unaligned_le32(iv + 4); -+ state[14] = 
get_unaligned_le32(iv + 8); -+ state[15] = get_unaligned_le32(iv + 12); -+} - --int crypto_chacha_crypt(struct skcipher_request *req); --int crypto_xchacha_crypt(struct skcipher_request *req); -+static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv) -+{ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) -+ chacha_init_arch(state, key, iv); -+ else -+ chacha_init_generic(state, key, iv); -+} -+ -+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes, int nrounds); -+void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes, int nrounds); -+ -+static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes, int nrounds) -+{ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA)) -+ chacha_crypt_arch(state, dst, src, bytes, nrounds); -+ else -+ chacha_crypt_generic(state, dst, src, bytes, nrounds); -+} -+ -+static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes) -+{ -+ chacha_crypt(state, dst, src, bytes, 20); -+} - - #endif /* _CRYPTO_CHACHA_H */ ---- /dev/null -+++ b/include/crypto/internal/chacha.h -@@ -0,0 +1,53 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef _CRYPTO_INTERNAL_CHACHA_H -+#define _CRYPTO_INTERNAL_CHACHA_H -+ -+#include <crypto/chacha.h> -+#include <crypto/internal/skcipher.h> -+#include <linux/crypto.h> -+ -+struct chacha_ctx { -+ u32 key[8]; -+ int nrounds; -+}; -+ -+void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); -+ -+static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, -+ unsigned int keysize, int nrounds) -+{ -+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -+ int i; -+ -+ if (keysize != CHACHA_KEY_SIZE) -+ return -EINVAL; -+ -+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++) -+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32)); -+ -+ ctx->nrounds = nrounds; -+ return 0; -+} -+ -+static inline int chacha20_setkey(struct crypto_skcipher *tfm, 
const u8 *key, -+ unsigned int keysize) -+{ -+ return chacha_setkey(tfm, key, keysize, 20); -+} -+ -+static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -+ unsigned int keysize) -+{ -+ return chacha_setkey(tfm, key, keysize, 12); -+} -+ -+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, -+ unsigned int keysize); -+int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -+ unsigned int keysize); -+ -+int crypto_chacha_crypt(struct skcipher_request *req); -+int crypto_xchacha_crypt(struct skcipher_request *req); -+ -+#endif /* _CRYPTO_CHACHA_H */ ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -26,8 +26,7 @@ endif - - lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o timerqueue.o xarray.o \ -- idr.o extable.o \ -- sha1.o chacha.o irq_regs.o argv_split.o \ -+ idr.o extable.o sha1.o irq_regs.o argv_split.o \ - flex_proportions.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -8,6 +8,32 @@ config CRYPTO_LIB_AES - config CRYPTO_LIB_ARC4 - tristate - -+config CRYPTO_ARCH_HAVE_LIB_CHACHA -+ tristate -+ help -+ Declares whether the architecture provides an arch-specific -+ accelerated implementation of the ChaCha library interface, -+ either builtin or as a module. -+ -+config CRYPTO_LIB_CHACHA_GENERIC -+ tristate -+ select CRYPTO_ALGAPI -+ help -+ This symbol can be depended upon by arch implementations of the -+ ChaCha library interface that require the generic code as a -+ fallback, e.g., for SIMD implementations. If no arch specific -+ implementation is enabled, this implementation serves the users -+ of CRYPTO_LIB_CHACHA. 
-+ -+config CRYPTO_LIB_CHACHA -+ tristate "ChaCha library interface" -+ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA -+ select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n -+ help -+ Enable the ChaCha library interface. This interface may be fulfilled -+ by either the generic implementation or an arch-specific one, if one -+ is available and enabled. -+ - config CRYPTO_LIB_DES - tristate - ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -1,5 +1,9 @@ - # SPDX-License-Identifier: GPL-2.0 - -+# chacha is used by the /dev/random driver which is always builtin -+obj-y += chacha.o -+obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o -+ - obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o - libaes-y := aes.o - ---- a/lib/chacha.c -+++ /dev/null -@@ -1,113 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0-or-later --/* -- * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) -- * -- * Copyright (C) 2015 Martin Willi -- */ -- --#include <linux/kernel.h> --#include <linux/export.h> --#include <linux/bitops.h> --#include <linux/cryptohash.h> --#include <asm/unaligned.h> --#include <crypto/chacha.h> -- --static void chacha_permute(u32 *x, int nrounds) --{ -- int i; -- -- /* whitelist the allowed round counts */ -- WARN_ON_ONCE(nrounds != 20 && nrounds != 12); -- -- for (i = 0; i < nrounds; i += 2) { -- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); -- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); -- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); -- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); -- -- x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); -- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); -- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); -- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); -- -- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); -- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); -- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); -- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); -- -- x[8] += x[12]; x[4] 
= rol32(x[4] ^ x[8], 7); -- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); -- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); -- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); -- -- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); -- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); -- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); -- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); -- -- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); -- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); -- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); -- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); -- -- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); -- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); -- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); -- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); -- -- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); -- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); -- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); -- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); -- } --} -- --/** -- * chacha_block - generate one keystream block and increment block counter -- * @state: input state matrix (16 32-bit words) -- * @stream: output keystream block (64 bytes) -- * @nrounds: number of rounds (20 or 12; 20 is recommended) -- * -- * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. -- * The caller has already converted the endianness of the input. This function -- * also handles incrementing the block counter in the input matrix. 
-- */ --void chacha_block(u32 *state, u8 *stream, int nrounds) --{ -- u32 x[16]; -- int i; -- -- memcpy(x, state, 64); -- -- chacha_permute(x, nrounds); -- -- for (i = 0; i < ARRAY_SIZE(x); i++) -- put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); -- -- state[12]++; --} --EXPORT_SYMBOL(chacha_block); -- --/** -- * hchacha_block - abbreviated ChaCha core, for XChaCha -- * @in: input state matrix (16 32-bit words) -- * @out: output (8 32-bit words) -- * @nrounds: number of rounds (20 or 12; 20 is recommended) -- * -- * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step -- * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha -- * skips the final addition of the initial state, and outputs only certain words -- * of the state. It should not be used for streaming directly. -- */ --void hchacha_block(const u32 *in, u32 *out, int nrounds) --{ -- u32 x[16]; -- -- memcpy(x, in, 64); -- -- chacha_permute(x, nrounds); -- -- memcpy(&out[0], &x[0], 16); -- memcpy(&out[4], &x[12], 16); --} --EXPORT_SYMBOL(hchacha_block); ---- /dev/null -+++ b/lib/crypto/chacha.c -@@ -0,0 +1,115 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) -+ * -+ * Copyright (C) 2015 Martin Willi -+ */ -+ -+#include <linux/bug.h> -+#include <linux/kernel.h> -+#include <linux/export.h> -+#include <linux/bitops.h> -+#include <linux/string.h> -+#include <linux/cryptohash.h> -+#include <asm/unaligned.h> -+#include <crypto/chacha.h> -+ -+static void chacha_permute(u32 *x, int nrounds) -+{ -+ int i; -+ -+ /* whitelist the allowed round counts */ -+ WARN_ON_ONCE(nrounds != 20 && nrounds != 12); -+ -+ for (i = 0; i < nrounds; i += 2) { -+ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); -+ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); -+ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); -+ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); -+ -+ x[8] += x[12]; x[4] = rol32(x[4] ^ 
x[8], 12); -+ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); -+ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); -+ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); -+ -+ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); -+ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); -+ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); -+ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); -+ -+ x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); -+ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); -+ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); -+ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); -+ -+ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); -+ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); -+ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); -+ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); -+ -+ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); -+ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); -+ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); -+ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); -+ -+ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); -+ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); -+ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); -+ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); -+ -+ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); -+ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); -+ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); -+ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); -+ } -+} -+ -+/** -+ * chacha_block - generate one keystream block and increment block counter -+ * @state: input state matrix (16 32-bit words) -+ * @stream: output keystream block (64 bytes) -+ * @nrounds: number of rounds (20 or 12; 20 is recommended) -+ * -+ * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. -+ * The caller has already converted the endianness of the input. This function -+ * also handles incrementing the block counter in the input matrix. 
-+ */ -+void chacha_block_generic(u32 *state, u8 *stream, int nrounds) -+{ -+ u32 x[16]; -+ int i; -+ -+ memcpy(x, state, 64); -+ -+ chacha_permute(x, nrounds); -+ -+ for (i = 0; i < ARRAY_SIZE(x); i++) -+ put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); -+ -+ state[12]++; -+} -+EXPORT_SYMBOL(chacha_block_generic); -+ -+/** -+ * hchacha_block_generic - abbreviated ChaCha core, for XChaCha -+ * @state: input state matrix (16 32-bit words) -+ * @out: output (8 32-bit words) -+ * @nrounds: number of rounds (20 or 12; 20 is recommended) -+ * -+ * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step -+ * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha -+ * skips the final addition of the initial state, and outputs only certain words -+ * of the state. It should not be used for streaming directly. -+ */ -+void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds) -+{ -+ u32 x[16]; -+ -+ memcpy(x, state, 64); -+ -+ chacha_permute(x, nrounds); -+ -+ memcpy(&stream[0], &x[0], 16); -+ memcpy(&stream[4], &x[12], 16); -+} -+EXPORT_SYMBOL(hchacha_block_generic); ---- /dev/null -+++ b/lib/crypto/libchacha.c -@@ -0,0 +1,35 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * The ChaCha stream cipher (RFC7539) -+ * -+ * Copyright (C) 2015 Martin Willi -+ */ -+ -+#include <linux/kernel.h> -+#include <linux/export.h> -+#include <linux/module.h> -+ -+#include <crypto/algapi.h> // for crypto_xor_cpy -+#include <crypto/chacha.h> -+ -+void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes, int nrounds) -+{ -+ /* aligned to potentially speed up crypto_xor() */ -+ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); -+ -+ while (bytes >= CHACHA_BLOCK_SIZE) { -+ chacha_block_generic(state, stream, nrounds); -+ crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); -+ bytes -= CHACHA_BLOCK_SIZE; -+ dst += CHACHA_BLOCK_SIZE; -+ src += CHACHA_BLOCK_SIZE; -+ } -+ if (bytes) { -+ 
chacha_block_generic(state, stream, nrounds); -+ crypto_xor_cpy(dst, src, stream, bytes); -+ } -+} -+EXPORT_SYMBOL(chacha_crypt_generic); -+ -+MODULE_LICENSE("GPL"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0003-crypto-x86-chacha-depend-on-generic-chacha-library-i.patch b/target/linux/generic/backport-5.4/080-wireguard-0003-crypto-x86-chacha-depend-on-generic-chacha-library-i.patch deleted file mode 100644 index b1f59cc38f..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0003-crypto-x86-chacha-depend-on-generic-chacha-library-i.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:09 +0100 -Subject: [PATCH] crypto: x86/chacha - depend on generic chacha library instead - of crypto driver - -commit 28e8d89b1ce8d2e7badfb5f69971dd635acb8863 upstream. - -In preparation of extending the x86 ChaCha driver to also expose the ChaCha -library interface, drop the dependency on the chacha_generic crypto driver -as a non-SIMD fallback, and depend on the generic ChaCha library directly. -This way, we only pull in the code we actually need, without registering -a set of ChaCha skciphers that we will never use. - -Since turning the FPU on and off is cheap these days, simplify the SIMD -routine by dropping the per-page yield, which makes for a cleaner switch -to the library API as well. This also allows use to invoke the skcipher -walk routines in non-atomic mode. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/chacha_glue.c | 90 ++++++++++++++--------------------- - crypto/Kconfig | 2 +- - 2 files changed, 36 insertions(+), 56 deletions(-) - ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -123,37 +123,38 @@ static void chacha_dosimd(u32 *state, u8 - } - } - --static int chacha_simd_stream_xor(struct skcipher_walk *walk, -+static int chacha_simd_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) - { - u32 *state, state_buf[16 + 2] __aligned(8); -- int next_yield = 4096; /* bytes until next FPU yield */ -- int err = 0; -+ struct skcipher_walk walk; -+ int err; -+ -+ err = skcipher_walk_virt(&walk, req, false); - - BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); - state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); - -- crypto_chacha_init(state, ctx, iv); -+ chacha_init_generic(state, ctx->key, iv); - -- while (walk->nbytes > 0) { -- unsigned int nbytes = walk->nbytes; -+ while (walk.nbytes > 0) { -+ unsigned int nbytes = walk.nbytes; - -- if (nbytes < walk->total) { -- nbytes = round_down(nbytes, walk->stride); -- next_yield -= nbytes; -- } -- -- chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, -- nbytes, ctx->nrounds); -+ if (nbytes < walk.total) -+ nbytes = round_down(nbytes, walk.stride); - -- if (next_yield <= 0) { -- /* temporarily allow preemption */ -- kernel_fpu_end(); -+ if (!crypto_simd_usable()) { -+ chacha_crypt_generic(state, walk.dst.virt.addr, -+ walk.src.virt.addr, nbytes, -+ ctx->nrounds); -+ } else { - kernel_fpu_begin(); -- next_yield = 4096; -+ chacha_dosimd(state, walk.dst.virt.addr, -+ walk.src.virt.addr, nbytes, -+ ctx->nrounds); -+ kernel_fpu_end(); - } -- -- err = skcipher_walk_done(walk, walk->nbytes - nbytes); -+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -@@ -163,55 +164,34 @@ static int chacha_simd(struct skcipher_r - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct 
chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -- struct skcipher_walk walk; -- int err; -- -- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) -- return crypto_chacha_crypt(req); - -- err = skcipher_walk_virt(&walk, req, true); -- if (err) -- return err; -- -- kernel_fpu_begin(); -- err = chacha_simd_stream_xor(&walk, ctx, req->iv); -- kernel_fpu_end(); -- return err; -+ return chacha_simd_stream_xor(req, ctx, req->iv); - } - - static int xchacha_simd(struct skcipher_request *req) - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -- struct skcipher_walk walk; -- struct chacha_ctx subctx; - u32 *state, state_buf[16 + 2] __aligned(8); -+ struct chacha_ctx subctx; - u8 real_iv[16]; -- int err; -- -- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) -- return crypto_xchacha_crypt(req); -- -- err = skcipher_walk_virt(&walk, req, true); -- if (err) -- return err; - - BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); - state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); -- crypto_chacha_init(state, ctx, req->iv); -+ chacha_init_generic(state, ctx->key, req->iv); - -- kernel_fpu_begin(); -- -- hchacha_block_ssse3(state, subctx.key, ctx->nrounds); -+ if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) { -+ kernel_fpu_begin(); -+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds); -+ kernel_fpu_end(); -+ } else { -+ hchacha_block_generic(state, subctx.key, ctx->nrounds); -+ } - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); - memcpy(&real_iv[8], req->iv + 16, 8); -- err = chacha_simd_stream_xor(&walk, &subctx, real_iv); -- -- kernel_fpu_end(); -- -- return err; -+ return chacha_simd_stream_xor(req, &subctx, real_iv); - } - - static struct skcipher_alg algs[] = { -@@ -227,7 +207,7 @@ static struct skcipher_alg algs[] = { - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, -- .setkey = 
crypto_chacha20_setkey, -+ .setkey = chacha20_setkey, - .encrypt = chacha_simd, - .decrypt = chacha_simd, - }, { -@@ -242,7 +222,7 @@ static struct skcipher_alg algs[] = { - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha20_setkey, -+ .setkey = chacha20_setkey, - .encrypt = xchacha_simd, - .decrypt = xchacha_simd, - }, { -@@ -257,7 +237,7 @@ static struct skcipher_alg algs[] = { - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha12_setkey, -+ .setkey = chacha12_setkey, - .encrypt = xchacha_simd, - .decrypt = xchacha_simd, - }, ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -1417,7 +1417,7 @@ config CRYPTO_CHACHA20_X86_64 - tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)" - depends on X86 && 64BIT - select CRYPTO_BLKCIPHER -- select CRYPTO_CHACHA20 -+ select CRYPTO_LIB_CHACHA_GENERIC - help - SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, - XChaCha20, and XChaCha12 stream ciphers. diff --git a/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch b/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch deleted file mode 100644 index 0e5462837b..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:10 +0100 -Subject: [PATCH] crypto: x86/chacha - expose SIMD ChaCha routine as library - function - -commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 upstream. - -Wire the existing x86 SIMD ChaCha code into the new ChaCha library -interface, so that users of the library interface will get the -accelerated version when available. 
- -Given that calls into the library API will always go through the -routines in this module if it is enabled, switch to static keys -to select the optimal implementation available (which may be none -at all, in which case we defer to the generic implementation for -all invocations). - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++---------- - crypto/Kconfig | 1 + - include/crypto/chacha.h | 6 +++ - 3 files changed, 73 insertions(+), 25 deletions(-) - ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u - asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); - asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds); --#ifdef CONFIG_AS_AVX2 -+ - asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); - asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); - asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); --static bool chacha_use_avx2; --#ifdef CONFIG_AS_AVX512 -+ - asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); - asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); - asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); --static bool chacha_use_avx512vl; --#endif --#endif -+ -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); - - 
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) - { -@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsig - static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) - { --#ifdef CONFIG_AS_AVX2 --#ifdef CONFIG_AS_AVX512 -- if (chacha_use_avx512vl) { -+ if (IS_ENABLED(CONFIG_AS_AVX512) && -+ static_branch_likely(&chacha_use_avx512vl)) { - while (bytes >= CHACHA_BLOCK_SIZE * 8) { - chacha_8block_xor_avx512vl(state, dst, src, bytes, - nrounds); -@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 - return; - } - } --#endif -- if (chacha_use_avx2) { -+ -+ if (IS_ENABLED(CONFIG_AS_AVX2) && -+ static_branch_likely(&chacha_use_avx2)) { - while (bytes >= CHACHA_BLOCK_SIZE * 8) { - chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 8; -@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 - return; - } - } --#endif -+ - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; -@@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 - } - } - -+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) -+{ -+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); -+ -+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { -+ hchacha_block_generic(state, stream, nrounds); -+ } else { -+ kernel_fpu_begin(); -+ hchacha_block_ssse3(state, stream, nrounds); -+ kernel_fpu_end(); -+ } -+} -+EXPORT_SYMBOL(hchacha_block_arch); -+ -+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -+{ -+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); -+ -+ chacha_init_generic(state, key, iv); -+} -+EXPORT_SYMBOL(chacha_init_arch); -+ -+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, -+ int nrounds) -+{ -+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); -+ -+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || -+ 
bytes <= CHACHA_BLOCK_SIZE) -+ return chacha_crypt_generic(state, dst, src, bytes, nrounds); -+ -+ kernel_fpu_begin(); -+ chacha_dosimd(state, dst, src, bytes, nrounds); -+ kernel_fpu_end(); -+} -+EXPORT_SYMBOL(chacha_crypt_arch); -+ - static int chacha_simd_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) - { -@@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - -- if (!crypto_simd_usable()) { -+ if (!static_branch_likely(&chacha_use_simd) || -+ !crypto_simd_usable()) { - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); -@@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = { - static int __init chacha_simd_mod_init(void) - { - if (!boot_cpu_has(X86_FEATURE_SSSE3)) -- return -ENODEV; -+ return 0; - --#ifdef CONFIG_AS_AVX2 -- chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && -- boot_cpu_has(X86_FEATURE_AVX2) && -- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); --#ifdef CONFIG_AS_AVX512 -- chacha_use_avx512vl = chacha_use_avx2 && -- boot_cpu_has(X86_FEATURE_AVX512VL) && -- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */ --#endif --#endif -+ static_branch_enable(&chacha_use_simd); -+ -+ if (IS_ENABLED(CONFIG_AS_AVX2) && -+ boot_cpu_has(X86_FEATURE_AVX) && -+ boot_cpu_has(X86_FEATURE_AVX2) && -+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { -+ static_branch_enable(&chacha_use_avx2); -+ -+ if (IS_ENABLED(CONFIG_AS_AVX512) && -+ boot_cpu_has(X86_FEATURE_AVX512VL) && -+ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ -+ static_branch_enable(&chacha_use_avx512vl); -+ } - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); - } - ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -1418,6 +1418,7 @@ config CRYPTO_CHACHA20_X86_64 - depends on X86 && 64BIT - select CRYPTO_BLKCIPHER - select CRYPTO_LIB_CHACHA_GENERIC -+ select CRYPTO_ARCH_HAVE_LIB_CHACHA - help - SSSE3, 
AVX2, and AVX-512VL optimized implementations of the ChaCha20, - XChaCha20, and XChaCha12 stream ciphers. ---- a/include/crypto/chacha.h -+++ b/include/crypto/chacha.h -@@ -25,6 +25,12 @@ - #define CHACHA_BLOCK_SIZE 64 - #define CHACHAPOLY_IV_SIZE 12 - -+#ifdef CONFIG_X86_64 -+#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32)) -+#else -+#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) -+#endif -+ - /* 192-bit nonce, then 64-bit stream position */ - #define XCHACHA_IV_SIZE 32 - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0005-crypto-arm64-chacha-depend-on-generic-chacha-library.patch b/target/linux/generic/backport-5.4/080-wireguard-0005-crypto-arm64-chacha-depend-on-generic-chacha-library.patch deleted file mode 100644 index 10e49c192c..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0005-crypto-arm64-chacha-depend-on-generic-chacha-library.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:11 +0100 -Subject: [PATCH] crypto: arm64/chacha - depend on generic chacha library - instead of crypto driver - -commit c77da4867cbb7841177275dbb250f5c09679fae4 upstream. - -Depend on the generic ChaCha library routines instead of pulling in the -generic ChaCha skcipher driver, which is more than we need, and makes -managing the dependencies between the generic library, generic driver, -accelerated library and driver more complicated. - -While at it, drop the logic to prefer the scalar code on short inputs. -Turning the NEON on and off is cheap these days, and one major use case -for ChaCha20 is ChaCha20-Poly1305, which is guaranteed to hit the scalar -path upon every invocation (when doing the Poly1305 nonce generation) - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/arm64/crypto/Kconfig | 2 +- - arch/arm64/crypto/chacha-neon-glue.c | 40 +++++++++++++++------------- - 2 files changed, 23 insertions(+), 19 deletions(-) - ---- a/arch/arm64/crypto/Kconfig -+++ b/arch/arm64/crypto/Kconfig -@@ -103,7 +103,7 @@ config CRYPTO_CHACHA20_NEON - tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER -- select CRYPTO_CHACHA20 -+ select CRYPTO_LIB_CHACHA_GENERIC - - config CRYPTO_NHPOLY1305_NEON - tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" ---- a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -68,7 +68,7 @@ static int chacha_neon_stream_xor(struct - - err = skcipher_walk_virt(&walk, req, false); - -- crypto_chacha_init(state, ctx, iv); -+ chacha_init_generic(state, ctx->key, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; -@@ -76,10 +76,16 @@ static int chacha_neon_stream_xor(struct - if (nbytes < walk.total) - nbytes = rounddown(nbytes, walk.stride); - -- kernel_neon_begin(); -- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, -- nbytes, ctx->nrounds); -- kernel_neon_end(); -+ if (!crypto_simd_usable()) { -+ chacha_crypt_generic(state, walk.dst.virt.addr, -+ walk.src.virt.addr, nbytes, -+ ctx->nrounds); -+ } else { -+ kernel_neon_begin(); -+ chacha_doneon(state, walk.dst.virt.addr, -+ walk.src.virt.addr, nbytes, ctx->nrounds); -+ kernel_neon_end(); -+ } - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - -@@ -91,9 +97,6 @@ static int chacha_neon(struct skcipher_r - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - -- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) -- return crypto_chacha_crypt(req); -- - return chacha_neon_stream_xor(req, ctx, req->iv); - } - -@@ -105,14 +108,15 @@ static int xchacha_neon(struct skcipher_ 
- u32 state[16]; - u8 real_iv[16]; - -- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) -- return crypto_xchacha_crypt(req); -- -- crypto_chacha_init(state, ctx, req->iv); -+ chacha_init_generic(state, ctx->key, req->iv); - -- kernel_neon_begin(); -- hchacha_block_neon(state, subctx.key, ctx->nrounds); -- kernel_neon_end(); -+ if (crypto_simd_usable()) { -+ kernel_neon_begin(); -+ hchacha_block_neon(state, subctx.key, ctx->nrounds); -+ kernel_neon_end(); -+ } else { -+ hchacha_block_generic(state, subctx.key, ctx->nrounds); -+ } - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); -@@ -134,7 +138,7 @@ static struct skcipher_alg algs[] = { - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 5 * CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha20_setkey, -+ .setkey = chacha20_setkey, - .encrypt = chacha_neon, - .decrypt = chacha_neon, - }, { -@@ -150,7 +154,7 @@ static struct skcipher_alg algs[] = { - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 5 * CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha20_setkey, -+ .setkey = chacha20_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - }, { -@@ -166,7 +170,7 @@ static struct skcipher_alg algs[] = { - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 5 * CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha12_setkey, -+ .setkey = chacha12_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0006-crypto-arm64-chacha-expose-arm64-ChaCha-routine-as-l.patch b/target/linux/generic/backport-5.4/080-wireguard-0006-crypto-arm64-chacha-expose-arm64-ChaCha-routine-as-l.patch deleted file mode 100644 index 71665e8bfd..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0006-crypto-arm64-chacha-expose-arm64-ChaCha-routine-as-l.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 
-From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:12 +0100 -Subject: [PATCH] crypto: arm64/chacha - expose arm64 ChaCha routine as library - function - -commit b3aad5bad26a01a4bd8c49a5c5f52aec665f3b7c upstream. - -Expose the accelerated NEON ChaCha routine directly as a symbol -export so that users of the ChaCha library API can use it directly. - -Given that calls into the library API will always go through the -routines in this module if it is enabled, switch to static keys -to select the optimal implementation available (which may be none -at all, in which case we defer to the generic implementation for -all invocations). - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm64/crypto/Kconfig | 1 + - arch/arm64/crypto/chacha-neon-glue.c | 53 ++++++++++++++++++++++------ - 2 files changed, 43 insertions(+), 11 deletions(-) - ---- a/arch/arm64/crypto/Kconfig -+++ b/arch/arm64/crypto/Kconfig -@@ -104,6 +104,7 @@ config CRYPTO_CHACHA20_NEON - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_LIB_CHACHA_GENERIC -+ select CRYPTO_ARCH_HAVE_LIB_CHACHA - - config CRYPTO_NHPOLY1305_NEON - tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" ---- a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -23,6 +23,7 @@ - #include <crypto/internal/chacha.h> - #include <crypto/internal/simd.h> - #include <crypto/internal/skcipher.h> -+#include <linux/jump_label.h> - #include <linux/kernel.h> - #include <linux/module.h> - -@@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u - int nrounds, int bytes); - asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); - -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -+ - static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, - int bytes, int nrounds) - { -@@ -59,6 +62,37 @@ static 
void chacha_doneon(u32 *state, u8 - } - } - -+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) -+{ -+ if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { -+ hchacha_block_generic(state, stream, nrounds); -+ } else { -+ kernel_neon_begin(); -+ hchacha_block_neon(state, stream, nrounds); -+ kernel_neon_end(); -+ } -+} -+EXPORT_SYMBOL(hchacha_block_arch); -+ -+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -+{ -+ chacha_init_generic(state, key, iv); -+} -+EXPORT_SYMBOL(chacha_init_arch); -+ -+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, -+ int nrounds) -+{ -+ if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || -+ !crypto_simd_usable()) -+ return chacha_crypt_generic(state, dst, src, bytes, nrounds); -+ -+ kernel_neon_begin(); -+ chacha_doneon(state, dst, src, bytes, nrounds); -+ kernel_neon_end(); -+} -+EXPORT_SYMBOL(chacha_crypt_arch); -+ - static int chacha_neon_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) - { -@@ -76,7 +110,8 @@ static int chacha_neon_stream_xor(struct - if (nbytes < walk.total) - nbytes = rounddown(nbytes, walk.stride); - -- if (!crypto_simd_usable()) { -+ if (!static_branch_likely(&have_neon) || -+ !crypto_simd_usable()) { - chacha_crypt_generic(state, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - ctx->nrounds); -@@ -109,14 +144,7 @@ static int xchacha_neon(struct skcipher_ - u8 real_iv[16]; - - chacha_init_generic(state, ctx->key, req->iv); -- -- if (crypto_simd_usable()) { -- kernel_neon_begin(); -- hchacha_block_neon(state, subctx.key, ctx->nrounds); -- kernel_neon_end(); -- } else { -- hchacha_block_generic(state, subctx.key, ctx->nrounds); -- } -+ hchacha_block_arch(state, subctx.key, ctx->nrounds); - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); -@@ -179,14 +207,17 @@ static struct skcipher_alg algs[] = { - static int __init chacha_simd_mod_init(void) - { - if 
(!cpu_have_named_feature(ASIMD)) -- return -ENODEV; -+ return 0; -+ -+ static_branch_enable(&have_neon); - - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); - } - - static void __exit chacha_simd_mod_fini(void) - { -- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -+ if (cpu_have_named_feature(ASIMD)) -+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); - } - - module_init(chacha_simd_mod_init); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0007-crypto-arm-chacha-import-Eric-Biggers-s-scalar-accel.patch b/target/linux/generic/backport-5.4/080-wireguard-0007-crypto-arm-chacha-import-Eric-Biggers-s-scalar-accel.patch deleted file mode 100644 index 978f2f55be..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0007-crypto-arm-chacha-import-Eric-Biggers-s-scalar-accel.patch +++ /dev/null @@ -1,480 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:13 +0100 -Subject: [PATCH] crypto: arm/chacha - import Eric Biggers's scalar accelerated - ChaCha code - -commit 29621d099f9c642b22a69dc8e7e20c108473a392 upstream. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-scalar-core.S | 461 +++++++++++++++++++++++++++ - 1 file changed, 461 insertions(+) - create mode 100644 arch/arm/crypto/chacha-scalar-core.S - ---- /dev/null -+++ b/arch/arm/crypto/chacha-scalar-core.S -@@ -0,0 +1,461 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2018 Google, Inc. -+ */ -+ -+#include <linux/linkage.h> -+#include <asm/assembler.h> -+ -+/* -+ * Design notes: -+ * -+ * 16 registers would be needed to hold the state matrix, but only 14 are -+ * available because 'sp' and 'pc' cannot be used. So we spill the elements -+ * (x8, x9) to the stack and swap them out with (x10, x11). 
This adds one -+ * 'ldrd' and one 'strd' instruction per round. -+ * -+ * All rotates are performed using the implicit rotate operand accepted by the -+ * 'add' and 'eor' instructions. This is faster than using explicit rotate -+ * instructions. To make this work, we allow the values in the second and last -+ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the -+ * wrong rotation amount. The rotation amount is then fixed up just in time -+ * when the values are used. 'brot' is the number of bits the values in row 'b' -+ * need to be rotated right to arrive at the correct values, and 'drot' -+ * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such -+ * that they end up as (25, 24) after every round. -+ */ -+ -+ // ChaCha state registers -+ X0 .req r0 -+ X1 .req r1 -+ X2 .req r2 -+ X3 .req r3 -+ X4 .req r4 -+ X5 .req r5 -+ X6 .req r6 -+ X7 .req r7 -+ X8_X10 .req r8 // shared by x8 and x10 -+ X9_X11 .req r9 // shared by x9 and x11 -+ X12 .req r10 -+ X13 .req r11 -+ X14 .req r12 -+ X15 .req r14 -+ -+.Lexpand_32byte_k: -+ // "expand 32-byte k" -+ .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 -+ -+#ifdef __thumb2__ -+# define adrl adr -+#endif -+ -+.macro __rev out, in, t0, t1, t2 -+.if __LINUX_ARM_ARCH__ >= 6 -+ rev \out, \in -+.else -+ lsl \t0, \in, #24 -+ and \t1, \in, #0xff00 -+ and \t2, \in, #0xff0000 -+ orr \out, \t0, \in, lsr #24 -+ orr \out, \out, \t1, lsl #8 -+ orr \out, \out, \t2, lsr #8 -+.endif -+.endm -+ -+.macro _le32_bswap x, t0, t1, t2 -+#ifdef __ARMEB__ -+ __rev \x, \x, \t0, \t1, \t2 -+#endif -+.endm -+ -+.macro _le32_bswap_4x a, b, c, d, t0, t1, t2 -+ _le32_bswap \a, \t0, \t1, \t2 -+ _le32_bswap \b, \t0, \t1, \t2 -+ _le32_bswap \c, \t0, \t1, \t2 -+ _le32_bswap \d, \t0, \t1, \t2 -+.endm -+ -+.macro __ldrd a, b, src, offset -+#if __LINUX_ARM_ARCH__ >= 6 -+ ldrd \a, \b, [\src, #\offset] -+#else -+ ldr \a, [\src, #\offset] -+ ldr \b, [\src, #\offset + 4] -+#endif -+.endm -+ -+.macro __strd a, b, dst, 
offset -+#if __LINUX_ARM_ARCH__ >= 6 -+ strd \a, \b, [\dst, #\offset] -+#else -+ str \a, [\dst, #\offset] -+ str \b, [\dst, #\offset + 4] -+#endif -+.endm -+ -+.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2 -+ -+ // a += b; d ^= a; d = rol(d, 16); -+ add \a1, \a1, \b1, ror #brot -+ add \a2, \a2, \b2, ror #brot -+ eor \d1, \a1, \d1, ror #drot -+ eor \d2, \a2, \d2, ror #drot -+ // drot == 32 - 16 == 16 -+ -+ // c += d; b ^= c; b = rol(b, 12); -+ add \c1, \c1, \d1, ror #16 -+ add \c2, \c2, \d2, ror #16 -+ eor \b1, \c1, \b1, ror #brot -+ eor \b2, \c2, \b2, ror #brot -+ // brot == 32 - 12 == 20 -+ -+ // a += b; d ^= a; d = rol(d, 8); -+ add \a1, \a1, \b1, ror #20 -+ add \a2, \a2, \b2, ror #20 -+ eor \d1, \a1, \d1, ror #16 -+ eor \d2, \a2, \d2, ror #16 -+ // drot == 32 - 8 == 24 -+ -+ // c += d; b ^= c; b = rol(b, 7); -+ add \c1, \c1, \d1, ror #24 -+ add \c2, \c2, \d2, ror #24 -+ eor \b1, \c1, \b1, ror #20 -+ eor \b2, \c2, \b2, ror #20 -+ // brot == 32 - 7 == 25 -+.endm -+ -+.macro _doubleround -+ -+ // column round -+ -+ // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13) -+ _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13 -+ -+ // save (x8, x9); restore (x10, x11) -+ __strd X8_X10, X9_X11, sp, 0 -+ __ldrd X8_X10, X9_X11, sp, 8 -+ -+ // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15) -+ _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15 -+ -+ .set brot, 25 -+ .set drot, 24 -+ -+ // diagonal round -+ -+ // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12) -+ _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12 -+ -+ // save (x10, x11); restore (x8, x9) -+ __strd X8_X10, X9_X11, sp, 8 -+ __ldrd X8_X10, X9_X11, sp, 0 -+ -+ // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14) -+ _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14 -+.endm -+ -+.macro _chacha_permute nrounds -+ .set brot, 0 -+ .set drot, 0 -+ .rept \nrounds / 2 -+ _doubleround -+ .endr -+.endm -+ -+.macro _chacha nrounds -+ -+.Lnext_block\@: -+ // Stack: unused0-unused1 x10-x11 
x0-x15 OUT IN LEN -+ // Registers contain x0-x9,x12-x15. -+ -+ // Do the core ChaCha permutation to update x0-x15. -+ _chacha_permute \nrounds -+ -+ add sp, #8 -+ // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN -+ // Registers contain x0-x9,x12-x15. -+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. -+ -+ // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15). -+ push {X8_X10, X9_X11, X12, X13, X14, X15} -+ -+ // Load (OUT, IN, LEN). -+ ldr r14, [sp, #96] -+ ldr r12, [sp, #100] -+ ldr r11, [sp, #104] -+ -+ orr r10, r14, r12 -+ -+ // Use slow path if fewer than 64 bytes remain. -+ cmp r11, #64 -+ blt .Lxor_slowpath\@ -+ -+ // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on -+ // ARMv6+, since ldmia and stmia (used below) still require alignment. -+ tst r10, #3 -+ bne .Lxor_slowpath\@ -+ -+ // Fast path: XOR 64 bytes of aligned data. -+ -+ // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN -+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT. -+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. 
-+ -+ // x0-x3 -+ __ldrd r8, r9, sp, 32 -+ __ldrd r10, r11, sp, 40 -+ add X0, X0, r8 -+ add X1, X1, r9 -+ add X2, X2, r10 -+ add X3, X3, r11 -+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 -+ ldmia r12!, {r8-r11} -+ eor X0, X0, r8 -+ eor X1, X1, r9 -+ eor X2, X2, r10 -+ eor X3, X3, r11 -+ stmia r14!, {X0-X3} -+ -+ // x4-x7 -+ __ldrd r8, r9, sp, 48 -+ __ldrd r10, r11, sp, 56 -+ add X4, r8, X4, ror #brot -+ add X5, r9, X5, ror #brot -+ ldmia r12!, {X0-X3} -+ add X6, r10, X6, ror #brot -+ add X7, r11, X7, ror #brot -+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 -+ eor X4, X4, X0 -+ eor X5, X5, X1 -+ eor X6, X6, X2 -+ eor X7, X7, X3 -+ stmia r14!, {X4-X7} -+ -+ // x8-x15 -+ pop {r0-r7} // (x8-x9,x12-x15,x10-x11) -+ __ldrd r8, r9, sp, 32 -+ __ldrd r10, r11, sp, 40 -+ add r0, r0, r8 // x8 -+ add r1, r1, r9 // x9 -+ add r6, r6, r10 // x10 -+ add r7, r7, r11 // x11 -+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 -+ ldmia r12!, {r8-r11} -+ eor r0, r0, r8 // x8 -+ eor r1, r1, r9 // x9 -+ eor r6, r6, r10 // x10 -+ eor r7, r7, r11 // x11 -+ stmia r14!, {r0,r1,r6,r7} -+ ldmia r12!, {r0,r1,r6,r7} -+ __ldrd r8, r9, sp, 48 -+ __ldrd r10, r11, sp, 56 -+ add r2, r8, r2, ror #drot // x12 -+ add r3, r9, r3, ror #drot // x13 -+ add r4, r10, r4, ror #drot // x14 -+ add r5, r11, r5, ror #drot // x15 -+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 -+ ldr r9, [sp, #72] // load LEN -+ eor r2, r2, r0 // x12 -+ eor r3, r3, r1 // x13 -+ eor r4, r4, r6 // x14 -+ eor r5, r5, r7 // x15 -+ subs r9, #64 // decrement and check LEN -+ stmia r14!, {r2-r5} -+ -+ beq .Ldone\@ -+ -+.Lprepare_for_next_block\@: -+ -+ // Stack: x0-x15 OUT IN LEN -+ -+ // Increment block counter (x12) -+ add r8, #1 -+ -+ // Store updated (OUT, IN, LEN) -+ str r14, [sp, #64] -+ str r12, [sp, #68] -+ str r9, [sp, #72] -+ -+ mov r14, sp -+ -+ // Store updated block counter (x12) -+ str r8, [sp, #48] -+ -+ sub sp, #16 -+ -+ // Reload state and do next block -+ ldmia r14!, {r0-r11} // load x0-x11 -+ __strd r10, r11, sp, 8 // store 
x10-x11 before state -+ ldmia r14, {r10-r12,r14} // load x12-x15 -+ b .Lnext_block\@ -+ -+.Lxor_slowpath\@: -+ // Slow path: < 64 bytes remaining, or unaligned input or output buffer. -+ // We handle it by storing the 64 bytes of keystream to the stack, then -+ // XOR-ing the needed portion with the data. -+ -+ // Allocate keystream buffer -+ sub sp, #64 -+ mov r14, sp -+ -+ // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN -+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0. -+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. -+ -+ // Save keystream for x0-x3 -+ __ldrd r8, r9, sp, 96 -+ __ldrd r10, r11, sp, 104 -+ add X0, X0, r8 -+ add X1, X1, r9 -+ add X2, X2, r10 -+ add X3, X3, r11 -+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 -+ stmia r14!, {X0-X3} -+ -+ // Save keystream for x4-x7 -+ __ldrd r8, r9, sp, 112 -+ __ldrd r10, r11, sp, 120 -+ add X4, r8, X4, ror #brot -+ add X5, r9, X5, ror #brot -+ add X6, r10, X6, ror #brot -+ add X7, r11, X7, ror #brot -+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 -+ add r8, sp, #64 -+ stmia r14!, {X4-X7} -+ -+ // Save keystream for x8-x15 -+ ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11) -+ __ldrd r8, r9, sp, 128 -+ __ldrd r10, r11, sp, 136 -+ add r0, r0, r8 // x8 -+ add r1, r1, r9 // x9 -+ add r6, r6, r10 // x10 -+ add r7, r7, r11 // x11 -+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 -+ stmia r14!, {r0,r1,r6,r7} -+ __ldrd r8, r9, sp, 144 -+ __ldrd r10, r11, sp, 152 -+ add r2, r8, r2, ror #drot // x12 -+ add r3, r9, r3, ror #drot // x13 -+ add r4, r10, r4, ror #drot // x14 -+ add r5, r11, r5, ror #drot // x15 -+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 -+ stmia r14, {r2-r5} -+ -+ // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN -+ // Registers: r8 is block counter, r12 is IN. 
-+ -+ ldr r9, [sp, #168] // LEN -+ ldr r14, [sp, #160] // OUT -+ cmp r9, #64 -+ mov r0, sp -+ movle r1, r9 -+ movgt r1, #64 -+ // r1 is number of bytes to XOR, in range [1, 64] -+ -+.if __LINUX_ARM_ARCH__ < 6 -+ orr r2, r12, r14 -+ tst r2, #3 // IN or OUT misaligned? -+ bne .Lxor_next_byte\@ -+.endif -+ -+ // XOR a word at a time -+.rept 16 -+ subs r1, #4 -+ blt .Lxor_words_done\@ -+ ldr r2, [r12], #4 -+ ldr r3, [r0], #4 -+ eor r2, r2, r3 -+ str r2, [r14], #4 -+.endr -+ b .Lxor_slowpath_done\@ -+.Lxor_words_done\@: -+ ands r1, r1, #3 -+ beq .Lxor_slowpath_done\@ -+ -+ // XOR a byte at a time -+.Lxor_next_byte\@: -+ ldrb r2, [r12], #1 -+ ldrb r3, [r0], #1 -+ eor r2, r2, r3 -+ strb r2, [r14], #1 -+ subs r1, #1 -+ bne .Lxor_next_byte\@ -+ -+.Lxor_slowpath_done\@: -+ subs r9, #64 -+ add sp, #96 -+ bgt .Lprepare_for_next_block\@ -+ -+.Ldone\@: -+.endm // _chacha -+ -+/* -+ * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], -+ * const u32 iv[4]); -+ */ -+ENTRY(chacha20_arm) -+ cmp r2, #0 // len == 0? -+ reteq lr -+ -+ push {r0-r2,r4-r11,lr} -+ -+ // Push state x0-x15 onto stack. -+ // Also store an extra copy of x10-x11 just before the state. 
-+ -+ ldr r4, [sp, #48] // iv -+ mov r0, sp -+ sub sp, #80 -+ -+ // iv: x12-x15 -+ ldm r4, {X12,X13,X14,X15} -+ stmdb r0!, {X12,X13,X14,X15} -+ -+ // key: x4-x11 -+ __ldrd X8_X10, X9_X11, r3, 24 -+ __strd X8_X10, X9_X11, sp, 8 -+ stmdb r0!, {X8_X10, X9_X11} -+ ldm r3, {X4-X9_X11} -+ stmdb r0!, {X4-X9_X11} -+ -+ // constants: x0-x3 -+ adrl X3, .Lexpand_32byte_k -+ ldm X3, {X0-X3} -+ __strd X0, X1, sp, 16 -+ __strd X2, X3, sp, 24 -+ -+ _chacha 20 -+ -+ add sp, #76 -+ pop {r4-r11, pc} -+ENDPROC(chacha20_arm) -+ -+/* -+ * void hchacha20_arm(const u32 state[16], u32 out[8]); -+ */ -+ENTRY(hchacha20_arm) -+ push {r1,r4-r11,lr} -+ -+ mov r14, r0 -+ ldmia r14!, {r0-r11} // load x0-x11 -+ push {r10-r11} // store x10-x11 to stack -+ ldm r14, {r10-r12,r14} // load x12-x15 -+ sub sp, #8 -+ -+ _chacha_permute 20 -+ -+ // Skip over (unused0-unused1, x10-x11) -+ add sp, #16 -+ -+ // Fix up rotations of x12-x15 -+ ror X12, X12, #drot -+ ror X13, X13, #drot -+ pop {r4} // load 'out' -+ ror X14, X14, #drot -+ ror X15, X15, #drot -+ -+ // Store (x0-x3,x12-x15) to 'out' -+ stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} -+ -+ pop {r4-r11,pc} -+ENDPROC(hchacha20_arm) diff --git a/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch b/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch deleted file mode 100644 index 88c9738dbc..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch +++ /dev/null @@ -1,691 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:14 +0100 -Subject: [PATCH] crypto: arm/chacha - remove dependency on generic ChaCha - driver - -commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream. 
- -Instead of falling back to the generic ChaCha skcipher driver for -non-SIMD cases, use a fast scalar implementation for ARM authored -by Eric Biggers. This removes the module dependency on chacha-generic -altogether, which also simplifies things when we expose the ChaCha -library interface from this module. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/Kconfig | 4 +- - arch/arm/crypto/Makefile | 3 +- - arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++ - arch/arm/crypto/chacha-neon-glue.c | 202 ------------------ - arch/arm/crypto/chacha-scalar-core.S | 65 +++--- - arch/arm64/crypto/chacha-neon-glue.c | 2 +- - 6 files changed, 340 insertions(+), 240 deletions(-) - create mode 100644 arch/arm/crypto/chacha-glue.c - delete mode 100644 arch/arm/crypto/chacha-neon-glue.c - ---- a/arch/arm/crypto/Kconfig -+++ b/arch/arm/crypto/Kconfig -@@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE - select CRYPTO_HASH - - config CRYPTO_CHACHA20_NEON -- tristate "NEON accelerated ChaCha stream cipher algorithms" -- depends on KERNEL_MODE_NEON -+ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" - select CRYPTO_BLKCIPHER -- select CRYPTO_CHACHA20 - - config CRYPTO_NHPOLY1305_NEON - tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" ---- a/arch/arm/crypto/Makefile -+++ b/arch/arm/crypto/Makefile -@@ -53,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glu - ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o - crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o - crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o --chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o -+chacha-neon-y := chacha-scalar-core.o chacha-glue.o -+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o - nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o - - ifdef REGENERATE_ARM_CRYPTO ---- /dev/null -+++ 
b/arch/arm/crypto/chacha-glue.c -@@ -0,0 +1,304 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers, -+ * including ChaCha20 (RFC7539) -+ * -+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> -+ * Copyright (C) 2015 Martin Willi -+ */ -+ -+#include <crypto/algapi.h> -+#include <crypto/internal/chacha.h> -+#include <crypto/internal/simd.h> -+#include <crypto/internal/skcipher.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+#include <asm/cputype.h> -+#include <asm/hwcap.h> -+#include <asm/neon.h> -+#include <asm/simd.h> -+ -+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, -+ int nrounds); -+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, -+ int nrounds); -+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); -+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); -+ -+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, -+ const u32 *state, int nrounds); -+ -+static inline bool neon_usable(void) -+{ -+ return crypto_simd_usable(); -+} -+ -+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes, int nrounds) -+{ -+ u8 buf[CHACHA_BLOCK_SIZE]; -+ -+ while (bytes >= CHACHA_BLOCK_SIZE * 4) { -+ chacha_4block_xor_neon(state, dst, src, nrounds); -+ bytes -= CHACHA_BLOCK_SIZE * 4; -+ src += CHACHA_BLOCK_SIZE * 4; -+ dst += CHACHA_BLOCK_SIZE * 4; -+ state[12] += 4; -+ } -+ while (bytes >= CHACHA_BLOCK_SIZE) { -+ chacha_block_xor_neon(state, dst, src, nrounds); -+ bytes -= CHACHA_BLOCK_SIZE; -+ src += CHACHA_BLOCK_SIZE; -+ dst += CHACHA_BLOCK_SIZE; -+ state[12]++; -+ } -+ if (bytes) { -+ memcpy(buf, src, bytes); -+ chacha_block_xor_neon(state, buf, buf, nrounds); -+ memcpy(dst, buf, bytes); -+ } -+} -+ -+static int chacha_stream_xor(struct skcipher_request *req, -+ const struct chacha_ctx *ctx, const u8 *iv, -+ bool neon) -+{ -+ 
struct skcipher_walk walk; -+ u32 state[16]; -+ int err; -+ -+ err = skcipher_walk_virt(&walk, req, false); -+ -+ chacha_init_generic(state, ctx->key, iv); -+ -+ while (walk.nbytes > 0) { -+ unsigned int nbytes = walk.nbytes; -+ -+ if (nbytes < walk.total) -+ nbytes = round_down(nbytes, walk.stride); -+ -+ if (!neon) { -+ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, -+ nbytes, state, ctx->nrounds); -+ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); -+ } else { -+ kernel_neon_begin(); -+ chacha_doneon(state, walk.dst.virt.addr, -+ walk.src.virt.addr, nbytes, ctx->nrounds); -+ kernel_neon_end(); -+ } -+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); -+ } -+ -+ return err; -+} -+ -+static int do_chacha(struct skcipher_request *req, bool neon) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -+ -+ return chacha_stream_xor(req, ctx, req->iv, neon); -+} -+ -+static int chacha_arm(struct skcipher_request *req) -+{ -+ return do_chacha(req, false); -+} -+ -+static int chacha_neon(struct skcipher_request *req) -+{ -+ return do_chacha(req, neon_usable()); -+} -+ -+static int do_xchacha(struct skcipher_request *req, bool neon) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct chacha_ctx subctx; -+ u32 state[16]; -+ u8 real_iv[16]; -+ -+ chacha_init_generic(state, ctx->key, req->iv); -+ -+ if (!neon) { -+ hchacha_block_arm(state, subctx.key, ctx->nrounds); -+ } else { -+ kernel_neon_begin(); -+ hchacha_block_neon(state, subctx.key, ctx->nrounds); -+ kernel_neon_end(); -+ } -+ subctx.nrounds = ctx->nrounds; -+ -+ memcpy(&real_iv[0], req->iv + 24, 8); -+ memcpy(&real_iv[8], req->iv + 16, 8); -+ return chacha_stream_xor(req, &subctx, real_iv, neon); -+} -+ -+static int xchacha_arm(struct skcipher_request *req) -+{ -+ return do_xchacha(req, false); -+} -+ -+static int xchacha_neon(struct skcipher_request *req) 
-+{ -+ return do_xchacha(req, neon_usable()); -+} -+ -+static struct skcipher_alg arm_algs[] = { -+ { -+ .base.cra_name = "chacha20", -+ .base.cra_driver_name = "chacha20-arm", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = CHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = chacha_arm, -+ .decrypt = chacha_arm, -+ }, { -+ .base.cra_name = "xchacha20", -+ .base.cra_driver_name = "xchacha20-arm", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = xchacha_arm, -+ .decrypt = xchacha_arm, -+ }, { -+ .base.cra_name = "xchacha12", -+ .base.cra_driver_name = "xchacha12-arm", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha12_setkey, -+ .encrypt = xchacha_arm, -+ .decrypt = xchacha_arm, -+ }, -+}; -+ -+static struct skcipher_alg neon_algs[] = { -+ { -+ .base.cra_name = "chacha20", -+ .base.cra_driver_name = "chacha20-neon", -+ .base.cra_priority = 300, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = CHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .walksize = 4 * CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = chacha_neon, -+ .decrypt = chacha_neon, -+ }, { -+ 
.base.cra_name = "xchacha20", -+ .base.cra_driver_name = "xchacha20-neon", -+ .base.cra_priority = 300, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .walksize = 4 * CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = xchacha_neon, -+ .decrypt = xchacha_neon, -+ }, { -+ .base.cra_name = "xchacha12", -+ .base.cra_driver_name = "xchacha12-neon", -+ .base.cra_priority = 300, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .walksize = 4 * CHACHA_BLOCK_SIZE, -+ .setkey = chacha12_setkey, -+ .encrypt = xchacha_neon, -+ .decrypt = xchacha_neon, -+ } -+}; -+ -+static int __init chacha_simd_mod_init(void) -+{ -+ int err; -+ -+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -+ if (err) -+ return err; -+ -+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { -+ int i; -+ -+ switch (read_cpuid_part()) { -+ case ARM_CPU_PART_CORTEX_A7: -+ case ARM_CPU_PART_CORTEX_A5: -+ /* -+ * The Cortex-A7 and Cortex-A5 do not perform well with -+ * the NEON implementation but do incredibly with the -+ * scalar one and use less power. 
-+ */ -+ for (i = 0; i < ARRAY_SIZE(neon_algs); i++) -+ neon_algs[i].base.cra_priority = 0; -+ break; -+ } -+ -+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); -+ if (err) -+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -+ } -+ return err; -+} -+ -+static void __exit chacha_simd_mod_fini(void) -+{ -+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) -+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); -+} -+ -+module_init(chacha_simd_mod_init); -+module_exit(chacha_simd_mod_fini); -+ -+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); -+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -+MODULE_LICENSE("GPL v2"); -+MODULE_ALIAS_CRYPTO("chacha20"); -+MODULE_ALIAS_CRYPTO("chacha20-arm"); -+MODULE_ALIAS_CRYPTO("xchacha20"); -+MODULE_ALIAS_CRYPTO("xchacha20-arm"); -+MODULE_ALIAS_CRYPTO("xchacha12"); -+MODULE_ALIAS_CRYPTO("xchacha12-arm"); -+#ifdef CONFIG_KERNEL_MODE_NEON -+MODULE_ALIAS_CRYPTO("chacha20-neon"); -+MODULE_ALIAS_CRYPTO("xchacha20-neon"); -+MODULE_ALIAS_CRYPTO("xchacha12-neon"); -+#endif ---- a/arch/arm/crypto/chacha-neon-glue.c -+++ /dev/null -@@ -1,202 +0,0 @@ --/* -- * ARM NEON accelerated ChaCha and XChaCha stream ciphers, -- * including ChaCha20 (RFC7539) -- * -- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License version 2 as -- * published by the Free Software Foundation. 
-- * -- * Based on: -- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code -- * -- * Copyright (C) 2015 Martin Willi -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- */ -- --#include <crypto/algapi.h> --#include <crypto/internal/chacha.h> --#include <crypto/internal/simd.h> --#include <crypto/internal/skcipher.h> --#include <linux/kernel.h> --#include <linux/module.h> -- --#include <asm/hwcap.h> --#include <asm/neon.h> --#include <asm/simd.h> -- --asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, -- int nrounds); --asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, -- int nrounds); --asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); -- --static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, -- unsigned int bytes, int nrounds) --{ -- u8 buf[CHACHA_BLOCK_SIZE]; -- -- while (bytes >= CHACHA_BLOCK_SIZE * 4) { -- chacha_4block_xor_neon(state, dst, src, nrounds); -- bytes -= CHACHA_BLOCK_SIZE * 4; -- src += CHACHA_BLOCK_SIZE * 4; -- dst += CHACHA_BLOCK_SIZE * 4; -- state[12] += 4; -- } -- while (bytes >= CHACHA_BLOCK_SIZE) { -- chacha_block_xor_neon(state, dst, src, nrounds); -- bytes -= CHACHA_BLOCK_SIZE; -- src += CHACHA_BLOCK_SIZE; -- dst += CHACHA_BLOCK_SIZE; -- state[12]++; -- } -- if (bytes) { -- memcpy(buf, src, bytes); -- chacha_block_xor_neon(state, buf, buf, nrounds); -- memcpy(dst, buf, bytes); -- } --} -- --static int chacha_neon_stream_xor(struct skcipher_request *req, -- const struct chacha_ctx *ctx, const u8 *iv) --{ -- struct skcipher_walk walk; -- u32 state[16]; -- int err; -- -- err = skcipher_walk_virt(&walk, req, false); -- -- crypto_chacha_init(state, ctx, iv); -- -- while (walk.nbytes > 0) { -- unsigned int nbytes = walk.nbytes; -- 
-- if (nbytes < walk.total) -- nbytes = round_down(nbytes, walk.stride); -- -- kernel_neon_begin(); -- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, -- nbytes, ctx->nrounds); -- kernel_neon_end(); -- err = skcipher_walk_done(&walk, walk.nbytes - nbytes); -- } -- -- return err; --} -- --static int chacha_neon(struct skcipher_request *req) --{ -- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -- -- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) -- return crypto_chacha_crypt(req); -- -- return chacha_neon_stream_xor(req, ctx, req->iv); --} -- --static int xchacha_neon(struct skcipher_request *req) --{ -- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -- struct chacha_ctx subctx; -- u32 state[16]; -- u8 real_iv[16]; -- -- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) -- return crypto_xchacha_crypt(req); -- -- crypto_chacha_init(state, ctx, req->iv); -- -- kernel_neon_begin(); -- hchacha_block_neon(state, subctx.key, ctx->nrounds); -- kernel_neon_end(); -- subctx.nrounds = ctx->nrounds; -- -- memcpy(&real_iv[0], req->iv + 24, 8); -- memcpy(&real_iv[8], req->iv + 16, 8); -- return chacha_neon_stream_xor(req, &subctx, real_iv); --} -- --static struct skcipher_alg algs[] = { -- { -- .base.cra_name = "chacha20", -- .base.cra_driver_name = "chacha20-neon", -- .base.cra_priority = 300, -- .base.cra_blocksize = 1, -- .base.cra_ctxsize = sizeof(struct chacha_ctx), -- .base.cra_module = THIS_MODULE, -- -- .min_keysize = CHACHA_KEY_SIZE, -- .max_keysize = CHACHA_KEY_SIZE, -- .ivsize = CHACHA_IV_SIZE, -- .chunksize = CHACHA_BLOCK_SIZE, -- .walksize = 4 * CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha20_setkey, -- .encrypt = chacha_neon, -- .decrypt = chacha_neon, -- }, { -- .base.cra_name = "xchacha20", -- .base.cra_driver_name = "xchacha20-neon", -- .base.cra_priority = 300, -- .base.cra_blocksize = 
1, -- .base.cra_ctxsize = sizeof(struct chacha_ctx), -- .base.cra_module = THIS_MODULE, -- -- .min_keysize = CHACHA_KEY_SIZE, -- .max_keysize = CHACHA_KEY_SIZE, -- .ivsize = XCHACHA_IV_SIZE, -- .chunksize = CHACHA_BLOCK_SIZE, -- .walksize = 4 * CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha20_setkey, -- .encrypt = xchacha_neon, -- .decrypt = xchacha_neon, -- }, { -- .base.cra_name = "xchacha12", -- .base.cra_driver_name = "xchacha12-neon", -- .base.cra_priority = 300, -- .base.cra_blocksize = 1, -- .base.cra_ctxsize = sizeof(struct chacha_ctx), -- .base.cra_module = THIS_MODULE, -- -- .min_keysize = CHACHA_KEY_SIZE, -- .max_keysize = CHACHA_KEY_SIZE, -- .ivsize = XCHACHA_IV_SIZE, -- .chunksize = CHACHA_BLOCK_SIZE, -- .walksize = 4 * CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha12_setkey, -- .encrypt = xchacha_neon, -- .decrypt = xchacha_neon, -- } --}; -- --static int __init chacha_simd_mod_init(void) --{ -- if (!(elf_hwcap & HWCAP_NEON)) -- return -ENODEV; -- -- return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); --} -- --static void __exit chacha_simd_mod_fini(void) --{ -- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); --} -- --module_init(chacha_simd_mod_init); --module_exit(chacha_simd_mod_fini); -- --MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); --MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); --MODULE_LICENSE("GPL v2"); --MODULE_ALIAS_CRYPTO("chacha20"); --MODULE_ALIAS_CRYPTO("chacha20-neon"); --MODULE_ALIAS_CRYPTO("xchacha20"); --MODULE_ALIAS_CRYPTO("xchacha20-neon"); --MODULE_ALIAS_CRYPTO("xchacha12"); --MODULE_ALIAS_CRYPTO("xchacha12-neon"); ---- a/arch/arm/crypto/chacha-scalar-core.S -+++ b/arch/arm/crypto/chacha-scalar-core.S -@@ -41,14 +41,6 @@ - X14 .req r12 - X15 .req r14 - --.Lexpand_32byte_k: -- // "expand 32-byte k" -- .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 -- --#ifdef __thumb2__ --# define adrl adr --#endif -- - .macro __rev out, in, t0, t1, t2 - .if __LINUX_ARM_ARCH__ >= 6 - rev 
\out, \in -@@ -391,61 +383,65 @@ - .endm // _chacha - - /* -- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], -- * const u32 iv[4]); -+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, -+ * const u32 *state, int nrounds); - */ --ENTRY(chacha20_arm) -+ENTRY(chacha_doarm) - cmp r2, #0 // len == 0? - reteq lr - -+ ldr ip, [sp] -+ cmp ip, #12 -+ - push {r0-r2,r4-r11,lr} - - // Push state x0-x15 onto stack. - // Also store an extra copy of x10-x11 just before the state. - -- ldr r4, [sp, #48] // iv -- mov r0, sp -- sub sp, #80 -- -- // iv: x12-x15 -- ldm r4, {X12,X13,X14,X15} -- stmdb r0!, {X12,X13,X14,X15} -+ add X12, r3, #48 -+ ldm X12, {X12,X13,X14,X15} -+ push {X12,X13,X14,X15} -+ sub sp, sp, #64 - -- // key: x4-x11 -- __ldrd X8_X10, X9_X11, r3, 24 -+ __ldrd X8_X10, X9_X11, r3, 40 - __strd X8_X10, X9_X11, sp, 8 -- stmdb r0!, {X8_X10, X9_X11} -- ldm r3, {X4-X9_X11} -- stmdb r0!, {X4-X9_X11} -- -- // constants: x0-x3 -- adrl X3, .Lexpand_32byte_k -- ldm X3, {X0-X3} -+ __strd X8_X10, X9_X11, sp, 56 -+ ldm r3, {X0-X9_X11} - __strd X0, X1, sp, 16 - __strd X2, X3, sp, 24 -+ __strd X4, X5, sp, 32 -+ __strd X6, X7, sp, 40 -+ __strd X8_X10, X9_X11, sp, 48 - -+ beq 1f - _chacha 20 - -- add sp, #76 -+0: add sp, #76 - pop {r4-r11, pc} --ENDPROC(chacha20_arm) -+ -+1: _chacha 12 -+ b 0b -+ENDPROC(chacha_doarm) - - /* -- * void hchacha20_arm(const u32 state[16], u32 out[8]); -+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds); - */ --ENTRY(hchacha20_arm) -+ENTRY(hchacha_block_arm) - push {r1,r4-r11,lr} - -+ cmp r2, #12 // ChaCha12 ? 
-+ - mov r14, r0 - ldmia r14!, {r0-r11} // load x0-x11 - push {r10-r11} // store x10-x11 to stack - ldm r14, {r10-r12,r14} // load x12-x15 - sub sp, #8 - -+ beq 1f - _chacha_permute 20 - - // Skip over (unused0-unused1, x10-x11) -- add sp, #16 -+0: add sp, #16 - - // Fix up rotations of x12-x15 - ror X12, X12, #drot -@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm) - stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} - - pop {r4-r11,pc} --ENDPROC(hchacha20_arm) -+ -+1: _chacha_permute 12 -+ b 0b -+ENDPROC(hchacha_block_arm) ---- a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -1,5 +1,5 @@ - /* -- * ARM NEON accelerated ChaCha and XChaCha stream ciphers, -+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, - * including ChaCha20 (RFC7539) - * - * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0009-crypto-arm-chacha-expose-ARM-ChaCha-routine-as-libra.patch b/target/linux/generic/backport-5.4/080-wireguard-0009-crypto-arm-chacha-expose-ARM-ChaCha-routine-as-libra.patch deleted file mode 100644 index 4006dc63b2..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0009-crypto-arm-chacha-expose-ARM-ChaCha-routine-as-libra.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:15 +0100 -Subject: [PATCH] crypto: arm/chacha - expose ARM ChaCha routine as library - function - -commit a44a3430d71bad4ee56788a59fff099b291ea54c upstream. - -Expose the accelerated NEON ChaCha routine directly as a symbol -export so that users of the ChaCha library API can use it directly. 
- -Given that calls into the library API will always go through the -routines in this module if it is enabled, switch to static keys -to select the optimal implementation available (which may be none -at all, in which case we defer to the generic implementation for -all invocations). - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/Kconfig | 1 + - arch/arm/crypto/chacha-glue.c | 41 ++++++++++++++++++++++++++++++++++- - 2 files changed, 41 insertions(+), 1 deletion(-) - ---- a/arch/arm/crypto/Kconfig -+++ b/arch/arm/crypto/Kconfig -@@ -129,6 +129,7 @@ config CRYPTO_CRC32_ARM_CE - config CRYPTO_CHACHA20_NEON - tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" - select CRYPTO_BLKCIPHER -+ select CRYPTO_ARCH_HAVE_LIB_CHACHA - - config CRYPTO_NHPOLY1305_NEON - tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" ---- a/arch/arm/crypto/chacha-glue.c -+++ b/arch/arm/crypto/chacha-glue.c -@@ -11,6 +11,7 @@ - #include <crypto/internal/chacha.h> - #include <crypto/internal/simd.h> - #include <crypto/internal/skcipher.h> -+#include <linux/jump_label.h> - #include <linux/kernel.h> - #include <linux/module.h> - -@@ -29,9 +30,11 @@ asmlinkage void hchacha_block_neon(const - asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, - const u32 *state, int nrounds); - -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); -+ - static inline bool neon_usable(void) - { -- return crypto_simd_usable(); -+ return static_branch_likely(&use_neon) && crypto_simd_usable(); - } - - static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, -@@ -60,6 +63,40 @@ static void chacha_doneon(u32 *state, u8 - } - } - -+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) -+{ -+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { -+ hchacha_block_arm(state, stream, nrounds); -+ } else 
{ -+ kernel_neon_begin(); -+ hchacha_block_neon(state, stream, nrounds); -+ kernel_neon_end(); -+ } -+} -+EXPORT_SYMBOL(hchacha_block_arch); -+ -+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -+{ -+ chacha_init_generic(state, key, iv); -+} -+EXPORT_SYMBOL(chacha_init_arch); -+ -+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, -+ int nrounds) -+{ -+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || -+ bytes <= CHACHA_BLOCK_SIZE) { -+ chacha_doarm(dst, src, bytes, state, nrounds); -+ state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); -+ return; -+ } -+ -+ kernel_neon_begin(); -+ chacha_doneon(state, dst, src, bytes, nrounds); -+ kernel_neon_end(); -+} -+EXPORT_SYMBOL(chacha_crypt_arch); -+ - static int chacha_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv, - bool neon) -@@ -269,6 +306,8 @@ static int __init chacha_simd_mod_init(v - for (i = 0; i < ARRAY_SIZE(neon_algs); i++) - neon_algs[i].base.cra_priority = 0; - break; -+ default: -+ static_branch_enable(&use_neon); - } - - err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0010-crypto-mips-chacha-import-32r2-ChaCha-code-from-Zinc.patch b/target/linux/generic/backport-5.4/080-wireguard-0010-crypto-mips-chacha-import-32r2-ChaCha-code-from-Zinc.patch deleted file mode 100644 index 0a2b4c4523..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0010-crypto-mips-chacha-import-32r2-ChaCha-code-from-Zinc.patch +++ /dev/null @@ -1,451 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:16 +0100 -Subject: [PATCH] crypto: mips/chacha - import 32r2 ChaCha code from Zinc -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 49aa7c00eddf8d8f462b0256bd82e81762d7b0c6 upstream. 
- -This imports the accelerated MIPS 32r2 ChaCha20 implementation from the -Zinc patch set. - -Co-developed-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/mips/crypto/chacha-core.S | 424 +++++++++++++++++++++++++++++++++ - 1 file changed, 424 insertions(+) - create mode 100644 arch/mips/crypto/chacha-core.S - ---- /dev/null -+++ b/arch/mips/crypto/chacha-core.S -@@ -0,0 +1,424 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -+/* -+ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved. -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#define MASK_U32 0x3c -+#define CHACHA20_BLOCK_SIZE 64 -+#define STACK_SIZE 32 -+ -+#define X0 $t0 -+#define X1 $t1 -+#define X2 $t2 -+#define X3 $t3 -+#define X4 $t4 -+#define X5 $t5 -+#define X6 $t6 -+#define X7 $t7 -+#define X8 $t8 -+#define X9 $t9 -+#define X10 $v1 -+#define X11 $s6 -+#define X12 $s5 -+#define X13 $s4 -+#define X14 $s3 -+#define X15 $s2 -+/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ -+#define T0 $s1 -+#define T1 $s0 -+#define T(n) T ## n -+#define X(n) X ## n -+ -+/* Input arguments */ -+#define STATE $a0 -+#define OUT $a1 -+#define IN $a2 -+#define BYTES $a3 -+ -+/* Output argument */ -+/* NONCE[0] is kept in a register and not in memory. -+ * We don't want to touch original value in memory. -+ * Must be incremented every loop iteration. -+ */ -+#define NONCE_0 $v0 -+ -+/* SAVED_X and SAVED_CA are set in the jump table. -+ * Use regs which are overwritten on exit else we don't leak clear data. -+ * They are used to handling the last bytes which are not multiple of 4. 
-+ */ -+#define SAVED_X X15 -+#define SAVED_CA $s7 -+ -+#define IS_UNALIGNED $s7 -+ -+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+#define MSB 0 -+#define LSB 3 -+#define ROTx rotl -+#define ROTR(n) rotr n, 24 -+#define CPU_TO_LE32(n) \ -+ wsbh n; \ -+ rotr n, 16; -+#else -+#define MSB 3 -+#define LSB 0 -+#define ROTx rotr -+#define CPU_TO_LE32(n) -+#define ROTR(n) -+#endif -+ -+#define FOR_EACH_WORD(x) \ -+ x( 0); \ -+ x( 1); \ -+ x( 2); \ -+ x( 3); \ -+ x( 4); \ -+ x( 5); \ -+ x( 6); \ -+ x( 7); \ -+ x( 8); \ -+ x( 9); \ -+ x(10); \ -+ x(11); \ -+ x(12); \ -+ x(13); \ -+ x(14); \ -+ x(15); -+ -+#define FOR_EACH_WORD_REV(x) \ -+ x(15); \ -+ x(14); \ -+ x(13); \ -+ x(12); \ -+ x(11); \ -+ x(10); \ -+ x( 9); \ -+ x( 8); \ -+ x( 7); \ -+ x( 6); \ -+ x( 5); \ -+ x( 4); \ -+ x( 3); \ -+ x( 2); \ -+ x( 1); \ -+ x( 0); -+ -+#define PLUS_ONE_0 1 -+#define PLUS_ONE_1 2 -+#define PLUS_ONE_2 3 -+#define PLUS_ONE_3 4 -+#define PLUS_ONE_4 5 -+#define PLUS_ONE_5 6 -+#define PLUS_ONE_6 7 -+#define PLUS_ONE_7 8 -+#define PLUS_ONE_8 9 -+#define PLUS_ONE_9 10 -+#define PLUS_ONE_10 11 -+#define PLUS_ONE_11 12 -+#define PLUS_ONE_12 13 -+#define PLUS_ONE_13 14 -+#define PLUS_ONE_14 15 -+#define PLUS_ONE_15 16 -+#define PLUS_ONE(x) PLUS_ONE_ ## x -+#define _CONCAT3(a,b,c) a ## b ## c -+#define CONCAT3(a,b,c) _CONCAT3(a,b,c) -+ -+#define STORE_UNALIGNED(x) \ -+CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ -+ .if (x != 12); \ -+ lw T0, (x*4)(STATE); \ -+ .endif; \ -+ lwl T1, (x*4)+MSB ## (IN); \ -+ lwr T1, (x*4)+LSB ## (IN); \ -+ .if (x == 12); \ -+ addu X ## x, NONCE_0; \ -+ .else; \ -+ addu X ## x, T0; \ -+ .endif; \ -+ CPU_TO_LE32(X ## x); \ -+ xor X ## x, T1; \ -+ swl X ## x, (x*4)+MSB ## (OUT); \ -+ swr X ## x, (x*4)+LSB ## (OUT); -+ -+#define STORE_ALIGNED(x) \ -+CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ -+ .if (x != 12); \ -+ lw T0, (x*4)(STATE); \ -+ .endif; \ -+ lw T1, (x*4) ## (IN); \ -+ .if (x == 12); \ -+ addu X ## x, NONCE_0; \ -+ .else; \ 
-+ addu X ## x, T0; \ -+ .endif; \ -+ CPU_TO_LE32(X ## x); \ -+ xor X ## x, T1; \ -+ sw X ## x, (x*4) ## (OUT); -+ -+/* Jump table macro. -+ * Used for setup and handling the last bytes, which are not multiple of 4. -+ * X15 is free to store Xn -+ * Every jumptable entry must be equal in size. -+ */ -+#define JMPTBL_ALIGNED(x) \ -+.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ -+ .set noreorder; \ -+ b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ -+ .if (x == 12); \ -+ addu SAVED_X, X ## x, NONCE_0; \ -+ .else; \ -+ addu SAVED_X, X ## x, SAVED_CA; \ -+ .endif; \ -+ .set reorder -+ -+#define JMPTBL_UNALIGNED(x) \ -+.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ -+ .set noreorder; \ -+ b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ -+ .if (x == 12); \ -+ addu SAVED_X, X ## x, NONCE_0; \ -+ .else; \ -+ addu SAVED_X, X ## x, SAVED_CA; \ -+ .endif; \ -+ .set reorder -+ -+#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ -+ addu X(A), X(K); \ -+ addu X(B), X(L); \ -+ addu X(C), X(M); \ -+ addu X(D), X(N); \ -+ xor X(V), X(A); \ -+ xor X(W), X(B); \ -+ xor X(Y), X(C); \ -+ xor X(Z), X(D); \ -+ rotl X(V), S; \ -+ rotl X(W), S; \ -+ rotl X(Y), S; \ -+ rotl X(Z), S; -+ -+.text -+.set reorder -+.set noat -+.globl chacha20_mips -+.ent chacha20_mips -+chacha20_mips: -+ .frame $sp, STACK_SIZE, $ra -+ -+ addiu $sp, -STACK_SIZE -+ -+ /* Return bytes = 0. */ -+ beqz BYTES, .Lchacha20_mips_end -+ -+ lw NONCE_0, 48(STATE) -+ -+ /* Save s0-s7 */ -+ sw $s0, 0($sp) -+ sw $s1, 4($sp) -+ sw $s2, 8($sp) -+ sw $s3, 12($sp) -+ sw $s4, 16($sp) -+ sw $s5, 20($sp) -+ sw $s6, 24($sp) -+ sw $s7, 28($sp) -+ -+ /* Test IN or OUT is unaligned. 
-+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 -+ */ -+ or IS_UNALIGNED, IN, OUT -+ andi IS_UNALIGNED, 0x3 -+ -+ /* Set number of rounds */ -+ li $at, 20 -+ -+ b .Lchacha20_rounds_start -+ -+.align 4 -+.Loop_chacha20_rounds: -+ addiu IN, CHACHA20_BLOCK_SIZE -+ addiu OUT, CHACHA20_BLOCK_SIZE -+ addiu NONCE_0, 1 -+ -+.Lchacha20_rounds_start: -+ lw X0, 0(STATE) -+ lw X1, 4(STATE) -+ lw X2, 8(STATE) -+ lw X3, 12(STATE) -+ -+ lw X4, 16(STATE) -+ lw X5, 20(STATE) -+ lw X6, 24(STATE) -+ lw X7, 28(STATE) -+ lw X8, 32(STATE) -+ lw X9, 36(STATE) -+ lw X10, 40(STATE) -+ lw X11, 44(STATE) -+ -+ move X12, NONCE_0 -+ lw X13, 52(STATE) -+ lw X14, 56(STATE) -+ lw X15, 60(STATE) -+ -+.Loop_chacha20_xor_rounds: -+ addiu $at, -2 -+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); -+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); -+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); -+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); -+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); -+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); -+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); -+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); -+ bnez $at, .Loop_chacha20_xor_rounds -+ -+ addiu BYTES, -(CHACHA20_BLOCK_SIZE) -+ -+ /* Is data src/dst unaligned? Jump */ -+ bnez IS_UNALIGNED, .Loop_chacha20_unaligned -+ -+ /* Set number rounds here to fill delayslot. */ -+ li $at, 20 -+ -+ /* BYTES < 0, it has no full block. */ -+ bltz BYTES, .Lchacha20_mips_no_full_block_aligned -+ -+ FOR_EACH_WORD_REV(STORE_ALIGNED) -+ -+ /* BYTES > 0? Loop again. */ -+ bgtz BYTES, .Loop_chacha20_rounds -+ -+ /* Place this here to fill delay slot */ -+ addiu NONCE_0, 1 -+ -+ /* BYTES < 0? 
Handle last bytes */ -+ bltz BYTES, .Lchacha20_mips_xor_bytes -+ -+.Lchacha20_mips_xor_done: -+ /* Restore used registers */ -+ lw $s0, 0($sp) -+ lw $s1, 4($sp) -+ lw $s2, 8($sp) -+ lw $s3, 12($sp) -+ lw $s4, 16($sp) -+ lw $s5, 20($sp) -+ lw $s6, 24($sp) -+ lw $s7, 28($sp) -+ -+ /* Write NONCE_0 back to right location in state */ -+ sw NONCE_0, 48(STATE) -+ -+.Lchacha20_mips_end: -+ addiu $sp, STACK_SIZE -+ jr $ra -+ -+.Lchacha20_mips_no_full_block_aligned: -+ /* Restore the offset on BYTES */ -+ addiu BYTES, CHACHA20_BLOCK_SIZE -+ -+ /* Get number of full WORDS */ -+ andi $at, BYTES, MASK_U32 -+ -+ /* Load upper half of jump table addr */ -+ lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) -+ -+ /* Calculate lower half jump table offset */ -+ ins T0, $at, 1, 6 -+ -+ /* Add offset to STATE */ -+ addu T1, STATE, $at -+ -+ /* Add lower half jump table addr */ -+ addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) -+ -+ /* Read value from STATE */ -+ lw SAVED_CA, 0(T1) -+ -+ /* Store remaining bytecounter as negative value */ -+ subu BYTES, $at, BYTES -+ -+ jr T0 -+ -+ /* Jump table */ -+ FOR_EACH_WORD(JMPTBL_ALIGNED) -+ -+ -+.Loop_chacha20_unaligned: -+ /* Set number rounds here to fill delayslot. */ -+ li $at, 20 -+ -+ /* BYTES > 0, it has no full block. */ -+ bltz BYTES, .Lchacha20_mips_no_full_block_unaligned -+ -+ FOR_EACH_WORD_REV(STORE_UNALIGNED) -+ -+ /* BYTES > 0? Loop again. 
*/ -+ bgtz BYTES, .Loop_chacha20_rounds -+ -+ /* Write NONCE_0 back to right location in state */ -+ sw NONCE_0, 48(STATE) -+ -+ .set noreorder -+ /* Fall through to byte handling */ -+ bgez BYTES, .Lchacha20_mips_xor_done -+.Lchacha20_mips_xor_unaligned_0_b: -+.Lchacha20_mips_xor_aligned_0_b: -+ /* Place this here to fill delay slot */ -+ addiu NONCE_0, 1 -+ .set reorder -+ -+.Lchacha20_mips_xor_bytes: -+ addu IN, $at -+ addu OUT, $at -+ /* First byte */ -+ lbu T1, 0(IN) -+ addiu $at, BYTES, 1 -+ CPU_TO_LE32(SAVED_X) -+ ROTR(SAVED_X) -+ xor T1, SAVED_X -+ sb T1, 0(OUT) -+ beqz $at, .Lchacha20_mips_xor_done -+ /* Second byte */ -+ lbu T1, 1(IN) -+ addiu $at, BYTES, 2 -+ ROTx SAVED_X, 8 -+ xor T1, SAVED_X -+ sb T1, 1(OUT) -+ beqz $at, .Lchacha20_mips_xor_done -+ /* Third byte */ -+ lbu T1, 2(IN) -+ ROTx SAVED_X, 8 -+ xor T1, SAVED_X -+ sb T1, 2(OUT) -+ b .Lchacha20_mips_xor_done -+ -+.Lchacha20_mips_no_full_block_unaligned: -+ /* Restore the offset on BYTES */ -+ addiu BYTES, CHACHA20_BLOCK_SIZE -+ -+ /* Get number of full WORDS */ -+ andi $at, BYTES, MASK_U32 -+ -+ /* Load upper half of jump table addr */ -+ lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) -+ -+ /* Calculate lower half jump table offset */ -+ ins T0, $at, 1, 6 -+ -+ /* Add offset to STATE */ -+ addu T1, STATE, $at -+ -+ /* Add lower half jump table addr */ -+ addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) -+ -+ /* Read value from STATE */ -+ lw SAVED_CA, 0(T1) -+ -+ /* Store remaining bytecounter as negative value */ -+ subu BYTES, $at, BYTES -+ -+ jr T0 -+ -+ /* Jump table */ -+ FOR_EACH_WORD(JMPTBL_UNALIGNED) -+.end chacha20_mips -+.set at diff --git a/target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch b/target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch deleted file mode 100644 index 0d24ce29e5..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch +++ /dev/null @@ -1,559 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:17 +0100 -Subject: [PATCH] crypto: mips/chacha - wire up accelerated 32r2 code from Zinc -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 3a2f58f3ba4f6f44e33d1a48240d5eadb882cb59 upstream. - -This integrates the accelerated MIPS 32r2 implementation of ChaCha -into both the API and library interfaces of the kernel crypto stack. - -The significance of this is that, in addition to becoming available -as an accelerated library implementation, it can also be used by -existing crypto API code such as Adiantum (for block encryption on -ultra low performance cores) or IPsec using chacha20poly1305. These -are use cases that have already opted into using the abstract crypto -API. In order to support Adiantum, the core assembler routine has -been adapted to take the round count as a function argument rather -than hardcoding it to 20. - -Co-developed-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/mips/Makefile | 2 +- - arch/mips/crypto/Makefile | 4 + - arch/mips/crypto/chacha-core.S | 159 ++++++++++++++++++++++++--------- - arch/mips/crypto/chacha-glue.c | 150 +++++++++++++++++++++++++++++++ - crypto/Kconfig | 6 ++ - 5 files changed, 277 insertions(+), 44 deletions(-) - create mode 100644 arch/mips/crypto/chacha-glue.c - ---- a/arch/mips/Makefile -+++ b/arch/mips/Makefile -@@ -334,7 +334,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/m - # See arch/mips/Kbuild for content of core part of the kernel - core-y += arch/mips/ - --drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/ -+drivers-y += arch/mips/crypto/ - drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ - - # suspend and hibernation support ---- a/arch/mips/crypto/Makefile -+++ b/arch/mips/crypto/Makefile -@@ -4,3 +4,7 @@ - # - - obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o -+ -+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o -+chacha-mips-y := chacha-core.o chacha-glue.o -+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots ---- a/arch/mips/crypto/chacha-core.S -+++ b/arch/mips/crypto/chacha-core.S -@@ -125,7 +125,7 @@ - #define CONCAT3(a,b,c) _CONCAT3(a,b,c) - - #define STORE_UNALIGNED(x) \ --CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ -+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ -@@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_, - swr X ## x, (x*4)+LSB ## (OUT); - - #define STORE_ALIGNED(x) \ --CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ -+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ -@@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL - * Every jumptable entry must be equal in size. 
- */ - #define JMPTBL_ALIGNED(x) \ --.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ -+.Lchacha_mips_jmptbl_aligned_ ## x: ; \ - .set noreorder; \ -- b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ -+ b .Lchacha_mips_xor_aligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ -@@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL - .set reorder - - #define JMPTBL_UNALIGNED(x) \ --.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ -+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ - .set noreorder; \ -- b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ -+ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ -@@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL - .text - .set reorder - .set noat --.globl chacha20_mips --.ent chacha20_mips --chacha20_mips: -+.globl chacha_crypt_arch -+.ent chacha_crypt_arch -+chacha_crypt_arch: - .frame $sp, STACK_SIZE, $ra - -+ /* Load number of rounds */ -+ lw $at, 16($sp) -+ - addiu $sp, -STACK_SIZE - - /* Return bytes = 0. 
*/ -- beqz BYTES, .Lchacha20_mips_end -+ beqz BYTES, .Lchacha_mips_end - - lw NONCE_0, 48(STATE) - -@@ -228,18 +231,15 @@ chacha20_mips: - or IS_UNALIGNED, IN, OUT - andi IS_UNALIGNED, 0x3 - -- /* Set number of rounds */ -- li $at, 20 -- -- b .Lchacha20_rounds_start -+ b .Lchacha_rounds_start - - .align 4 --.Loop_chacha20_rounds: -+.Loop_chacha_rounds: - addiu IN, CHACHA20_BLOCK_SIZE - addiu OUT, CHACHA20_BLOCK_SIZE - addiu NONCE_0, 1 - --.Lchacha20_rounds_start: -+.Lchacha_rounds_start: - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) -@@ -259,7 +259,7 @@ chacha20_mips: - lw X14, 56(STATE) - lw X15, 60(STATE) - --.Loop_chacha20_xor_rounds: -+.Loop_chacha_xor_rounds: - addiu $at, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); -@@ -269,31 +269,31 @@ chacha20_mips: - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); -- bnez $at, .Loop_chacha20_xor_rounds -+ bnez $at, .Loop_chacha_xor_rounds - - addiu BYTES, -(CHACHA20_BLOCK_SIZE) - - /* Is data src/dst unaligned? Jump */ -- bnez IS_UNALIGNED, .Loop_chacha20_unaligned -+ bnez IS_UNALIGNED, .Loop_chacha_unaligned - - /* Set number rounds here to fill delayslot. */ -- li $at, 20 -+ lw $at, (STACK_SIZE+16)($sp) - - /* BYTES < 0, it has no full block. */ -- bltz BYTES, .Lchacha20_mips_no_full_block_aligned -+ bltz BYTES, .Lchacha_mips_no_full_block_aligned - - FOR_EACH_WORD_REV(STORE_ALIGNED) - - /* BYTES > 0? Loop again. */ -- bgtz BYTES, .Loop_chacha20_rounds -+ bgtz BYTES, .Loop_chacha_rounds - - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - - /* BYTES < 0? 
Handle last bytes */ -- bltz BYTES, .Lchacha20_mips_xor_bytes -+ bltz BYTES, .Lchacha_mips_xor_bytes - --.Lchacha20_mips_xor_done: -+.Lchacha_mips_xor_done: - /* Restore used registers */ - lw $s0, 0($sp) - lw $s1, 4($sp) -@@ -307,11 +307,11 @@ chacha20_mips: - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - --.Lchacha20_mips_end: -+.Lchacha_mips_end: - addiu $sp, STACK_SIZE - jr $ra - --.Lchacha20_mips_no_full_block_aligned: -+.Lchacha_mips_no_full_block_aligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - -@@ -319,7 +319,7 @@ chacha20_mips: - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ -- lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) -+ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 -@@ -328,7 +328,7 @@ chacha20_mips: - addu T1, STATE, $at - - /* Add lower half jump table addr */ -- addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) -+ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) -@@ -342,31 +342,31 @@ chacha20_mips: - FOR_EACH_WORD(JMPTBL_ALIGNED) - - --.Loop_chacha20_unaligned: -+.Loop_chacha_unaligned: - /* Set number rounds here to fill delayslot. */ -- li $at, 20 -+ lw $at, (STACK_SIZE+16)($sp) - - /* BYTES > 0, it has no full block. */ -- bltz BYTES, .Lchacha20_mips_no_full_block_unaligned -+ bltz BYTES, .Lchacha_mips_no_full_block_unaligned - - FOR_EACH_WORD_REV(STORE_UNALIGNED) - - /* BYTES > 0? Loop again. 
*/ -- bgtz BYTES, .Loop_chacha20_rounds -+ bgtz BYTES, .Loop_chacha_rounds - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - - .set noreorder - /* Fall through to byte handling */ -- bgez BYTES, .Lchacha20_mips_xor_done --.Lchacha20_mips_xor_unaligned_0_b: --.Lchacha20_mips_xor_aligned_0_b: -+ bgez BYTES, .Lchacha_mips_xor_done -+.Lchacha_mips_xor_unaligned_0_b: -+.Lchacha_mips_xor_aligned_0_b: - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - .set reorder - --.Lchacha20_mips_xor_bytes: -+.Lchacha_mips_xor_bytes: - addu IN, $at - addu OUT, $at - /* First byte */ -@@ -376,22 +376,22 @@ chacha20_mips: - ROTR(SAVED_X) - xor T1, SAVED_X - sb T1, 0(OUT) -- beqz $at, .Lchacha20_mips_xor_done -+ beqz $at, .Lchacha_mips_xor_done - /* Second byte */ - lbu T1, 1(IN) - addiu $at, BYTES, 2 - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 1(OUT) -- beqz $at, .Lchacha20_mips_xor_done -+ beqz $at, .Lchacha_mips_xor_done - /* Third byte */ - lbu T1, 2(IN) - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 2(OUT) -- b .Lchacha20_mips_xor_done -+ b .Lchacha_mips_xor_done - --.Lchacha20_mips_no_full_block_unaligned: -+.Lchacha_mips_no_full_block_unaligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - -@@ -399,7 +399,7 @@ chacha20_mips: - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ -- lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) -+ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 -@@ -408,7 +408,7 @@ chacha20_mips: - addu T1, STATE, $at - - /* Add lower half jump table addr */ -- addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) -+ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) -@@ -420,5 +420,78 @@ chacha20_mips: - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_UNALIGNED) --.end chacha20_mips -+.end chacha_crypt_arch -+.set at -+ -+/* Input arguments -+ * STATE $a0 -+ * OUT $a1 
-+ * NROUND $a2 -+ */ -+ -+#undef X12 -+#undef X13 -+#undef X14 -+#undef X15 -+ -+#define X12 $a3 -+#define X13 $at -+#define X14 $v0 -+#define X15 STATE -+ -+.set noat -+.globl hchacha_block_arch -+.ent hchacha_block_arch -+hchacha_block_arch: -+ .frame $sp, STACK_SIZE, $ra -+ -+ addiu $sp, -STACK_SIZE -+ -+ /* Save X11(s6) */ -+ sw X11, 0($sp) -+ -+ lw X0, 0(STATE) -+ lw X1, 4(STATE) -+ lw X2, 8(STATE) -+ lw X3, 12(STATE) -+ lw X4, 16(STATE) -+ lw X5, 20(STATE) -+ lw X6, 24(STATE) -+ lw X7, 28(STATE) -+ lw X8, 32(STATE) -+ lw X9, 36(STATE) -+ lw X10, 40(STATE) -+ lw X11, 44(STATE) -+ lw X12, 48(STATE) -+ lw X13, 52(STATE) -+ lw X14, 56(STATE) -+ lw X15, 60(STATE) -+ -+.Loop_hchacha_xor_rounds: -+ addiu $a2, -2 -+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); -+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); -+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); -+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); -+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); -+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); -+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); -+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); -+ bnez $a2, .Loop_hchacha_xor_rounds -+ -+ /* Restore used register */ -+ lw X11, 0($sp) -+ -+ sw X0, 0(OUT) -+ sw X1, 4(OUT) -+ sw X2, 8(OUT) -+ sw X3, 12(OUT) -+ sw X12, 16(OUT) -+ sw X13, 20(OUT) -+ sw X14, 24(OUT) -+ sw X15, 28(OUT) -+ -+ addiu $sp, STACK_SIZE -+ jr $ra -+.end hchacha_block_arch - .set at ---- /dev/null -+++ b/arch/mips/crypto/chacha-glue.c -@@ -0,0 +1,150 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * MIPS accelerated ChaCha and XChaCha stream ciphers, -+ * including ChaCha20 (RFC7539) -+ * -+ * Copyright (C) 2019 Linaro, Ltd. 
<ard.biesheuvel@linaro.org> -+ */ -+ -+#include <asm/byteorder.h> -+#include <crypto/algapi.h> -+#include <crypto/internal/chacha.h> -+#include <crypto/internal/skcipher.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes, int nrounds); -+EXPORT_SYMBOL(chacha_crypt_arch); -+ -+asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds); -+EXPORT_SYMBOL(hchacha_block_arch); -+ -+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -+{ -+ chacha_init_generic(state, key, iv); -+} -+EXPORT_SYMBOL(chacha_init_arch); -+ -+static int chacha_mips_stream_xor(struct skcipher_request *req, -+ const struct chacha_ctx *ctx, const u8 *iv) -+{ -+ struct skcipher_walk walk; -+ u32 state[16]; -+ int err; -+ -+ err = skcipher_walk_virt(&walk, req, false); -+ -+ chacha_init_generic(state, ctx->key, iv); -+ -+ while (walk.nbytes > 0) { -+ unsigned int nbytes = walk.nbytes; -+ -+ if (nbytes < walk.total) -+ nbytes = round_down(nbytes, walk.stride); -+ -+ chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr, -+ nbytes, ctx->nrounds); -+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); -+ } -+ -+ return err; -+} -+ -+static int chacha_mips(struct skcipher_request *req) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -+ -+ return chacha_mips_stream_xor(req, ctx, req->iv); -+} -+ -+static int xchacha_mips(struct skcipher_request *req) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct chacha_ctx subctx; -+ u32 state[16]; -+ u8 real_iv[16]; -+ -+ chacha_init_generic(state, ctx->key, req->iv); -+ -+ hchacha_block(state, subctx.key, ctx->nrounds); -+ subctx.nrounds = ctx->nrounds; -+ -+ memcpy(&real_iv[0], req->iv + 24, 8); -+ memcpy(&real_iv[8], req->iv + 16, 8); -+ return chacha_mips_stream_xor(req, 
&subctx, real_iv); -+} -+ -+static struct skcipher_alg algs[] = { -+ { -+ .base.cra_name = "chacha20", -+ .base.cra_driver_name = "chacha20-mips", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = CHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = chacha_mips, -+ .decrypt = chacha_mips, -+ }, { -+ .base.cra_name = "xchacha20", -+ .base.cra_driver_name = "xchacha20-mips", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = xchacha_mips, -+ .decrypt = xchacha_mips, -+ }, { -+ .base.cra_name = "xchacha12", -+ .base.cra_driver_name = "xchacha12-mips", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha12_setkey, -+ .encrypt = xchacha_mips, -+ .decrypt = xchacha_mips, -+ } -+}; -+ -+static int __init chacha_simd_mod_init(void) -+{ -+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -+} -+ -+static void __exit chacha_simd_mod_fini(void) -+{ -+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -+} -+ -+module_init(chacha_simd_mod_init); -+module_exit(chacha_simd_mod_fini); -+ -+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)"); -+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -+MODULE_LICENSE("GPL v2"); -+MODULE_ALIAS_CRYPTO("chacha20"); -+MODULE_ALIAS_CRYPTO("chacha20-mips"); 
-+MODULE_ALIAS_CRYPTO("xchacha20"); -+MODULE_ALIAS_CRYPTO("xchacha20-mips"); -+MODULE_ALIAS_CRYPTO("xchacha12"); -+MODULE_ALIAS_CRYPTO("xchacha12-mips"); ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -1423,6 +1423,12 @@ config CRYPTO_CHACHA20_X86_64 - SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, - XChaCha20, and XChaCha12 stream ciphers. - -+config CRYPTO_CHACHA_MIPS -+ tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)" -+ depends on CPU_MIPS32_R2 -+ select CRYPTO_BLKCIPHER -+ select CRYPTO_ARCH_HAVE_LIB_CHACHA -+ - config CRYPTO_SEED - tristate "SEED cipher algorithm" - select CRYPTO_ALGAPI diff --git a/target/linux/generic/backport-5.4/080-wireguard-0012-crypto-chacha-unexport-chacha_generic-routines.patch b/target/linux/generic/backport-5.4/080-wireguard-0012-crypto-chacha-unexport-chacha_generic-routines.patch deleted file mode 100644 index d06f47a100..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0012-crypto-chacha-unexport-chacha_generic-routines.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:18 +0100 -Subject: [PATCH] crypto: chacha - unexport chacha_generic routines - -commit 22cf705360707ced15f9fe5423938f313c7df536 upstream. - -Now that all users of generic ChaCha code have moved to the core library, -there is no longer a need for the generic ChaCha skcpiher driver to -export parts of it implementation for reuse by other drivers. So drop -the exports, and make the symbols static. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - crypto/chacha_generic.c | 26 ++++++++------------------ - include/crypto/internal/chacha.h | 10 ---------- - 2 files changed, 8 insertions(+), 28 deletions(-) - ---- a/crypto/chacha_generic.c -+++ b/crypto/chacha_generic.c -@@ -21,7 +21,7 @@ static int chacha_stream_xor(struct skci - - err = skcipher_walk_virt(&walk, req, false); - -- crypto_chacha_init(state, ctx, iv); -+ chacha_init_generic(state, ctx->key, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; -@@ -37,36 +37,27 @@ static int chacha_stream_xor(struct skci - return err; - } - --void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv) --{ -- chacha_init_generic(state, ctx->key, iv); --} --EXPORT_SYMBOL_GPL(crypto_chacha_init); -- --int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize) -+static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, -+ unsigned int keysize) - { - return chacha_setkey(tfm, key, keysize, 20); - } --EXPORT_SYMBOL_GPL(crypto_chacha20_setkey); - --int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize) -+static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -+ unsigned int keysize) - { - return chacha_setkey(tfm, key, keysize, 12); - } --EXPORT_SYMBOL_GPL(crypto_chacha12_setkey); - --int crypto_chacha_crypt(struct skcipher_request *req) -+static int crypto_chacha_crypt(struct skcipher_request *req) - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - return chacha_stream_xor(req, ctx, req->iv); - } --EXPORT_SYMBOL_GPL(crypto_chacha_crypt); - --int crypto_xchacha_crypt(struct skcipher_request *req) -+static int crypto_xchacha_crypt(struct skcipher_request *req) - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -@@ -75,7 +66,7 @@ int 
crypto_xchacha_crypt(struct skcipher - u8 real_iv[16]; - - /* Compute the subkey given the original key and first 128 nonce bits */ -- crypto_chacha_init(state, ctx, req->iv); -+ chacha_init_generic(state, ctx->key, req->iv); - hchacha_block_generic(state, subctx.key, ctx->nrounds); - subctx.nrounds = ctx->nrounds; - -@@ -86,7 +77,6 @@ int crypto_xchacha_crypt(struct skcipher - /* Generate the stream and XOR it with the data */ - return chacha_stream_xor(req, &subctx, real_iv); - } --EXPORT_SYMBOL_GPL(crypto_xchacha_crypt); - - static struct skcipher_alg algs[] = { - { ---- a/include/crypto/internal/chacha.h -+++ b/include/crypto/internal/chacha.h -@@ -12,8 +12,6 @@ struct chacha_ctx { - int nrounds; - }; - --void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv); -- - static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize, int nrounds) - { -@@ -42,12 +40,4 @@ static int inline chacha12_setkey(struct - return chacha_setkey(tfm, key, keysize, 12); - } - --int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize); --int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize); -- --int crypto_chacha_crypt(struct skcipher_request *req); --int crypto_xchacha_crypt(struct skcipher_request *req); -- - #endif /* _CRYPTO_CHACHA_H */ diff --git a/target/linux/generic/backport-5.4/080-wireguard-0013-crypto-poly1305-move-core-routines-into-a-separate-l.patch b/target/linux/generic/backport-5.4/080-wireguard-0013-crypto-poly1305-move-core-routines-into-a-separate-l.patch deleted file mode 100644 index 960300d2a5..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0013-crypto-poly1305-move-core-routines-into-a-separate-l.patch +++ /dev/null @@ -1,649 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:19 +0100 -Subject: [PATCH] 
crypto: poly1305 - move core routines into a separate library - -commit 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb upstream. - -Move the core Poly1305 routines shared between the generic Poly1305 -shash driver and the Adiantum and NHPoly1305 drivers into a separate -library so that using just this pieces does not pull in the crypto -API pieces of the generic Poly1305 routine. - -In a subsequent patch, we will augment this generic library with -init/update/final routines so that Poyl1305 algorithm can be used -directly without the need for using the crypto API's shash abstraction. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305_glue.c | 2 +- - crypto/Kconfig | 5 +- - crypto/adiantum.c | 5 +- - crypto/nhpoly1305.c | 3 +- - crypto/poly1305_generic.c | 195 ++--------------------------- - include/crypto/internal/poly1305.h | 67 ++++++++++ - include/crypto/poly1305.h | 23 ---- - lib/crypto/Kconfig | 3 + - lib/crypto/Makefile | 3 + - lib/crypto/poly1305.c | 158 +++++++++++++++++++++++ - 10 files changed, 248 insertions(+), 216 deletions(-) - create mode 100644 include/crypto/internal/poly1305.h - create mode 100644 lib/crypto/poly1305.c - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -7,8 +7,8 @@ - - #include <crypto/algapi.h> - #include <crypto/internal/hash.h> -+#include <crypto/internal/poly1305.h> - #include <crypto/internal/simd.h> --#include <crypto/poly1305.h> - #include <linux/crypto.h> - #include <linux/kernel.h> - #include <linux/module.h> ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP - config CRYPTO_NHPOLY1305 - tristate - select CRYPTO_HASH -- select CRYPTO_POLY1305 -+ select CRYPTO_LIB_POLY1305_GENERIC - - config CRYPTO_NHPOLY1305_SSE2 - tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)" -@@ -467,7 +467,7 @@ config 
CRYPTO_NHPOLY1305_AVX2 - config CRYPTO_ADIANTUM - tristate "Adiantum support" - select CRYPTO_CHACHA20 -- select CRYPTO_POLY1305 -+ select CRYPTO_LIB_POLY1305_GENERIC - select CRYPTO_NHPOLY1305 - select CRYPTO_MANAGER - help -@@ -686,6 +686,7 @@ config CRYPTO_GHASH - config CRYPTO_POLY1305 - tristate "Poly1305 authenticator algorithm" - select CRYPTO_HASH -+ select CRYPTO_LIB_POLY1305_GENERIC - help - Poly1305 authenticator algorithm, RFC7539. - ---- a/crypto/adiantum.c -+++ b/crypto/adiantum.c -@@ -33,6 +33,7 @@ - #include <crypto/b128ops.h> - #include <crypto/chacha.h> - #include <crypto/internal/hash.h> -+#include <crypto/internal/poly1305.h> - #include <crypto/internal/skcipher.h> - #include <crypto/nhpoly1305.h> - #include <crypto/scatterwalk.h> -@@ -242,11 +243,11 @@ static void adiantum_hash_header(struct - - BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); - poly1305_core_blocks(&state, &tctx->header_hash_key, -- &header, sizeof(header) / POLY1305_BLOCK_SIZE); -+ &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1); - - BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); - poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, -- TWEAK_SIZE / POLY1305_BLOCK_SIZE); -+ TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1); - - poly1305_core_emit(&state, &rctx->header_hash); - } ---- a/crypto/nhpoly1305.c -+++ b/crypto/nhpoly1305.c -@@ -33,6 +33,7 @@ - #include <asm/unaligned.h> - #include <crypto/algapi.h> - #include <crypto/internal/hash.h> -+#include <crypto/internal/poly1305.h> - #include <crypto/nhpoly1305.h> - #include <linux/crypto.h> - #include <linux/kernel.h> -@@ -78,7 +79,7 @@ static void process_nh_hash_value(struct - BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0); - - poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash, -- NH_HASH_BYTES / POLY1305_BLOCK_SIZE); -+ NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1); - } - - /* ---- a/crypto/poly1305_generic.c -+++ b/crypto/poly1305_generic.c -@@ -13,27 +13,12 @@ - - #include 
<crypto/algapi.h> - #include <crypto/internal/hash.h> --#include <crypto/poly1305.h> -+#include <crypto/internal/poly1305.h> - #include <linux/crypto.h> - #include <linux/kernel.h> - #include <linux/module.h> - #include <asm/unaligned.h> - --static inline u64 mlt(u64 a, u64 b) --{ -- return a * b; --} -- --static inline u32 sr(u64 v, u_char n) --{ -- return v >> n; --} -- --static inline u32 and(u32 v, u32 mask) --{ -- return v & mask; --} -- - int crypto_poly1305_init(struct shash_desc *desc) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -@@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_de - } - EXPORT_SYMBOL_GPL(crypto_poly1305_init); - --void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) --{ -- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ -- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; -- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; -- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; -- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; -- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; --} --EXPORT_SYMBOL_GPL(poly1305_core_setkey); -- --/* -- * Poly1305 requires a unique key for each tag, which implies that we can't set -- * it on the tfm that gets accessed by multiple users simultaneously. Instead we -- * expect the key as the first 32 bytes in the update() call. 
-- */ --unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, -- const u8 *src, unsigned int srclen) --{ -- if (!dctx->sset) { -- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_core_setkey(&dctx->r, src); -- src += POLY1305_BLOCK_SIZE; -- srclen -= POLY1305_BLOCK_SIZE; -- dctx->rset = true; -- } -- if (srclen >= POLY1305_BLOCK_SIZE) { -- dctx->s[0] = get_unaligned_le32(src + 0); -- dctx->s[1] = get_unaligned_le32(src + 4); -- dctx->s[2] = get_unaligned_le32(src + 8); -- dctx->s[3] = get_unaligned_le32(src + 12); -- src += POLY1305_BLOCK_SIZE; -- srclen -= POLY1305_BLOCK_SIZE; -- dctx->sset = true; -- } -- } -- return srclen; --} --EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey); -- --static void poly1305_blocks_internal(struct poly1305_state *state, -- const struct poly1305_key *key, -- const void *src, unsigned int nblocks, -- u32 hibit) --{ -- u32 r0, r1, r2, r3, r4; -- u32 s1, s2, s3, s4; -- u32 h0, h1, h2, h3, h4; -- u64 d0, d1, d2, d3, d4; -- -- if (!nblocks) -- return; -- -- r0 = key->r[0]; -- r1 = key->r[1]; -- r2 = key->r[2]; -- r3 = key->r[3]; -- r4 = key->r[4]; -- -- s1 = r1 * 5; -- s2 = r2 * 5; -- s3 = r3 * 5; -- s4 = r4 * 5; -- -- h0 = state->h[0]; -- h1 = state->h[1]; -- h2 = state->h[2]; -- h3 = state->h[3]; -- h4 = state->h[4]; -- -- do { -- /* h += m[i] */ -- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; -- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; -- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; -- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; -- h4 += (get_unaligned_le32(src + 12) >> 8) | hibit; -- -- /* h *= r */ -- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + -- mlt(h3, s2) + mlt(h4, s1); -- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + -- mlt(h3, s3) + mlt(h4, s2); -- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + -- mlt(h3, s4) + mlt(h4, s3); -- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + -- mlt(h3, r0) + mlt(h4, s4); -- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + -- 
mlt(h3, r1) + mlt(h4, r0); -- -- /* (partial) h %= p */ -- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); -- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); -- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); -- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); -- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); -- h1 += h0 >> 26; h0 = h0 & 0x3ffffff; -- -- src += POLY1305_BLOCK_SIZE; -- } while (--nblocks); -- -- state->h[0] = h0; -- state->h[1] = h1; -- state->h[2] = h2; -- state->h[3] = h3; -- state->h[4] = h4; --} -- --void poly1305_core_blocks(struct poly1305_state *state, -- const struct poly1305_key *key, -- const void *src, unsigned int nblocks) --{ -- poly1305_blocks_internal(state, key, src, nblocks, 1 << 24); --} --EXPORT_SYMBOL_GPL(poly1305_core_blocks); -- --static void poly1305_blocks(struct poly1305_desc_ctx *dctx, -- const u8 *src, unsigned int srclen, u32 hibit) -+static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, -+ unsigned int srclen) - { - unsigned int datalen; - -@@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1 - srclen = datalen; - } - -- poly1305_blocks_internal(&dctx->h, &dctx->r, -- src, srclen / POLY1305_BLOCK_SIZE, hibit); -+ poly1305_core_blocks(&dctx->h, &dctx->r, src, -+ srclen / POLY1305_BLOCK_SIZE, 1); - } - - int crypto_poly1305_update(struct shash_desc *desc, -@@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_ - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { - poly1305_blocks(dctx, dctx->buf, -- POLY1305_BLOCK_SIZE, 1 << 24); -+ POLY1305_BLOCK_SIZE); - dctx->buflen = 0; - } - } - - if (likely(srclen >= POLY1305_BLOCK_SIZE)) { -- poly1305_blocks(dctx, src, srclen, 1 << 24); -+ poly1305_blocks(dctx, src, srclen); - src += srclen - (srclen % POLY1305_BLOCK_SIZE); - srclen %= POLY1305_BLOCK_SIZE; - } -@@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_ - } - EXPORT_SYMBOL_GPL(crypto_poly1305_update); - --void poly1305_core_emit(const struct poly1305_state *state, void *dst) --{ -- u32 h0, h1, h2, h3, h4; 
-- u32 g0, g1, g2, g3, g4; -- u32 mask; -- -- /* fully carry h */ -- h0 = state->h[0]; -- h1 = state->h[1]; -- h2 = state->h[2]; -- h3 = state->h[3]; -- h4 = state->h[4]; -- -- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; -- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; -- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; -- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; -- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; -- -- /* compute h + -p */ -- g0 = h0 + 5; -- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; -- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; -- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; -- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; -- -- /* select h if h < p, or h + -p if h >= p */ -- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; -- g0 &= mask; -- g1 &= mask; -- g2 &= mask; -- g3 &= mask; -- g4 &= mask; -- mask = ~mask; -- h0 = (h0 & mask) | g0; -- h1 = (h1 & mask) | g1; -- h2 = (h2 & mask) | g2; -- h3 = (h3 & mask) | g3; -- h4 = (h4 & mask) | g4; -- -- /* h = h % (2^128) */ -- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); -- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); -- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); -- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); --} --EXPORT_SYMBOL_GPL(poly1305_core_emit); -- - int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -@@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_d - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); -- poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0); -+ poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); - } - - poly1305_core_emit(&dctx->h, digest); ---- /dev/null -+++ b/include/crypto/internal/poly1305.h -@@ -0,0 +1,67 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Common values for the Poly1305 algorithm -+ */ -+ -+#ifndef _CRYPTO_INTERNAL_POLY1305_H -+#define _CRYPTO_INTERNAL_POLY1305_H -+ -+#include <asm/unaligned.h> -+#include 
<linux/types.h> -+#include <crypto/poly1305.h> -+ -+struct shash_desc; -+ -+/* -+ * Poly1305 core functions. These implement the ε-almost-∆-universal hash -+ * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce -+ * ("s key") at the end. They also only support block-aligned inputs. -+ */ -+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); -+static inline void poly1305_core_init(struct poly1305_state *state) -+{ -+ *state = (struct poly1305_state){}; -+} -+ -+void poly1305_core_blocks(struct poly1305_state *state, -+ const struct poly1305_key *key, const void *src, -+ unsigned int nblocks, u32 hibit); -+void poly1305_core_emit(const struct poly1305_state *state, void *dst); -+ -+/* Crypto API helper functions for the Poly1305 MAC */ -+int crypto_poly1305_init(struct shash_desc *desc); -+ -+int crypto_poly1305_update(struct shash_desc *desc, -+ const u8 *src, unsigned int srclen); -+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); -+ -+/* -+ * Poly1305 requires a unique key for each tag, which implies that we can't set -+ * it on the tfm that gets accessed by multiple users simultaneously. Instead we -+ * expect the key as the first 32 bytes in the update() call. 
-+ */ -+static inline -+unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, -+ const u8 *src, unsigned int srclen) -+{ -+ if (!dctx->sset) { -+ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { -+ poly1305_core_setkey(&dctx->r, src); -+ src += POLY1305_BLOCK_SIZE; -+ srclen -= POLY1305_BLOCK_SIZE; -+ dctx->rset = true; -+ } -+ if (srclen >= POLY1305_BLOCK_SIZE) { -+ dctx->s[0] = get_unaligned_le32(src + 0); -+ dctx->s[1] = get_unaligned_le32(src + 4); -+ dctx->s[2] = get_unaligned_le32(src + 8); -+ dctx->s[3] = get_unaligned_le32(src + 12); -+ src += POLY1305_BLOCK_SIZE; -+ srclen -= POLY1305_BLOCK_SIZE; -+ dctx->sset = true; -+ } -+ } -+ return srclen; -+} -+ -+#endif ---- a/include/crypto/poly1305.h -+++ b/include/crypto/poly1305.h -@@ -38,27 +38,4 @@ struct poly1305_desc_ctx { - bool sset; - }; - --/* -- * Poly1305 core functions. These implement the ε-almost-∆-universal hash -- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce -- * ("s key") at the end. They also only support block-aligned inputs. 
-- */ --void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); --static inline void poly1305_core_init(struct poly1305_state *state) --{ -- memset(state->h, 0, sizeof(state->h)); --} --void poly1305_core_blocks(struct poly1305_state *state, -- const struct poly1305_key *key, -- const void *src, unsigned int nblocks); --void poly1305_core_emit(const struct poly1305_state *state, void *dst); -- --/* Crypto API helper functions for the Poly1305 MAC */ --int crypto_poly1305_init(struct shash_desc *desc); --unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, -- const u8 *src, unsigned int srclen); --int crypto_poly1305_update(struct shash_desc *desc, -- const u8 *src, unsigned int srclen); --int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); -- - #endif ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA - config CRYPTO_LIB_DES - tristate - -+config CRYPTO_LIB_POLY1305_GENERIC -+ tristate -+ - config CRYPTO_LIB_SHA256 - tristate ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -13,5 +13,8 @@ libarc4-y := arc4.o - obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o - libdes-y := des.o - -+obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o -+libpoly1305-y := poly1305.o -+ - obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o - libsha256-y := sha256.o ---- /dev/null -+++ b/lib/crypto/poly1305.c -@@ -0,0 +1,158 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Poly1305 authenticator algorithm, RFC7539 -+ * -+ * Copyright (C) 2015 Martin Willi -+ * -+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein. 
-+ */ -+ -+#include <crypto/internal/poly1305.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+#include <asm/unaligned.h> -+ -+static inline u64 mlt(u64 a, u64 b) -+{ -+ return a * b; -+} -+ -+static inline u32 sr(u64 v, u_char n) -+{ -+ return v >> n; -+} -+ -+static inline u32 and(u32 v, u32 mask) -+{ -+ return v & mask; -+} -+ -+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) -+{ -+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ -+ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; -+ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; -+ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; -+ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; -+ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; -+} -+EXPORT_SYMBOL_GPL(poly1305_core_setkey); -+ -+void poly1305_core_blocks(struct poly1305_state *state, -+ const struct poly1305_key *key, const void *src, -+ unsigned int nblocks, u32 hibit) -+{ -+ u32 r0, r1, r2, r3, r4; -+ u32 s1, s2, s3, s4; -+ u32 h0, h1, h2, h3, h4; -+ u64 d0, d1, d2, d3, d4; -+ -+ if (!nblocks) -+ return; -+ -+ r0 = key->r[0]; -+ r1 = key->r[1]; -+ r2 = key->r[2]; -+ r3 = key->r[3]; -+ r4 = key->r[4]; -+ -+ s1 = r1 * 5; -+ s2 = r2 * 5; -+ s3 = r3 * 5; -+ s4 = r4 * 5; -+ -+ h0 = state->h[0]; -+ h1 = state->h[1]; -+ h2 = state->h[2]; -+ h3 = state->h[3]; -+ h4 = state->h[4]; -+ -+ do { -+ /* h += m[i] */ -+ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; -+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; -+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; -+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; -+ h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); -+ -+ /* h *= r */ -+ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + -+ mlt(h3, s2) + mlt(h4, s1); -+ d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + -+ mlt(h3, s3) + mlt(h4, s2); -+ d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + -+ mlt(h3, s4) + mlt(h4, s3); -+ d3 = 
mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + -+ mlt(h3, r0) + mlt(h4, s4); -+ d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + -+ mlt(h3, r1) + mlt(h4, r0); -+ -+ /* (partial) h %= p */ -+ d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); -+ d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); -+ d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); -+ d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); -+ h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); -+ h1 += h0 >> 26; h0 = h0 & 0x3ffffff; -+ -+ src += POLY1305_BLOCK_SIZE; -+ } while (--nblocks); -+ -+ state->h[0] = h0; -+ state->h[1] = h1; -+ state->h[2] = h2; -+ state->h[3] = h3; -+ state->h[4] = h4; -+} -+EXPORT_SYMBOL_GPL(poly1305_core_blocks); -+ -+void poly1305_core_emit(const struct poly1305_state *state, void *dst) -+{ -+ u32 h0, h1, h2, h3, h4; -+ u32 g0, g1, g2, g3, g4; -+ u32 mask; -+ -+ /* fully carry h */ -+ h0 = state->h[0]; -+ h1 = state->h[1]; -+ h2 = state->h[2]; -+ h3 = state->h[3]; -+ h4 = state->h[4]; -+ -+ h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; -+ h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; -+ h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; -+ h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; -+ h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; -+ -+ /* compute h + -p */ -+ g0 = h0 + 5; -+ g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; -+ g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; -+ g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; -+ g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; -+ -+ /* select h if h < p, or h + -p if h >= p */ -+ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; -+ g0 &= mask; -+ g1 &= mask; -+ g2 &= mask; -+ g3 &= mask; -+ g4 &= mask; -+ mask = ~mask; -+ h0 = (h0 & mask) | g0; -+ h1 = (h1 & mask) | g1; -+ h2 = (h2 & mask) | g2; -+ h3 = (h3 & mask) | g3; -+ h4 = (h4 & mask) | g4; -+ -+ /* h = h % (2^128) */ -+ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); -+ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); -+ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); -+ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); -+} 
-+EXPORT_SYMBOL_GPL(poly1305_core_emit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0014-crypto-x86-poly1305-unify-Poly1305-state-struct-with.patch b/target/linux/generic/backport-5.4/080-wireguard-0014-crypto-x86-poly1305-unify-Poly1305-state-struct-with.patch deleted file mode 100644 index 7d237549b0..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0014-crypto-x86-poly1305-unify-Poly1305-state-struct-with.patch +++ /dev/null @@ -1,251 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:20 +0100 -Subject: [PATCH] crypto: x86/poly1305 - unify Poly1305 state struct with - generic code - -commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream. - -In preparation of exposing a Poly1305 library interface directly from -the accelerated x86 driver, align the state descriptor of the x86 code -with the one used by the generic driver. This is needed to make the -library interface unified between all implementations. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305_glue.c | 88 ++++++++++-------------------- - crypto/poly1305_generic.c | 6 +- - include/crypto/internal/poly1305.h | 4 +- - include/crypto/poly1305.h | 18 +++--- - 4 files changed, 43 insertions(+), 73 deletions(-) - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -14,40 +14,14 @@ - #include <linux/module.h> - #include <asm/simd.h> - --struct poly1305_simd_desc_ctx { -- struct poly1305_desc_ctx base; -- /* derived key u set? */ -- bool uset; --#ifdef CONFIG_AS_AVX2 -- /* derived keys r^3, r^4 set? */ -- bool wset; --#endif -- /* derived Poly1305 key r^2 */ -- u32 u[5]; -- /* ... 
silently appended r^3 and r^4 when using AVX2 */ --}; -- - asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, - const u32 *r, unsigned int blocks); - asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, - unsigned int blocks, const u32 *u); --#ifdef CONFIG_AS_AVX2 - asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, - unsigned int blocks, const u32 *u); --static bool poly1305_use_avx2; --#endif - --static int poly1305_simd_init(struct shash_desc *desc) --{ -- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc); -- -- sctx->uset = false; --#ifdef CONFIG_AS_AVX2 -- sctx->wset = false; --#endif -- -- return crypto_poly1305_init(desc); --} -+static bool poly1305_use_avx2 __ro_after_init; - - static void poly1305_simd_mult(u32 *a, const u32 *b) - { -@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, c - static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) - { -- struct poly1305_simd_desc_ctx *sctx; - unsigned int blocks, datalen; - -- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base)); -- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base); -- - if (unlikely(!dctx->sset)) { - datalen = crypto_poly1305_setdesckey(dctx, src, srclen); - src += srclen - datalen; - srclen = datalen; - } - --#ifdef CONFIG_AS_AVX2 -- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) { -- if (unlikely(!sctx->wset)) { -- if (!sctx->uset) { -- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); -- poly1305_simd_mult(sctx->u, dctx->r.r); -- sctx->uset = true; -+ if (IS_ENABLED(CONFIG_AS_AVX2) && -+ poly1305_use_avx2 && -+ srclen >= POLY1305_BLOCK_SIZE * 4) { -+ if (unlikely(dctx->rset < 4)) { -+ if (dctx->rset < 2) { -+ dctx->r[1] = dctx->r[0]; -+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); - } -- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u)); -- poly1305_simd_mult(sctx->u + 5, dctx->r.r); -- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u)); -- 
poly1305_simd_mult(sctx->u + 10, dctx->r.r); -- sctx->wset = true; -+ dctx->r[2] = dctx->r[1]; -+ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); -+ dctx->r[3] = dctx->r[2]; -+ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); -+ dctx->rset = 4; - } - blocks = srclen / (POLY1305_BLOCK_SIZE * 4); -- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks, -- sctx->u); -+ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, -+ dctx->r[1].r); - src += POLY1305_BLOCK_SIZE * 4 * blocks; - srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; - } --#endif -+ - if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { -- if (unlikely(!sctx->uset)) { -- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u)); -- poly1305_simd_mult(sctx->u, dctx->r.r); -- sctx->uset = true; -+ if (unlikely(dctx->rset < 2)) { -+ dctx->r[1] = dctx->r[0]; -+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); -+ dctx->rset = 2; - } - blocks = srclen / (POLY1305_BLOCK_SIZE * 2); -- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks, -- sctx->u); -+ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, -+ blocks, dctx->r[1].r); - src += POLY1305_BLOCK_SIZE * 2 * blocks; - srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; - } - if (srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1); -+ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); - srclen -= POLY1305_BLOCK_SIZE; - } - return srclen; -@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct s - - static struct shash_alg alg = { - .digestsize = POLY1305_DIGEST_SIZE, -- .init = poly1305_simd_init, -+ .init = crypto_poly1305_init, - .update = poly1305_simd_update, - .final = crypto_poly1305_final, -- .descsize = sizeof(struct poly1305_simd_desc_ctx), -+ .descsize = sizeof(struct poly1305_desc_ctx), - .base = { - .cra_name = "poly1305", - .cra_driver_name = "poly1305-simd", -@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init - if (!boot_cpu_has(X86_FEATURE_XMM2)) - return -ENODEV; - --#ifdef CONFIG_AS_AVX2 -- poly1305_use_avx2 = 
boot_cpu_has(X86_FEATURE_AVX) && -+ poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && -+ boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); -- alg.descsize = sizeof(struct poly1305_simd_desc_ctx); -+ alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32); - if (poly1305_use_avx2) - alg.descsize += 10 * sizeof(u32); --#endif -+ - return crypto_register_shash(&alg); - } - ---- a/crypto/poly1305_generic.c -+++ b/crypto/poly1305_generic.c -@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_de - - poly1305_core_init(&dctx->h); - dctx->buflen = 0; -- dctx->rset = false; -+ dctx->rset = 0; - dctx->sset = false; - - return 0; -@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1 - srclen = datalen; - } - -- poly1305_core_blocks(&dctx->h, &dctx->r, src, -+ poly1305_core_blocks(&dctx->h, dctx->r, src, - srclen / POLY1305_BLOCK_SIZE, 1); - } - -@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_d - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, - POLY1305_BLOCK_SIZE - dctx->buflen); -- poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0); -+ poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0); - } - - poly1305_core_emit(&dctx->h, digest); ---- a/include/crypto/internal/poly1305.h -+++ b/include/crypto/internal/poly1305.h -@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey( - { - if (!dctx->sset) { - if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_core_setkey(&dctx->r, src); -+ poly1305_core_setkey(dctx->r, src); - src += POLY1305_BLOCK_SIZE; - srclen -= POLY1305_BLOCK_SIZE; -- dctx->rset = true; -+ dctx->rset = 1; - } - if (srclen >= POLY1305_BLOCK_SIZE) { - dctx->s[0] = get_unaligned_le32(src + 0); ---- a/include/crypto/poly1305.h -+++ b/include/crypto/poly1305.h -@@ -22,20 +22,20 @@ struct poly1305_state { - }; - - struct poly1305_desc_ctx { -- /* key */ -- struct poly1305_key r; -- /* finalize key */ -- u32 
s[4]; -- /* accumulator */ -- struct poly1305_state h; - /* partial buffer */ - u8 buf[POLY1305_BLOCK_SIZE]; - /* bytes used in partial buffer */ - unsigned int buflen; -- /* r key has been set */ -- bool rset; -- /* s key has been set */ -+ /* how many keys have been set in r[] */ -+ unsigned short rset; -+ /* whether s[] has been set */ - bool sset; -+ /* finalize key */ -+ u32 s[4]; -+ /* accumulator */ -+ struct poly1305_state h; -+ /* key */ -+ struct poly1305_key r[1]; - }; - - #endif diff --git a/target/linux/generic/backport-5.4/080-wireguard-0015-crypto-poly1305-expose-init-update-final-library-int.patch b/target/linux/generic/backport-5.4/080-wireguard-0015-crypto-poly1305-expose-init-update-final-library-int.patch deleted file mode 100644 index bf8e90bf02..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0015-crypto-poly1305-expose-init-update-final-library-int.patch +++ /dev/null @@ -1,224 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:21 +0100 -Subject: [PATCH] crypto: poly1305 - expose init/update/final library interface - -commit a1d93064094cc5e24d64e35cf093e7191d0c9344 upstream. - -Expose the existing generic Poly1305 code via a init/update/final -library interface so that callers are not required to go through -the crypto API's shash abstraction to access it. At the same time, -make some preparations so that the library implementation can be -superseded by an accelerated arch-specific version in the future. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - crypto/poly1305_generic.c | 22 +----------- - include/crypto/poly1305.h | 38 +++++++++++++++++++- - lib/crypto/Kconfig | 26 ++++++++++++++ - lib/crypto/poly1305.c | 74 +++++++++++++++++++++++++++++++++++++++ - 4 files changed, 138 insertions(+), 22 deletions(-) - ---- a/crypto/poly1305_generic.c -+++ b/crypto/poly1305_generic.c -@@ -85,31 +85,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update - int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -- __le32 digest[4]; -- u64 f = 0; - - if (unlikely(!dctx->sset)) - return -ENOKEY; - -- if (unlikely(dctx->buflen)) { -- dctx->buf[dctx->buflen++] = 1; -- memset(dctx->buf + dctx->buflen, 0, -- POLY1305_BLOCK_SIZE - dctx->buflen); -- poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0); -- } -- -- poly1305_core_emit(&dctx->h, digest); -- -- /* mac = (h + s) % (2^128) */ -- f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0]; -- put_unaligned_le32(f, dst + 0); -- f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1]; -- put_unaligned_le32(f, dst + 4); -- f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2]; -- put_unaligned_le32(f, dst + 8); -- f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3]; -- put_unaligned_le32(f, dst + 12); -- -+ poly1305_final_generic(dctx, dst); - return 0; - } - EXPORT_SYMBOL_GPL(crypto_poly1305_final); ---- a/include/crypto/poly1305.h -+++ b/include/crypto/poly1305.h -@@ -35,7 +35,43 @@ struct poly1305_desc_ctx { - /* accumulator */ - struct poly1305_state h; - /* key */ -- struct poly1305_key r[1]; -+ struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; - }; - -+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key); -+void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key); -+ -+static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) -+{ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) -+ poly1305_init_arch(desc, key); -+ 
else -+ poly1305_init_generic(desc, key); -+} -+ -+void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src, -+ unsigned int nbytes); -+void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, -+ unsigned int nbytes); -+ -+static inline void poly1305_update(struct poly1305_desc_ctx *desc, -+ const u8 *src, unsigned int nbytes) -+{ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) -+ poly1305_update_arch(desc, src, nbytes); -+ else -+ poly1305_update_generic(desc, src, nbytes); -+} -+ -+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest); -+void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest); -+ -+static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest) -+{ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) -+ poly1305_final_arch(desc, digest); -+ else -+ poly1305_final_generic(desc, digest); -+} -+ - #endif ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -37,8 +37,34 @@ config CRYPTO_LIB_CHACHA - config CRYPTO_LIB_DES - tristate - -+config CRYPTO_LIB_POLY1305_RSIZE -+ int -+ default 1 -+ -+config CRYPTO_ARCH_HAVE_LIB_POLY1305 -+ tristate -+ help -+ Declares whether the architecture provides an arch-specific -+ accelerated implementation of the Poly1305 library interface, -+ either builtin or as a module. -+ - config CRYPTO_LIB_POLY1305_GENERIC - tristate -+ help -+ This symbol can be depended upon by arch implementations of the -+ Poly1305 library interface that require the generic code as a -+ fallback, e.g., for SIMD implementations. If no arch specific -+ implementation is enabled, this implementation serves the users -+ of CRYPTO_LIB_POLY1305. -+ -+config CRYPTO_LIB_POLY1305 -+ tristate "Poly1305 library interface" -+ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 -+ select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n -+ help -+ Enable the Poly1305 library interface. 
This interface may be fulfilled -+ by either the generic implementation or an arch-specific one, if one -+ is available and enabled. - - config CRYPTO_LIB_SHA256 - tristate ---- a/lib/crypto/poly1305.c -+++ b/lib/crypto/poly1305.c -@@ -154,5 +154,79 @@ void poly1305_core_emit(const struct pol - } - EXPORT_SYMBOL_GPL(poly1305_core_emit); - -+void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) -+{ -+ poly1305_core_setkey(desc->r, key); -+ desc->s[0] = get_unaligned_le32(key + 16); -+ desc->s[1] = get_unaligned_le32(key + 20); -+ desc->s[2] = get_unaligned_le32(key + 24); -+ desc->s[3] = get_unaligned_le32(key + 28); -+ poly1305_core_init(&desc->h); -+ desc->buflen = 0; -+ desc->sset = true; -+ desc->rset = 1; -+} -+EXPORT_SYMBOL_GPL(poly1305_init_generic); -+ -+void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src, -+ unsigned int nbytes) -+{ -+ unsigned int bytes; -+ -+ if (unlikely(desc->buflen)) { -+ bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen); -+ memcpy(desc->buf + desc->buflen, src, bytes); -+ src += bytes; -+ nbytes -= bytes; -+ desc->buflen += bytes; -+ -+ if (desc->buflen == POLY1305_BLOCK_SIZE) { -+ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1); -+ desc->buflen = 0; -+ } -+ } -+ -+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { -+ poly1305_core_blocks(&desc->h, desc->r, src, -+ nbytes / POLY1305_BLOCK_SIZE, 1); -+ src += nbytes - (nbytes % POLY1305_BLOCK_SIZE); -+ nbytes %= POLY1305_BLOCK_SIZE; -+ } -+ -+ if (unlikely(nbytes)) { -+ desc->buflen = nbytes; -+ memcpy(desc->buf, src, nbytes); -+ } -+} -+EXPORT_SYMBOL_GPL(poly1305_update_generic); -+ -+void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) -+{ -+ __le32 digest[4]; -+ u64 f = 0; -+ -+ if (unlikely(desc->buflen)) { -+ desc->buf[desc->buflen++] = 1; -+ memset(desc->buf + desc->buflen, 0, -+ POLY1305_BLOCK_SIZE - desc->buflen); -+ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0); -+ } -+ -+ 
poly1305_core_emit(&desc->h, digest); -+ -+ /* mac = (h + s) % (2^128) */ -+ f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; -+ put_unaligned_le32(f, dst + 0); -+ f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; -+ put_unaligned_le32(f, dst + 4); -+ f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; -+ put_unaligned_le32(f, dst + 8); -+ f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; -+ put_unaligned_le32(f, dst + 12); -+ -+ *desc = (struct poly1305_desc_ctx){}; -+} -+EXPORT_SYMBOL_GPL(poly1305_final_generic); -+ - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0016-crypto-x86-poly1305-depend-on-generic-library-not-ge.patch b/target/linux/generic/backport-5.4/080-wireguard-0016-crypto-x86-poly1305-depend-on-generic-library-not-ge.patch deleted file mode 100644 index 8ea63f3b91..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0016-crypto-x86-poly1305-depend-on-generic-library-not-ge.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:22 +0100 -Subject: [PATCH] crypto: x86/poly1305 - depend on generic library not generic - shash - -commit 1b2c6a5120489d41c8ea3b8dacd0b4586289b158 upstream. - -Remove the dependency on the generic Poly1305 driver. Instead, depend -on the generic library so that we only reuse code without pulling in -the generic skcipher implementation as well. - -While at it, remove the logic that prefers the non-SIMD path for short -inputs - this is no longer necessary after recent FPU handling changes -on x86. - -Since this removes the last remaining user of the routines exported -by the generic shash driver, unexport them and make them static. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305_glue.c | 66 +++++++++++++++++++++++++----- - crypto/Kconfig | 2 +- - crypto/poly1305_generic.c | 11 ++--- - include/crypto/internal/poly1305.h | 9 ---- - 4 files changed, 60 insertions(+), 28 deletions(-) - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -34,6 +34,24 @@ static void poly1305_simd_mult(u32 *a, c - poly1305_block_sse2(a, m, b, 1); - } - -+static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, -+ const u8 *src, unsigned int srclen) -+{ -+ unsigned int datalen; -+ -+ if (unlikely(!dctx->sset)) { -+ datalen = crypto_poly1305_setdesckey(dctx, src, srclen); -+ src += srclen - datalen; -+ srclen = datalen; -+ } -+ if (srclen >= POLY1305_BLOCK_SIZE) { -+ poly1305_core_blocks(&dctx->h, dctx->r, src, -+ srclen / POLY1305_BLOCK_SIZE, 1); -+ srclen %= POLY1305_BLOCK_SIZE; -+ } -+ return srclen; -+} -+ - static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) - { -@@ -91,12 +109,6 @@ static int poly1305_simd_update(struct s - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - unsigned int bytes; - -- /* kernel_fpu_begin/end is costly, use fallback for small updates */ -- if (srclen <= 288 || !crypto_simd_usable()) -- return crypto_poly1305_update(desc, src, srclen); -- -- kernel_fpu_begin(); -- - if (unlikely(dctx->buflen)) { - bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); - memcpy(dctx->buf + dctx->buflen, src, bytes); -@@ -105,25 +117,57 @@ static int poly1305_simd_update(struct s - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { -- poly1305_simd_blocks(dctx, dctx->buf, -- POLY1305_BLOCK_SIZE); -+ if (likely(crypto_simd_usable())) { -+ kernel_fpu_begin(); -+ poly1305_simd_blocks(dctx, dctx->buf, -+ POLY1305_BLOCK_SIZE); -+ kernel_fpu_end(); -+ } else { -+ poly1305_scalar_blocks(dctx, dctx->buf, -+ POLY1305_BLOCK_SIZE); -+ } - dctx->buflen = 0; - } - } - - 
if (likely(srclen >= POLY1305_BLOCK_SIZE)) { -- bytes = poly1305_simd_blocks(dctx, src, srclen); -+ if (likely(crypto_simd_usable())) { -+ kernel_fpu_begin(); -+ bytes = poly1305_simd_blocks(dctx, src, srclen); -+ kernel_fpu_end(); -+ } else { -+ bytes = poly1305_scalar_blocks(dctx, src, srclen); -+ } - src += srclen - bytes; - srclen = bytes; - } - -- kernel_fpu_end(); -- - if (unlikely(srclen)) { - dctx->buflen = srclen; - memcpy(dctx->buf, src, srclen); - } -+} -+ -+static int crypto_poly1305_init(struct shash_desc *desc) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ poly1305_core_init(&dctx->h); -+ dctx->buflen = 0; -+ dctx->rset = 0; -+ dctx->sset = false; -+ -+ return 0; -+} -+ -+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ if (unlikely(!dctx->sset)) -+ return -ENOKEY; - -+ poly1305_final_generic(dctx, dst); - return 0; - } - ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -697,7 +697,7 @@ config CRYPTO_POLY1305 - config CRYPTO_POLY1305_X86_64 - tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)" - depends on X86 && 64BIT -- select CRYPTO_POLY1305 -+ select CRYPTO_LIB_POLY1305_GENERIC - help - Poly1305 authenticator algorithm, RFC7539. 
- ---- a/crypto/poly1305_generic.c -+++ b/crypto/poly1305_generic.c -@@ -19,7 +19,7 @@ - #include <linux/module.h> - #include <asm/unaligned.h> - --int crypto_poly1305_init(struct shash_desc *desc) -+static int crypto_poly1305_init(struct shash_desc *desc) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - -@@ -30,7 +30,6 @@ int crypto_poly1305_init(struct shash_de - - return 0; - } --EXPORT_SYMBOL_GPL(crypto_poly1305_init); - - static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int srclen) -@@ -47,8 +46,8 @@ static void poly1305_blocks(struct poly1 - srclen / POLY1305_BLOCK_SIZE, 1); - } - --int crypto_poly1305_update(struct shash_desc *desc, -- const u8 *src, unsigned int srclen) -+static int crypto_poly1305_update(struct shash_desc *desc, -+ const u8 *src, unsigned int srclen) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - unsigned int bytes; -@@ -80,9 +79,8 @@ int crypto_poly1305_update(struct shash_ - - return 0; - } --EXPORT_SYMBOL_GPL(crypto_poly1305_update); - --int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) -+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - -@@ -92,7 +90,6 @@ int crypto_poly1305_final(struct shash_d - poly1305_final_generic(dctx, dst); - return 0; - } --EXPORT_SYMBOL_GPL(crypto_poly1305_final); - - static struct shash_alg poly1305_alg = { - .digestsize = POLY1305_DIGEST_SIZE, ---- a/include/crypto/internal/poly1305.h -+++ b/include/crypto/internal/poly1305.h -@@ -10,8 +10,6 @@ - #include <linux/types.h> - #include <crypto/poly1305.h> - --struct shash_desc; -- - /* - * Poly1305 core functions. These implement the ε-almost-∆-universal hash - * function underlying the Poly1305 MAC, i.e. 
they don't add an encrypted nonce -@@ -28,13 +26,6 @@ void poly1305_core_blocks(struct poly130 - unsigned int nblocks, u32 hibit); - void poly1305_core_emit(const struct poly1305_state *state, void *dst); - --/* Crypto API helper functions for the Poly1305 MAC */ --int crypto_poly1305_init(struct shash_desc *desc); -- --int crypto_poly1305_update(struct shash_desc *desc, -- const u8 *src, unsigned int srclen); --int crypto_poly1305_final(struct shash_desc *desc, u8 *dst); -- - /* - * Poly1305 requires a unique key for each tag, which implies that we can't set - * it on the tfm that gets accessed by multiple users simultaneously. Instead we diff --git a/target/linux/generic/backport-5.4/080-wireguard-0017-crypto-x86-poly1305-expose-existing-driver-as-poly13.patch b/target/linux/generic/backport-5.4/080-wireguard-0017-crypto-x86-poly1305-expose-existing-driver-as-poly13.patch deleted file mode 100644 index 6514987b4d..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0017-crypto-x86-poly1305-expose-existing-driver-as-poly13.patch +++ /dev/null @@ -1,163 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:23 +0100 -Subject: [PATCH] crypto: x86/poly1305 - expose existing driver as poly1305 - library - -commit f0e89bcfbb894e5844cd1bbf6b3cf7c63cb0f5ac upstream. - -Implement the arch init/update/final Poly1305 library routines in the -accelerated SIMD driver for x86 so they are accessible to users of -the Poly1305 library interface as well. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305_glue.c | 57 ++++++++++++++++++++++++--------- - crypto/Kconfig | 1 + - lib/crypto/Kconfig | 1 + - 3 files changed, 43 insertions(+), 16 deletions(-) - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -10,6 +10,7 @@ - #include <crypto/internal/poly1305.h> - #include <crypto/internal/simd.h> - #include <linux/crypto.h> -+#include <linux/jump_label.h> - #include <linux/kernel.h> - #include <linux/module.h> - #include <asm/simd.h> -@@ -21,7 +22,8 @@ asmlinkage void poly1305_2block_sse2(u32 - asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, - unsigned int blocks, const u32 *u); - --static bool poly1305_use_avx2 __ro_after_init; -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); - - static void poly1305_simd_mult(u32 *a, const u32 *b) - { -@@ -64,7 +66,7 @@ static unsigned int poly1305_simd_blocks - } - - if (IS_ENABLED(CONFIG_AS_AVX2) && -- poly1305_use_avx2 && -+ static_branch_likely(&poly1305_use_avx2) && - srclen >= POLY1305_BLOCK_SIZE * 4) { - if (unlikely(dctx->rset < 4)) { - if (dctx->rset < 2) { -@@ -103,10 +105,15 @@ static unsigned int poly1305_simd_blocks - return srclen; - } - --static int poly1305_simd_update(struct shash_desc *desc, -- const u8 *src, unsigned int srclen) -+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) -+{ -+ poly1305_init_generic(desc, key); -+} -+EXPORT_SYMBOL(poly1305_init_arch); -+ -+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, -+ unsigned int srclen) - { -- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - unsigned int bytes; - - if (unlikely(dctx->buflen)) { -@@ -117,7 +124,8 @@ static int poly1305_simd_update(struct s - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { -- if (likely(crypto_simd_usable())) { -+ if (static_branch_likely(&poly1305_use_simd) 
&& -+ likely(crypto_simd_usable())) { - kernel_fpu_begin(); - poly1305_simd_blocks(dctx, dctx->buf, - POLY1305_BLOCK_SIZE); -@@ -131,7 +139,8 @@ static int poly1305_simd_update(struct s - } - - if (likely(srclen >= POLY1305_BLOCK_SIZE)) { -- if (likely(crypto_simd_usable())) { -+ if (static_branch_likely(&poly1305_use_simd) && -+ likely(crypto_simd_usable())) { - kernel_fpu_begin(); - bytes = poly1305_simd_blocks(dctx, src, srclen); - kernel_fpu_end(); -@@ -147,6 +156,13 @@ static int poly1305_simd_update(struct s - memcpy(dctx->buf, src, srclen); - } - } -+EXPORT_SYMBOL(poly1305_update_arch); -+ -+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) -+{ -+ poly1305_final_generic(desc, digest); -+} -+EXPORT_SYMBOL(poly1305_final_arch); - - static int crypto_poly1305_init(struct shash_desc *desc) - { -@@ -171,6 +187,15 @@ static int crypto_poly1305_final(struct - return 0; - } - -+static int poly1305_simd_update(struct shash_desc *desc, -+ const u8 *src, unsigned int srclen) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ poly1305_update_arch(dctx, src, srclen); -+ return 0; -+} -+ - static struct shash_alg alg = { - .digestsize = POLY1305_DIGEST_SIZE, - .init = crypto_poly1305_init, -@@ -189,15 +214,15 @@ static struct shash_alg alg = { - static int __init poly1305_simd_mod_init(void) - { - if (!boot_cpu_has(X86_FEATURE_XMM2)) -- return -ENODEV; -+ return 0; - -- poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) && -- boot_cpu_has(X86_FEATURE_AVX) && -- boot_cpu_has(X86_FEATURE_AVX2) && -- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); -- alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32); -- if (poly1305_use_avx2) -- alg.descsize += 10 * sizeof(u32); -+ static_branch_enable(&poly1305_use_simd); -+ -+ if (IS_ENABLED(CONFIG_AS_AVX2) && -+ boot_cpu_has(X86_FEATURE_AVX) && -+ boot_cpu_has(X86_FEATURE_AVX2) && -+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) -+ 
static_branch_enable(&poly1305_use_avx2); - - return crypto_register_shash(&alg); - } ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -698,6 +698,7 @@ config CRYPTO_POLY1305_X86_64 - tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)" - depends on X86 && 64BIT - select CRYPTO_LIB_POLY1305_GENERIC -+ select CRYPTO_ARCH_HAVE_LIB_POLY1305 - help - Poly1305 authenticator algorithm, RFC7539. - ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -39,6 +39,7 @@ config CRYPTO_LIB_DES - - config CRYPTO_LIB_POLY1305_RSIZE - int -+ default 4 if X86_64 - default 1 - - config CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0018-crypto-arm64-poly1305-incorporate-OpenSSL-CRYPTOGAMS.patch b/target/linux/generic/backport-5.4/080-wireguard-0018-crypto-arm64-poly1305-incorporate-OpenSSL-CRYPTOGAMS.patch deleted file mode 100644 index 464c6568f6..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0018-crypto-arm64-poly1305-incorporate-OpenSSL-CRYPTOGAMS.patch +++ /dev/null @@ -1,2083 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:24 +0100 -Subject: [PATCH] crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON - implementation - -commit f569ca16475155013525686d0f73bc379c67e635 upstream. - -This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation -for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL -project. The file 'poly1305-armv8.pl' is taken straight from this upstream -GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f, -and already contains all the changes required to build it as part of a -Linux kernel module. 
- -[0] https://github.com/dot-asm/cryptogams - -Co-developed-by: Andy Polyakov <appro@cryptogams.org> -Signed-off-by: Andy Polyakov <appro@cryptogams.org> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm64/crypto/Kconfig | 6 + - arch/arm64/crypto/Makefile | 10 +- - arch/arm64/crypto/poly1305-armv8.pl | 913 ++++++++++++++++++++++ - arch/arm64/crypto/poly1305-core.S_shipped | 835 ++++++++++++++++++++ - arch/arm64/crypto/poly1305-glue.c | 237 ++++++ - lib/crypto/Kconfig | 1 + - 6 files changed, 2001 insertions(+), 1 deletion(-) - create mode 100644 arch/arm64/crypto/poly1305-armv8.pl - create mode 100644 arch/arm64/crypto/poly1305-core.S_shipped - create mode 100644 arch/arm64/crypto/poly1305-glue.c - ---- a/arch/arm64/crypto/Kconfig -+++ b/arch/arm64/crypto/Kconfig -@@ -106,6 +106,12 @@ config CRYPTO_CHACHA20_NEON - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA - -+config CRYPTO_POLY1305_NEON -+ tristate "Poly1305 hash function using scalar or NEON instructions" -+ depends on KERNEL_MODE_NEON -+ select CRYPTO_HASH -+ select CRYPTO_ARCH_HAVE_LIB_POLY1305 -+ - config CRYPTO_NHPOLY1305_NEON - tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)" - depends on KERNEL_MODE_NEON ---- a/arch/arm64/crypto/Makefile -+++ b/arch/arm64/crypto/Makefile -@@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-c - obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o - chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o - -+obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o -+poly1305-neon-y := poly1305-core.o poly1305-glue.o -+AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64 -+ - obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o - nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o - -@@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO - quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm 
= $(PERL) $(<) void $(@) - -+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl -+ $(call cmd,perlasm) -+ - $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl - $(call cmd,perlasm) - - $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl - $(call cmd,perlasm) -+ - endif - --clean-files += sha256-core.S sha512-core.S -+clean-files += poly1305-core.S sha256-core.S sha512-core.S ---- /dev/null -+++ b/arch/arm64/crypto/poly1305-armv8.pl -@@ -0,0 +1,913 @@ -+#!/usr/bin/env perl -+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause -+# -+# ==================================================================== -+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL -+# project. -+# ==================================================================== -+# -+# This module implements Poly1305 hash for ARMv8. -+# -+# June 2015 -+# -+# Numbers are cycles per processed byte with poly1305_blocks alone. -+# -+# IALU/gcc-4.9 NEON -+# -+# Apple A7 1.86/+5% 0.72 -+# Cortex-A53 2.69/+58% 1.47 -+# Cortex-A57 2.70/+7% 1.14 -+# Denver 1.64/+50% 1.18(*) -+# X-Gene 2.13/+68% 2.27 -+# Mongoose 1.77/+75% 1.12 -+# Kryo 2.70/+55% 1.13 -+# ThunderX2 1.17/+95% 1.36 -+# -+# (*) estimate based on resources availability is less than 1.0, -+# i.e. 
measured result is worse than expected, presumably binary -+# translator is not almighty; -+ -+$flavour=shift; -+$output=shift; -+ -+if ($flavour && $flavour ne "void") { -+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or -+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or -+ die "can't locate arm-xlate.pl"; -+ -+ open STDOUT,"| \"$^X\" $xlate $flavour $output"; -+} else { -+ open STDOUT,">$output"; -+} -+ -+my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3)); -+my ($mac,$nonce)=($inp,$len); -+ -+my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14)); -+ -+$code.=<<___; -+#ifndef __KERNEL__ -+# include "arm_arch.h" -+.extern OPENSSL_armcap_P -+#endif -+ -+.text -+ -+// forward "declarations" are required for Apple -+.globl poly1305_blocks -+.globl poly1305_emit -+ -+.globl poly1305_init -+.type poly1305_init,%function -+.align 5 -+poly1305_init: -+ cmp $inp,xzr -+ stp xzr,xzr,[$ctx] // zero hash value -+ stp xzr,xzr,[$ctx,#16] // [along with is_base2_26] -+ -+ csel x0,xzr,x0,eq -+ b.eq .Lno_key -+ -+#ifndef __KERNEL__ -+ adrp x17,OPENSSL_armcap_P -+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P] -+#endif -+ -+ ldp $r0,$r1,[$inp] // load key -+ mov $s1,#0xfffffffc0fffffff -+ movk $s1,#0x0fff,lsl#48 -+#ifdef __AARCH64EB__ -+ rev $r0,$r0 // flip bytes -+ rev $r1,$r1 -+#endif -+ and $r0,$r0,$s1 // &=0ffffffc0fffffff -+ and $s1,$s1,#-4 -+ and $r1,$r1,$s1 // &=0ffffffc0ffffffc -+ mov w#$s1,#-1 -+ stp $r0,$r1,[$ctx,#32] // save key value -+ str w#$s1,[$ctx,#48] // impossible key power value -+ -+#ifndef __KERNEL__ -+ tst w17,#ARMV7_NEON -+ -+ adr $d0,.Lpoly1305_blocks -+ adr $r0,.Lpoly1305_blocks_neon -+ adr $d1,.Lpoly1305_emit -+ -+ csel $d0,$d0,$r0,eq -+ -+# ifdef __ILP32__ -+ stp w#$d0,w#$d1,[$len] -+# else -+ stp $d0,$d1,[$len] -+# endif -+#endif -+ mov x0,#1 -+.Lno_key: -+ ret -+.size poly1305_init,.-poly1305_init -+ -+.type poly1305_blocks,%function -+.align 5 -+poly1305_blocks: -+.Lpoly1305_blocks: -+ ands 
$len,$len,#-16 -+ b.eq .Lno_data -+ -+ ldp $h0,$h1,[$ctx] // load hash value -+ ldp $h2,x17,[$ctx,#16] // [along with is_base2_26] -+ ldp $r0,$r1,[$ctx,#32] // load key value -+ -+#ifdef __AARCH64EB__ -+ lsr $d0,$h0,#32 -+ mov w#$d1,w#$h0 -+ lsr $d2,$h1,#32 -+ mov w15,w#$h1 -+ lsr x16,$h2,#32 -+#else -+ mov w#$d0,w#$h0 -+ lsr $d1,$h0,#32 -+ mov w#$d2,w#$h1 -+ lsr x15,$h1,#32 -+ mov w16,w#$h2 -+#endif -+ -+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 -+ lsr $d1,$d2,#12 -+ adds $d0,$d0,$d2,lsl#52 -+ add $d1,$d1,x15,lsl#14 -+ adc $d1,$d1,xzr -+ lsr $d2,x16,#24 -+ adds $d1,$d1,x16,lsl#40 -+ adc $d2,$d2,xzr -+ -+ cmp x17,#0 // is_base2_26? -+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) -+ csel $h0,$h0,$d0,eq // choose between radixes -+ csel $h1,$h1,$d1,eq -+ csel $h2,$h2,$d2,eq -+ -+.Loop: -+ ldp $t0,$t1,[$inp],#16 // load input -+ sub $len,$len,#16 -+#ifdef __AARCH64EB__ -+ rev $t0,$t0 -+ rev $t1,$t1 -+#endif -+ adds $h0,$h0,$t0 // accumulate input -+ adcs $h1,$h1,$t1 -+ -+ mul $d0,$h0,$r0 // h0*r0 -+ adc $h2,$h2,$padbit -+ umulh $d1,$h0,$r0 -+ -+ mul $t0,$h1,$s1 // h1*5*r1 -+ umulh $t1,$h1,$s1 -+ -+ adds $d0,$d0,$t0 -+ mul $t0,$h0,$r1 // h0*r1 -+ adc $d1,$d1,$t1 -+ umulh $d2,$h0,$r1 -+ -+ adds $d1,$d1,$t0 -+ mul $t0,$h1,$r0 // h1*r0 -+ adc $d2,$d2,xzr -+ umulh $t1,$h1,$r0 -+ -+ adds $d1,$d1,$t0 -+ mul $t0,$h2,$s1 // h2*5*r1 -+ adc $d2,$d2,$t1 -+ mul $t1,$h2,$r0 // h2*r0 -+ -+ adds $d1,$d1,$t0 -+ adc $d2,$d2,$t1 -+ -+ and $t0,$d2,#-4 // final reduction -+ and $h2,$d2,#3 -+ add $t0,$t0,$d2,lsr#2 -+ adds $h0,$d0,$t0 -+ adcs $h1,$d1,xzr -+ adc $h2,$h2,xzr -+ -+ cbnz $len,.Loop -+ -+ stp $h0,$h1,[$ctx] // store hash value -+ stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26] -+ -+.Lno_data: -+ ret -+.size poly1305_blocks,.-poly1305_blocks -+ -+.type poly1305_emit,%function -+.align 5 -+poly1305_emit: -+.Lpoly1305_emit: -+ ldp $h0,$h1,[$ctx] // load hash base 2^64 -+ ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26] -+ ldp $t0,$t1,[$nonce] // load nonce -+ -+#ifdef 
__AARCH64EB__ -+ lsr $d0,$h0,#32 -+ mov w#$d1,w#$h0 -+ lsr $d2,$h1,#32 -+ mov w15,w#$h1 -+ lsr x16,$h2,#32 -+#else -+ mov w#$d0,w#$h0 -+ lsr $d1,$h0,#32 -+ mov w#$d2,w#$h1 -+ lsr x15,$h1,#32 -+ mov w16,w#$h2 -+#endif -+ -+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64 -+ lsr $d1,$d2,#12 -+ adds $d0,$d0,$d2,lsl#52 -+ add $d1,$d1,x15,lsl#14 -+ adc $d1,$d1,xzr -+ lsr $d2,x16,#24 -+ adds $d1,$d1,x16,lsl#40 -+ adc $d2,$d2,xzr -+ -+ cmp $r0,#0 // is_base2_26? -+ csel $h0,$h0,$d0,eq // choose between radixes -+ csel $h1,$h1,$d1,eq -+ csel $h2,$h2,$d2,eq -+ -+ adds $d0,$h0,#5 // compare to modulus -+ adcs $d1,$h1,xzr -+ adc $d2,$h2,xzr -+ -+ tst $d2,#-4 // see if it's carried/borrowed -+ -+ csel $h0,$h0,$d0,eq -+ csel $h1,$h1,$d1,eq -+ -+#ifdef __AARCH64EB__ -+ ror $t0,$t0,#32 // flip nonce words -+ ror $t1,$t1,#32 -+#endif -+ adds $h0,$h0,$t0 // accumulate nonce -+ adc $h1,$h1,$t1 -+#ifdef __AARCH64EB__ -+ rev $h0,$h0 // flip output bytes -+ rev $h1,$h1 -+#endif -+ stp $h0,$h1,[$mac] // write result -+ -+ ret -+.size poly1305_emit,.-poly1305_emit -+___ -+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8)); -+my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13)); -+my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18)); -+my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23)); -+my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28)); -+my ($T0,$T1,$MASK) = map("v$_",(29..31)); -+ -+my ($in2,$zeros)=("x16","x17"); -+my $is_base2_26 = $zeros; # borrow -+ -+$code.=<<___; -+.type poly1305_mult,%function -+.align 5 -+poly1305_mult: -+ mul $d0,$h0,$r0 // h0*r0 -+ umulh $d1,$h0,$r0 -+ -+ mul $t0,$h1,$s1 // h1*5*r1 -+ umulh $t1,$h1,$s1 -+ -+ adds $d0,$d0,$t0 -+ mul $t0,$h0,$r1 // h0*r1 -+ adc $d1,$d1,$t1 -+ umulh $d2,$h0,$r1 -+ -+ adds $d1,$d1,$t0 -+ mul $t0,$h1,$r0 // h1*r0 -+ adc $d2,$d2,xzr -+ umulh $t1,$h1,$r0 -+ -+ adds $d1,$d1,$t0 -+ mul $t0,$h2,$s1 // h2*5*r1 -+ adc $d2,$d2,$t1 -+ mul $t1,$h2,$r0 // h2*r0 -+ -+ adds $d1,$d1,$t0 
-+ adc $d2,$d2,$t1 -+ -+ and $t0,$d2,#-4 // final reduction -+ and $h2,$d2,#3 -+ add $t0,$t0,$d2,lsr#2 -+ adds $h0,$d0,$t0 -+ adcs $h1,$d1,xzr -+ adc $h2,$h2,xzr -+ -+ ret -+.size poly1305_mult,.-poly1305_mult -+ -+.type poly1305_splat,%function -+.align 4 -+poly1305_splat: -+ and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26 -+ ubfx x13,$h0,#26,#26 -+ extr x14,$h1,$h0,#52 -+ and x14,x14,#0x03ffffff -+ ubfx x15,$h1,#14,#26 -+ extr x16,$h2,$h1,#40 -+ -+ str w12,[$ctx,#16*0] // r0 -+ add w12,w13,w13,lsl#2 // r1*5 -+ str w13,[$ctx,#16*1] // r1 -+ add w13,w14,w14,lsl#2 // r2*5 -+ str w12,[$ctx,#16*2] // s1 -+ str w14,[$ctx,#16*3] // r2 -+ add w14,w15,w15,lsl#2 // r3*5 -+ str w13,[$ctx,#16*4] // s2 -+ str w15,[$ctx,#16*5] // r3 -+ add w15,w16,w16,lsl#2 // r4*5 -+ str w14,[$ctx,#16*6] // s3 -+ str w16,[$ctx,#16*7] // r4 -+ str w15,[$ctx,#16*8] // s4 -+ -+ ret -+.size poly1305_splat,.-poly1305_splat -+ -+#ifdef __KERNEL__ -+.globl poly1305_blocks_neon -+#endif -+.type poly1305_blocks_neon,%function -+.align 5 -+poly1305_blocks_neon: -+.Lpoly1305_blocks_neon: -+ ldr $is_base2_26,[$ctx,#24] -+ cmp $len,#128 -+ b.lo .Lpoly1305_blocks -+ -+ .inst 0xd503233f // paciasp -+ stp x29,x30,[sp,#-80]! -+ add x29,sp,#0 -+ -+ stp d8,d9,[sp,#16] // meet ABI requirements -+ stp d10,d11,[sp,#32] -+ stp d12,d13,[sp,#48] -+ stp d14,d15,[sp,#64] -+ -+ cbz $is_base2_26,.Lbase2_64_neon -+ -+ ldp w10,w11,[$ctx] // load hash value base 2^26 -+ ldp w12,w13,[$ctx,#8] -+ ldr w14,[$ctx,#16] -+ -+ tst $len,#31 -+ b.eq .Leven_neon -+ -+ ldp $r0,$r1,[$ctx,#32] // load key value -+ -+ add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64 -+ lsr $h1,x12,#12 -+ adds $h0,$h0,x12,lsl#52 -+ add $h1,$h1,x13,lsl#14 -+ adc $h1,$h1,xzr -+ lsr $h2,x14,#24 -+ adds $h1,$h1,x14,lsl#40 -+ adc $d2,$h2,xzr // can be partially reduced... 
-+ -+ ldp $d0,$d1,[$inp],#16 // load input -+ sub $len,$len,#16 -+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) -+ -+#ifdef __AARCH64EB__ -+ rev $d0,$d0 -+ rev $d1,$d1 -+#endif -+ adds $h0,$h0,$d0 // accumulate input -+ adcs $h1,$h1,$d1 -+ adc $h2,$h2,$padbit -+ -+ bl poly1305_mult -+ -+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 -+ ubfx x11,$h0,#26,#26 -+ extr x12,$h1,$h0,#52 -+ and x12,x12,#0x03ffffff -+ ubfx x13,$h1,#14,#26 -+ extr x14,$h2,$h1,#40 -+ -+ b .Leven_neon -+ -+.align 4 -+.Lbase2_64_neon: -+ ldp $r0,$r1,[$ctx,#32] // load key value -+ -+ ldp $h0,$h1,[$ctx] // load hash value base 2^64 -+ ldr $h2,[$ctx,#16] -+ -+ tst $len,#31 -+ b.eq .Linit_neon -+ -+ ldp $d0,$d1,[$inp],#16 // load input -+ sub $len,$len,#16 -+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) -+#ifdef __AARCH64EB__ -+ rev $d0,$d0 -+ rev $d1,$d1 -+#endif -+ adds $h0,$h0,$d0 // accumulate input -+ adcs $h1,$h1,$d1 -+ adc $h2,$h2,$padbit -+ -+ bl poly1305_mult -+ -+.Linit_neon: -+ ldr w17,[$ctx,#48] // first table element -+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26 -+ ubfx x11,$h0,#26,#26 -+ extr x12,$h1,$h0,#52 -+ and x12,x12,#0x03ffffff -+ ubfx x13,$h1,#14,#26 -+ extr x14,$h2,$h1,#40 -+ -+ cmp w17,#-1 // is value impossible? 
-+ b.ne .Leven_neon -+ -+ fmov ${H0},x10 -+ fmov ${H1},x11 -+ fmov ${H2},x12 -+ fmov ${H3},x13 -+ fmov ${H4},x14 -+ -+ ////////////////////////////////// initialize r^n table -+ mov $h0,$r0 // r^1 -+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2) -+ mov $h1,$r1 -+ mov $h2,xzr -+ add $ctx,$ctx,#48+12 -+ bl poly1305_splat -+ -+ bl poly1305_mult // r^2 -+ sub $ctx,$ctx,#4 -+ bl poly1305_splat -+ -+ bl poly1305_mult // r^3 -+ sub $ctx,$ctx,#4 -+ bl poly1305_splat -+ -+ bl poly1305_mult // r^4 -+ sub $ctx,$ctx,#4 -+ bl poly1305_splat -+ sub $ctx,$ctx,#48 // restore original $ctx -+ b .Ldo_neon -+ -+.align 4 -+.Leven_neon: -+ fmov ${H0},x10 -+ fmov ${H1},x11 -+ fmov ${H2},x12 -+ fmov ${H3},x13 -+ fmov ${H4},x14 -+ -+.Ldo_neon: -+ ldp x8,x12,[$inp,#32] // inp[2:3] -+ subs $len,$len,#64 -+ ldp x9,x13,[$inp,#48] -+ add $in2,$inp,#96 -+ adr $zeros,.Lzeros -+ -+ lsl $padbit,$padbit,#24 -+ add x15,$ctx,#48 -+ -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ and x5,x9,#0x03ffffff -+ ubfx x6,x8,#26,#26 -+ ubfx x7,x9,#26,#26 -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ extr x8,x12,x8,#52 -+ extr x9,x13,x9,#52 -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ fmov $IN23_0,x4 -+ and x8,x8,#0x03ffffff -+ and x9,x9,#0x03ffffff -+ ubfx x10,x12,#14,#26 -+ ubfx x11,x13,#14,#26 -+ add x12,$padbit,x12,lsr#40 -+ add x13,$padbit,x13,lsr#40 -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ fmov $IN23_1,x6 -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ fmov $IN23_2,x8 -+ fmov $IN23_3,x10 -+ fmov $IN23_4,x12 -+ -+ ldp x8,x12,[$inp],#16 // inp[0:1] -+ ldp x9,x13,[$inp],#48 -+ -+ ld1 {$R0,$R1,$S1,$R2},[x15],#64 -+ ld1 {$S2,$R3,$S3,$R4},[x15],#64 -+ ld1 {$S4},[x15] -+ -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ and x5,x9,#0x03ffffff -+ ubfx x6,x8,#26,#26 -+ ubfx 
x7,x9,#26,#26 -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ extr x8,x12,x8,#52 -+ extr x9,x13,x9,#52 -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ fmov $IN01_0,x4 -+ and x8,x8,#0x03ffffff -+ and x9,x9,#0x03ffffff -+ ubfx x10,x12,#14,#26 -+ ubfx x11,x13,#14,#26 -+ add x12,$padbit,x12,lsr#40 -+ add x13,$padbit,x13,lsr#40 -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ fmov $IN01_1,x6 -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ movi $MASK.2d,#-1 -+ fmov $IN01_2,x8 -+ fmov $IN01_3,x10 -+ fmov $IN01_4,x12 -+ ushr $MASK.2d,$MASK.2d,#38 -+ -+ b.ls .Lskip_loop -+ -+.align 4 -+.Loop_neon: -+ //////////////////////////////////////////////////////////////// -+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 -+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r -+ // \___________________/ -+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 -+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r -+ // \___________________/ \____________________/ -+ // -+ // Note that we start with inp[2:3]*r^2. This is because it -+ // doesn't depend on reduction in previous iteration. 
-+ //////////////////////////////////////////////////////////////// -+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 -+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 -+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 -+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 -+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 -+ -+ subs $len,$len,#64 -+ umull $ACC4,$IN23_0,${R4}[2] -+ csel $in2,$zeros,$in2,lo -+ umull $ACC3,$IN23_0,${R3}[2] -+ umull $ACC2,$IN23_0,${R2}[2] -+ ldp x8,x12,[$in2],#16 // inp[2:3] (or zero) -+ umull $ACC1,$IN23_0,${R1}[2] -+ ldp x9,x13,[$in2],#48 -+ umull $ACC0,$IN23_0,${R0}[2] -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ -+ umlal $ACC4,$IN23_1,${R3}[2] -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ umlal $ACC3,$IN23_1,${R2}[2] -+ and x5,x9,#0x03ffffff -+ umlal $ACC2,$IN23_1,${R1}[2] -+ ubfx x6,x8,#26,#26 -+ umlal $ACC1,$IN23_1,${R0}[2] -+ ubfx x7,x9,#26,#26 -+ umlal $ACC0,$IN23_1,${S4}[2] -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ -+ umlal $ACC4,$IN23_2,${R2}[2] -+ extr x8,x12,x8,#52 -+ umlal $ACC3,$IN23_2,${R1}[2] -+ extr x9,x13,x9,#52 -+ umlal $ACC2,$IN23_2,${R0}[2] -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ umlal $ACC1,$IN23_2,${S4}[2] -+ fmov $IN23_0,x4 -+ umlal $ACC0,$IN23_2,${S3}[2] -+ and x8,x8,#0x03ffffff -+ -+ umlal $ACC4,$IN23_3,${R1}[2] -+ and x9,x9,#0x03ffffff -+ umlal $ACC3,$IN23_3,${R0}[2] -+ ubfx x10,x12,#14,#26 -+ umlal $ACC2,$IN23_3,${S4}[2] -+ ubfx x11,x13,#14,#26 -+ umlal $ACC1,$IN23_3,${S3}[2] -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ umlal $ACC0,$IN23_3,${S2}[2] -+ fmov $IN23_1,x6 -+ -+ add $IN01_2,$IN01_2,$H2 -+ add x12,$padbit,x12,lsr#40 -+ umlal $ACC4,$IN23_4,${R0}[2] -+ add x13,$padbit,x13,lsr#40 -+ umlal $ACC3,$IN23_4,${S4}[2] -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ umlal $ACC2,$IN23_4,${S3}[2] -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ umlal $ACC1,$IN23_4,${S2}[2] -+ fmov $IN23_2,x8 -+ umlal $ACC0,$IN23_4,${S1}[2] -+ 
fmov $IN23_3,x10 -+ -+ //////////////////////////////////////////////////////////////// -+ // (hash+inp[0:1])*r^4 and accumulate -+ -+ add $IN01_0,$IN01_0,$H0 -+ fmov $IN23_4,x12 -+ umlal $ACC3,$IN01_2,${R1}[0] -+ ldp x8,x12,[$inp],#16 // inp[0:1] -+ umlal $ACC0,$IN01_2,${S3}[0] -+ ldp x9,x13,[$inp],#48 -+ umlal $ACC4,$IN01_2,${R2}[0] -+ umlal $ACC1,$IN01_2,${S4}[0] -+ umlal $ACC2,$IN01_2,${R0}[0] -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ -+ add $IN01_1,$IN01_1,$H1 -+ umlal $ACC3,$IN01_0,${R3}[0] -+ umlal $ACC4,$IN01_0,${R4}[0] -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ umlal $ACC2,$IN01_0,${R2}[0] -+ and x5,x9,#0x03ffffff -+ umlal $ACC0,$IN01_0,${R0}[0] -+ ubfx x6,x8,#26,#26 -+ umlal $ACC1,$IN01_0,${R1}[0] -+ ubfx x7,x9,#26,#26 -+ -+ add $IN01_3,$IN01_3,$H3 -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ umlal $ACC3,$IN01_1,${R2}[0] -+ extr x8,x12,x8,#52 -+ umlal $ACC4,$IN01_1,${R3}[0] -+ extr x9,x13,x9,#52 -+ umlal $ACC0,$IN01_1,${S4}[0] -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ umlal $ACC2,$IN01_1,${R1}[0] -+ fmov $IN01_0,x4 -+ umlal $ACC1,$IN01_1,${R0}[0] -+ and x8,x8,#0x03ffffff -+ -+ add $IN01_4,$IN01_4,$H4 -+ and x9,x9,#0x03ffffff -+ umlal $ACC3,$IN01_3,${R0}[0] -+ ubfx x10,x12,#14,#26 -+ umlal $ACC0,$IN01_3,${S2}[0] -+ ubfx x11,x13,#14,#26 -+ umlal $ACC4,$IN01_3,${R1}[0] -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ umlal $ACC1,$IN01_3,${S3}[0] -+ fmov $IN01_1,x6 -+ umlal $ACC2,$IN01_3,${S4}[0] -+ add x12,$padbit,x12,lsr#40 -+ -+ umlal $ACC3,$IN01_4,${S4}[0] -+ add x13,$padbit,x13,lsr#40 -+ umlal $ACC0,$IN01_4,${S1}[0] -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ umlal $ACC4,$IN01_4,${R0}[0] -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ umlal $ACC1,$IN01_4,${S2}[0] -+ fmov $IN01_2,x8 -+ umlal $ACC2,$IN01_4,${S3}[0] -+ fmov $IN01_3,x10 -+ fmov $IN01_4,x12 -+ -+ ///////////////////////////////////////////////////////////////// -+ // lazy reduction as discussed in "NEON crypto" by 
D.J. Bernstein -+ // and P. Schwabe -+ // -+ // [see discussion in poly1305-armv4 module] -+ -+ ushr $T0.2d,$ACC3,#26 -+ xtn $H3,$ACC3 -+ ushr $T1.2d,$ACC0,#26 -+ and $ACC0,$ACC0,$MASK.2d -+ add $ACC4,$ACC4,$T0.2d // h3 -> h4 -+ bic $H3,#0xfc,lsl#24 // &=0x03ffffff -+ add $ACC1,$ACC1,$T1.2d // h0 -> h1 -+ -+ ushr $T0.2d,$ACC4,#26 -+ xtn $H4,$ACC4 -+ ushr $T1.2d,$ACC1,#26 -+ xtn $H1,$ACC1 -+ bic $H4,#0xfc,lsl#24 -+ add $ACC2,$ACC2,$T1.2d // h1 -> h2 -+ -+ add $ACC0,$ACC0,$T0.2d -+ shl $T0.2d,$T0.2d,#2 -+ shrn $T1.2s,$ACC2,#26 -+ xtn $H2,$ACC2 -+ add $ACC0,$ACC0,$T0.2d // h4 -> h0 -+ bic $H1,#0xfc,lsl#24 -+ add $H3,$H3,$T1.2s // h2 -> h3 -+ bic $H2,#0xfc,lsl#24 -+ -+ shrn $T0.2s,$ACC0,#26 -+ xtn $H0,$ACC0 -+ ushr $T1.2s,$H3,#26 -+ bic $H3,#0xfc,lsl#24 -+ bic $H0,#0xfc,lsl#24 -+ add $H1,$H1,$T0.2s // h0 -> h1 -+ add $H4,$H4,$T1.2s // h3 -> h4 -+ -+ b.hi .Loop_neon -+ -+.Lskip_loop: -+ dup $IN23_2,${IN23_2}[0] -+ add $IN01_2,$IN01_2,$H2 -+ -+ //////////////////////////////////////////////////////////////// -+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 -+ -+ adds $len,$len,#32 -+ b.ne .Long_tail -+ -+ dup $IN23_2,${IN01_2}[0] -+ add $IN23_0,$IN01_0,$H0 -+ add $IN23_3,$IN01_3,$H3 -+ add $IN23_1,$IN01_1,$H1 -+ add $IN23_4,$IN01_4,$H4 -+ -+.Long_tail: -+ dup $IN23_0,${IN23_0}[0] -+ umull2 $ACC0,$IN23_2,${S3} -+ umull2 $ACC3,$IN23_2,${R1} -+ umull2 $ACC4,$IN23_2,${R2} -+ umull2 $ACC2,$IN23_2,${R0} -+ umull2 $ACC1,$IN23_2,${S4} -+ -+ dup $IN23_1,${IN23_1}[0] -+ umlal2 $ACC0,$IN23_0,${R0} -+ umlal2 $ACC2,$IN23_0,${R2} -+ umlal2 $ACC3,$IN23_0,${R3} -+ umlal2 $ACC4,$IN23_0,${R4} -+ umlal2 $ACC1,$IN23_0,${R1} -+ -+ dup $IN23_3,${IN23_3}[0] -+ umlal2 $ACC0,$IN23_1,${S4} -+ umlal2 $ACC3,$IN23_1,${R2} -+ umlal2 $ACC2,$IN23_1,${R1} -+ umlal2 $ACC4,$IN23_1,${R3} -+ umlal2 $ACC1,$IN23_1,${R0} -+ -+ dup $IN23_4,${IN23_4}[0] -+ umlal2 $ACC3,$IN23_3,${R0} -+ umlal2 $ACC4,$IN23_3,${R1} -+ umlal2 $ACC0,$IN23_3,${S2} -+ umlal2 $ACC1,$IN23_3,${S3} -+ umlal2 $ACC2,$IN23_3,${S4} -+ -+ 
umlal2 $ACC3,$IN23_4,${S4} -+ umlal2 $ACC0,$IN23_4,${S1} -+ umlal2 $ACC4,$IN23_4,${R0} -+ umlal2 $ACC1,$IN23_4,${S2} -+ umlal2 $ACC2,$IN23_4,${S3} -+ -+ b.eq .Lshort_tail -+ -+ //////////////////////////////////////////////////////////////// -+ // (hash+inp[0:1])*r^4:r^3 and accumulate -+ -+ add $IN01_0,$IN01_0,$H0 -+ umlal $ACC3,$IN01_2,${R1} -+ umlal $ACC0,$IN01_2,${S3} -+ umlal $ACC4,$IN01_2,${R2} -+ umlal $ACC1,$IN01_2,${S4} -+ umlal $ACC2,$IN01_2,${R0} -+ -+ add $IN01_1,$IN01_1,$H1 -+ umlal $ACC3,$IN01_0,${R3} -+ umlal $ACC0,$IN01_0,${R0} -+ umlal $ACC4,$IN01_0,${R4} -+ umlal $ACC1,$IN01_0,${R1} -+ umlal $ACC2,$IN01_0,${R2} -+ -+ add $IN01_3,$IN01_3,$H3 -+ umlal $ACC3,$IN01_1,${R2} -+ umlal $ACC0,$IN01_1,${S4} -+ umlal $ACC4,$IN01_1,${R3} -+ umlal $ACC1,$IN01_1,${R0} -+ umlal $ACC2,$IN01_1,${R1} -+ -+ add $IN01_4,$IN01_4,$H4 -+ umlal $ACC3,$IN01_3,${R0} -+ umlal $ACC0,$IN01_3,${S2} -+ umlal $ACC4,$IN01_3,${R1} -+ umlal $ACC1,$IN01_3,${S3} -+ umlal $ACC2,$IN01_3,${S4} -+ -+ umlal $ACC3,$IN01_4,${S4} -+ umlal $ACC0,$IN01_4,${S1} -+ umlal $ACC4,$IN01_4,${R0} -+ umlal $ACC1,$IN01_4,${S2} -+ umlal $ACC2,$IN01_4,${S3} -+ -+.Lshort_tail: -+ //////////////////////////////////////////////////////////////// -+ // horizontal add -+ -+ addp $ACC3,$ACC3,$ACC3 -+ ldp d8,d9,[sp,#16] // meet ABI requirements -+ addp $ACC0,$ACC0,$ACC0 -+ ldp d10,d11,[sp,#32] -+ addp $ACC4,$ACC4,$ACC4 -+ ldp d12,d13,[sp,#48] -+ addp $ACC1,$ACC1,$ACC1 -+ ldp d14,d15,[sp,#64] -+ addp $ACC2,$ACC2,$ACC2 -+ ldr x30,[sp,#8] -+ .inst 0xd50323bf // autiasp -+ -+ //////////////////////////////////////////////////////////////// -+ // lazy reduction, but without narrowing -+ -+ ushr $T0.2d,$ACC3,#26 -+ and $ACC3,$ACC3,$MASK.2d -+ ushr $T1.2d,$ACC0,#26 -+ and $ACC0,$ACC0,$MASK.2d -+ -+ add $ACC4,$ACC4,$T0.2d // h3 -> h4 -+ add $ACC1,$ACC1,$T1.2d // h0 -> h1 -+ -+ ushr $T0.2d,$ACC4,#26 -+ and $ACC4,$ACC4,$MASK.2d -+ ushr $T1.2d,$ACC1,#26 -+ and $ACC1,$ACC1,$MASK.2d -+ add $ACC2,$ACC2,$T1.2d // h1 -> h2 -+ 
-+ add $ACC0,$ACC0,$T0.2d -+ shl $T0.2d,$T0.2d,#2 -+ ushr $T1.2d,$ACC2,#26 -+ and $ACC2,$ACC2,$MASK.2d -+ add $ACC0,$ACC0,$T0.2d // h4 -> h0 -+ add $ACC3,$ACC3,$T1.2d // h2 -> h3 -+ -+ ushr $T0.2d,$ACC0,#26 -+ and $ACC0,$ACC0,$MASK.2d -+ ushr $T1.2d,$ACC3,#26 -+ and $ACC3,$ACC3,$MASK.2d -+ add $ACC1,$ACC1,$T0.2d // h0 -> h1 -+ add $ACC4,$ACC4,$T1.2d // h3 -> h4 -+ -+ //////////////////////////////////////////////////////////////// -+ // write the result, can be partially reduced -+ -+ st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16 -+ mov x4,#1 -+ st1 {$ACC4}[0],[$ctx] -+ str x4,[$ctx,#8] // set is_base2_26 -+ -+ ldr x29,[sp],#80 -+ ret -+.size poly1305_blocks_neon,.-poly1305_blocks_neon -+ -+.align 5 -+.Lzeros: -+.long 0,0,0,0,0,0,0,0 -+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm" -+.align 2 -+#if !defined(__KERNEL__) && !defined(_WIN64) -+.comm OPENSSL_armcap_P,4,4 -+.hidden OPENSSL_armcap_P -+#endif -+___ -+ -+foreach (split("\n",$code)) { -+ s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or -+ s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or -+ (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or -+ (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or -+ (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or -+ (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or -+ (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1)); -+ -+ s/\.[124]([sd])\[/.$1\[/; -+ s/w#x([0-9]+)/w$1/g; -+ -+ print $_,"\n"; -+} -+close STDOUT; ---- /dev/null -+++ b/arch/arm64/crypto/poly1305-core.S_shipped -@@ -0,0 +1,835 @@ -+#ifndef __KERNEL__ -+# include "arm_arch.h" -+.extern OPENSSL_armcap_P -+#endif -+ -+.text -+ -+// forward "declarations" are required for Apple -+.globl poly1305_blocks -+.globl poly1305_emit -+ -+.globl poly1305_init -+.type poly1305_init,%function -+.align 5 -+poly1305_init: -+ cmp x1,xzr -+ stp xzr,xzr,[x0] // zero hash value -+ stp xzr,xzr,[x0,#16] // [along with is_base2_26] -+ -+ csel x0,xzr,x0,eq -+ b.eq .Lno_key -+ -+#ifndef __KERNEL__ -+ adrp x17,OPENSSL_armcap_P -+ ldr 
w17,[x17,#:lo12:OPENSSL_armcap_P] -+#endif -+ -+ ldp x7,x8,[x1] // load key -+ mov x9,#0xfffffffc0fffffff -+ movk x9,#0x0fff,lsl#48 -+#ifdef __AARCH64EB__ -+ rev x7,x7 // flip bytes -+ rev x8,x8 -+#endif -+ and x7,x7,x9 // &=0ffffffc0fffffff -+ and x9,x9,#-4 -+ and x8,x8,x9 // &=0ffffffc0ffffffc -+ mov w9,#-1 -+ stp x7,x8,[x0,#32] // save key value -+ str w9,[x0,#48] // impossible key power value -+ -+#ifndef __KERNEL__ -+ tst w17,#ARMV7_NEON -+ -+ adr x12,.Lpoly1305_blocks -+ adr x7,.Lpoly1305_blocks_neon -+ adr x13,.Lpoly1305_emit -+ -+ csel x12,x12,x7,eq -+ -+# ifdef __ILP32__ -+ stp w12,w13,[x2] -+# else -+ stp x12,x13,[x2] -+# endif -+#endif -+ mov x0,#1 -+.Lno_key: -+ ret -+.size poly1305_init,.-poly1305_init -+ -+.type poly1305_blocks,%function -+.align 5 -+poly1305_blocks: -+.Lpoly1305_blocks: -+ ands x2,x2,#-16 -+ b.eq .Lno_data -+ -+ ldp x4,x5,[x0] // load hash value -+ ldp x6,x17,[x0,#16] // [along with is_base2_26] -+ ldp x7,x8,[x0,#32] // load key value -+ -+#ifdef __AARCH64EB__ -+ lsr x12,x4,#32 -+ mov w13,w4 -+ lsr x14,x5,#32 -+ mov w15,w5 -+ lsr x16,x6,#32 -+#else -+ mov w12,w4 -+ lsr x13,x4,#32 -+ mov w14,w5 -+ lsr x15,x5,#32 -+ mov w16,w6 -+#endif -+ -+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 -+ lsr x13,x14,#12 -+ adds x12,x12,x14,lsl#52 -+ add x13,x13,x15,lsl#14 -+ adc x13,x13,xzr -+ lsr x14,x16,#24 -+ adds x13,x13,x16,lsl#40 -+ adc x14,x14,xzr -+ -+ cmp x17,#0 // is_base2_26? 
-+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) -+ csel x4,x4,x12,eq // choose between radixes -+ csel x5,x5,x13,eq -+ csel x6,x6,x14,eq -+ -+.Loop: -+ ldp x10,x11,[x1],#16 // load input -+ sub x2,x2,#16 -+#ifdef __AARCH64EB__ -+ rev x10,x10 -+ rev x11,x11 -+#endif -+ adds x4,x4,x10 // accumulate input -+ adcs x5,x5,x11 -+ -+ mul x12,x4,x7 // h0*r0 -+ adc x6,x6,x3 -+ umulh x13,x4,x7 -+ -+ mul x10,x5,x9 // h1*5*r1 -+ umulh x11,x5,x9 -+ -+ adds x12,x12,x10 -+ mul x10,x4,x8 // h0*r1 -+ adc x13,x13,x11 -+ umulh x14,x4,x8 -+ -+ adds x13,x13,x10 -+ mul x10,x5,x7 // h1*r0 -+ adc x14,x14,xzr -+ umulh x11,x5,x7 -+ -+ adds x13,x13,x10 -+ mul x10,x6,x9 // h2*5*r1 -+ adc x14,x14,x11 -+ mul x11,x6,x7 // h2*r0 -+ -+ adds x13,x13,x10 -+ adc x14,x14,x11 -+ -+ and x10,x14,#-4 // final reduction -+ and x6,x14,#3 -+ add x10,x10,x14,lsr#2 -+ adds x4,x12,x10 -+ adcs x5,x13,xzr -+ adc x6,x6,xzr -+ -+ cbnz x2,.Loop -+ -+ stp x4,x5,[x0] // store hash value -+ stp x6,xzr,[x0,#16] // [and clear is_base2_26] -+ -+.Lno_data: -+ ret -+.size poly1305_blocks,.-poly1305_blocks -+ -+.type poly1305_emit,%function -+.align 5 -+poly1305_emit: -+.Lpoly1305_emit: -+ ldp x4,x5,[x0] // load hash base 2^64 -+ ldp x6,x7,[x0,#16] // [along with is_base2_26] -+ ldp x10,x11,[x2] // load nonce -+ -+#ifdef __AARCH64EB__ -+ lsr x12,x4,#32 -+ mov w13,w4 -+ lsr x14,x5,#32 -+ mov w15,w5 -+ lsr x16,x6,#32 -+#else -+ mov w12,w4 -+ lsr x13,x4,#32 -+ mov w14,w5 -+ lsr x15,x5,#32 -+ mov w16,w6 -+#endif -+ -+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64 -+ lsr x13,x14,#12 -+ adds x12,x12,x14,lsl#52 -+ add x13,x13,x15,lsl#14 -+ adc x13,x13,xzr -+ lsr x14,x16,#24 -+ adds x13,x13,x16,lsl#40 -+ adc x14,x14,xzr -+ -+ cmp x7,#0 // is_base2_26? 
-+ csel x4,x4,x12,eq // choose between radixes -+ csel x5,x5,x13,eq -+ csel x6,x6,x14,eq -+ -+ adds x12,x4,#5 // compare to modulus -+ adcs x13,x5,xzr -+ adc x14,x6,xzr -+ -+ tst x14,#-4 // see if it's carried/borrowed -+ -+ csel x4,x4,x12,eq -+ csel x5,x5,x13,eq -+ -+#ifdef __AARCH64EB__ -+ ror x10,x10,#32 // flip nonce words -+ ror x11,x11,#32 -+#endif -+ adds x4,x4,x10 // accumulate nonce -+ adc x5,x5,x11 -+#ifdef __AARCH64EB__ -+ rev x4,x4 // flip output bytes -+ rev x5,x5 -+#endif -+ stp x4,x5,[x1] // write result -+ -+ ret -+.size poly1305_emit,.-poly1305_emit -+.type poly1305_mult,%function -+.align 5 -+poly1305_mult: -+ mul x12,x4,x7 // h0*r0 -+ umulh x13,x4,x7 -+ -+ mul x10,x5,x9 // h1*5*r1 -+ umulh x11,x5,x9 -+ -+ adds x12,x12,x10 -+ mul x10,x4,x8 // h0*r1 -+ adc x13,x13,x11 -+ umulh x14,x4,x8 -+ -+ adds x13,x13,x10 -+ mul x10,x5,x7 // h1*r0 -+ adc x14,x14,xzr -+ umulh x11,x5,x7 -+ -+ adds x13,x13,x10 -+ mul x10,x6,x9 // h2*5*r1 -+ adc x14,x14,x11 -+ mul x11,x6,x7 // h2*r0 -+ -+ adds x13,x13,x10 -+ adc x14,x14,x11 -+ -+ and x10,x14,#-4 // final reduction -+ and x6,x14,#3 -+ add x10,x10,x14,lsr#2 -+ adds x4,x12,x10 -+ adcs x5,x13,xzr -+ adc x6,x6,xzr -+ -+ ret -+.size poly1305_mult,.-poly1305_mult -+ -+.type poly1305_splat,%function -+.align 4 -+poly1305_splat: -+ and x12,x4,#0x03ffffff // base 2^64 -> base 2^26 -+ ubfx x13,x4,#26,#26 -+ extr x14,x5,x4,#52 -+ and x14,x14,#0x03ffffff -+ ubfx x15,x5,#14,#26 -+ extr x16,x6,x5,#40 -+ -+ str w12,[x0,#16*0] // r0 -+ add w12,w13,w13,lsl#2 // r1*5 -+ str w13,[x0,#16*1] // r1 -+ add w13,w14,w14,lsl#2 // r2*5 -+ str w12,[x0,#16*2] // s1 -+ str w14,[x0,#16*3] // r2 -+ add w14,w15,w15,lsl#2 // r3*5 -+ str w13,[x0,#16*4] // s2 -+ str w15,[x0,#16*5] // r3 -+ add w15,w16,w16,lsl#2 // r4*5 -+ str w14,[x0,#16*6] // s3 -+ str w16,[x0,#16*7] // r4 -+ str w15,[x0,#16*8] // s4 -+ -+ ret -+.size poly1305_splat,.-poly1305_splat -+ -+#ifdef __KERNEL__ -+.globl poly1305_blocks_neon -+#endif -+.type poly1305_blocks_neon,%function 
-+.align 5 -+poly1305_blocks_neon: -+.Lpoly1305_blocks_neon: -+ ldr x17,[x0,#24] -+ cmp x2,#128 -+ b.lo .Lpoly1305_blocks -+ -+ .inst 0xd503233f // paciasp -+ stp x29,x30,[sp,#-80]! -+ add x29,sp,#0 -+ -+ stp d8,d9,[sp,#16] // meet ABI requirements -+ stp d10,d11,[sp,#32] -+ stp d12,d13,[sp,#48] -+ stp d14,d15,[sp,#64] -+ -+ cbz x17,.Lbase2_64_neon -+ -+ ldp w10,w11,[x0] // load hash value base 2^26 -+ ldp w12,w13,[x0,#8] -+ ldr w14,[x0,#16] -+ -+ tst x2,#31 -+ b.eq .Leven_neon -+ -+ ldp x7,x8,[x0,#32] // load key value -+ -+ add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64 -+ lsr x5,x12,#12 -+ adds x4,x4,x12,lsl#52 -+ add x5,x5,x13,lsl#14 -+ adc x5,x5,xzr -+ lsr x6,x14,#24 -+ adds x5,x5,x14,lsl#40 -+ adc x14,x6,xzr // can be partially reduced... -+ -+ ldp x12,x13,[x1],#16 // load input -+ sub x2,x2,#16 -+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) -+ -+#ifdef __AARCH64EB__ -+ rev x12,x12 -+ rev x13,x13 -+#endif -+ adds x4,x4,x12 // accumulate input -+ adcs x5,x5,x13 -+ adc x6,x6,x3 -+ -+ bl poly1305_mult -+ -+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 -+ ubfx x11,x4,#26,#26 -+ extr x12,x5,x4,#52 -+ and x12,x12,#0x03ffffff -+ ubfx x13,x5,#14,#26 -+ extr x14,x6,x5,#40 -+ -+ b .Leven_neon -+ -+.align 4 -+.Lbase2_64_neon: -+ ldp x7,x8,[x0,#32] // load key value -+ -+ ldp x4,x5,[x0] // load hash value base 2^64 -+ ldr x6,[x0,#16] -+ -+ tst x2,#31 -+ b.eq .Linit_neon -+ -+ ldp x12,x13,[x1],#16 // load input -+ sub x2,x2,#16 -+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) -+#ifdef __AARCH64EB__ -+ rev x12,x12 -+ rev x13,x13 -+#endif -+ adds x4,x4,x12 // accumulate input -+ adcs x5,x5,x13 -+ adc x6,x6,x3 -+ -+ bl poly1305_mult -+ -+.Linit_neon: -+ ldr w17,[x0,#48] // first table element -+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 -+ ubfx x11,x4,#26,#26 -+ extr x12,x5,x4,#52 -+ and x12,x12,#0x03ffffff -+ ubfx x13,x5,#14,#26 -+ extr x14,x6,x5,#40 -+ -+ cmp w17,#-1 // is value impossible? 
-+ b.ne .Leven_neon -+ -+ fmov d24,x10 -+ fmov d25,x11 -+ fmov d26,x12 -+ fmov d27,x13 -+ fmov d28,x14 -+ -+ ////////////////////////////////// initialize r^n table -+ mov x4,x7 // r^1 -+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) -+ mov x5,x8 -+ mov x6,xzr -+ add x0,x0,#48+12 -+ bl poly1305_splat -+ -+ bl poly1305_mult // r^2 -+ sub x0,x0,#4 -+ bl poly1305_splat -+ -+ bl poly1305_mult // r^3 -+ sub x0,x0,#4 -+ bl poly1305_splat -+ -+ bl poly1305_mult // r^4 -+ sub x0,x0,#4 -+ bl poly1305_splat -+ sub x0,x0,#48 // restore original x0 -+ b .Ldo_neon -+ -+.align 4 -+.Leven_neon: -+ fmov d24,x10 -+ fmov d25,x11 -+ fmov d26,x12 -+ fmov d27,x13 -+ fmov d28,x14 -+ -+.Ldo_neon: -+ ldp x8,x12,[x1,#32] // inp[2:3] -+ subs x2,x2,#64 -+ ldp x9,x13,[x1,#48] -+ add x16,x1,#96 -+ adr x17,.Lzeros -+ -+ lsl x3,x3,#24 -+ add x15,x0,#48 -+ -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ and x5,x9,#0x03ffffff -+ ubfx x6,x8,#26,#26 -+ ubfx x7,x9,#26,#26 -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ extr x8,x12,x8,#52 -+ extr x9,x13,x9,#52 -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ fmov d14,x4 -+ and x8,x8,#0x03ffffff -+ and x9,x9,#0x03ffffff -+ ubfx x10,x12,#14,#26 -+ ubfx x11,x13,#14,#26 -+ add x12,x3,x12,lsr#40 -+ add x13,x3,x13,lsr#40 -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ fmov d15,x6 -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ fmov d16,x8 -+ fmov d17,x10 -+ fmov d18,x12 -+ -+ ldp x8,x12,[x1],#16 // inp[0:1] -+ ldp x9,x13,[x1],#48 -+ -+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64 -+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64 -+ ld1 {v8.4s},[x15] -+ -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ and x5,x9,#0x03ffffff -+ ubfx x6,x8,#26,#26 -+ ubfx x7,x9,#26,#26 -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ extr x8,x12,x8,#52 -+ extr 
x9,x13,x9,#52 -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ fmov d9,x4 -+ and x8,x8,#0x03ffffff -+ and x9,x9,#0x03ffffff -+ ubfx x10,x12,#14,#26 -+ ubfx x11,x13,#14,#26 -+ add x12,x3,x12,lsr#40 -+ add x13,x3,x13,lsr#40 -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ fmov d10,x6 -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ movi v31.2d,#-1 -+ fmov d11,x8 -+ fmov d12,x10 -+ fmov d13,x12 -+ ushr v31.2d,v31.2d,#38 -+ -+ b.ls .Lskip_loop -+ -+.align 4 -+.Loop_neon: -+ //////////////////////////////////////////////////////////////// -+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 -+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r -+ // ___________________/ -+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 -+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r -+ // ___________________/ ____________________/ -+ // -+ // Note that we start with inp[2:3]*r^2. This is because it -+ // doesn't depend on reduction in previous iteration. 
-+ //////////////////////////////////////////////////////////////// -+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 -+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 -+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 -+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 -+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 -+ -+ subs x2,x2,#64 -+ umull v23.2d,v14.2s,v7.s[2] -+ csel x16,x17,x16,lo -+ umull v22.2d,v14.2s,v5.s[2] -+ umull v21.2d,v14.2s,v3.s[2] -+ ldp x8,x12,[x16],#16 // inp[2:3] (or zero) -+ umull v20.2d,v14.2s,v1.s[2] -+ ldp x9,x13,[x16],#48 -+ umull v19.2d,v14.2s,v0.s[2] -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ -+ umlal v23.2d,v15.2s,v5.s[2] -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ umlal v22.2d,v15.2s,v3.s[2] -+ and x5,x9,#0x03ffffff -+ umlal v21.2d,v15.2s,v1.s[2] -+ ubfx x6,x8,#26,#26 -+ umlal v20.2d,v15.2s,v0.s[2] -+ ubfx x7,x9,#26,#26 -+ umlal v19.2d,v15.2s,v8.s[2] -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ -+ umlal v23.2d,v16.2s,v3.s[2] -+ extr x8,x12,x8,#52 -+ umlal v22.2d,v16.2s,v1.s[2] -+ extr x9,x13,x9,#52 -+ umlal v21.2d,v16.2s,v0.s[2] -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ umlal v20.2d,v16.2s,v8.s[2] -+ fmov d14,x4 -+ umlal v19.2d,v16.2s,v6.s[2] -+ and x8,x8,#0x03ffffff -+ -+ umlal v23.2d,v17.2s,v1.s[2] -+ and x9,x9,#0x03ffffff -+ umlal v22.2d,v17.2s,v0.s[2] -+ ubfx x10,x12,#14,#26 -+ umlal v21.2d,v17.2s,v8.s[2] -+ ubfx x11,x13,#14,#26 -+ umlal v20.2d,v17.2s,v6.s[2] -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ umlal v19.2d,v17.2s,v4.s[2] -+ fmov d15,x6 -+ -+ add v11.2s,v11.2s,v26.2s -+ add x12,x3,x12,lsr#40 -+ umlal v23.2d,v18.2s,v0.s[2] -+ add x13,x3,x13,lsr#40 -+ umlal v22.2d,v18.2s,v8.s[2] -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ umlal v21.2d,v18.2s,v6.s[2] -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ umlal v20.2d,v18.2s,v4.s[2] -+ fmov d16,x8 -+ umlal v19.2d,v18.2s,v2.s[2] -+ fmov d17,x10 -+ -+ 
//////////////////////////////////////////////////////////////// -+ // (hash+inp[0:1])*r^4 and accumulate -+ -+ add v9.2s,v9.2s,v24.2s -+ fmov d18,x12 -+ umlal v22.2d,v11.2s,v1.s[0] -+ ldp x8,x12,[x1],#16 // inp[0:1] -+ umlal v19.2d,v11.2s,v6.s[0] -+ ldp x9,x13,[x1],#48 -+ umlal v23.2d,v11.2s,v3.s[0] -+ umlal v20.2d,v11.2s,v8.s[0] -+ umlal v21.2d,v11.2s,v0.s[0] -+#ifdef __AARCH64EB__ -+ rev x8,x8 -+ rev x12,x12 -+ rev x9,x9 -+ rev x13,x13 -+#endif -+ -+ add v10.2s,v10.2s,v25.2s -+ umlal v22.2d,v9.2s,v5.s[0] -+ umlal v23.2d,v9.2s,v7.s[0] -+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 -+ umlal v21.2d,v9.2s,v3.s[0] -+ and x5,x9,#0x03ffffff -+ umlal v19.2d,v9.2s,v0.s[0] -+ ubfx x6,x8,#26,#26 -+ umlal v20.2d,v9.2s,v1.s[0] -+ ubfx x7,x9,#26,#26 -+ -+ add v12.2s,v12.2s,v27.2s -+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 -+ umlal v22.2d,v10.2s,v3.s[0] -+ extr x8,x12,x8,#52 -+ umlal v23.2d,v10.2s,v5.s[0] -+ extr x9,x13,x9,#52 -+ umlal v19.2d,v10.2s,v8.s[0] -+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 -+ umlal v21.2d,v10.2s,v1.s[0] -+ fmov d9,x4 -+ umlal v20.2d,v10.2s,v0.s[0] -+ and x8,x8,#0x03ffffff -+ -+ add v13.2s,v13.2s,v28.2s -+ and x9,x9,#0x03ffffff -+ umlal v22.2d,v12.2s,v0.s[0] -+ ubfx x10,x12,#14,#26 -+ umlal v19.2d,v12.2s,v4.s[0] -+ ubfx x11,x13,#14,#26 -+ umlal v23.2d,v12.2s,v1.s[0] -+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 -+ umlal v20.2d,v12.2s,v6.s[0] -+ fmov d10,x6 -+ umlal v21.2d,v12.2s,v8.s[0] -+ add x12,x3,x12,lsr#40 -+ -+ umlal v22.2d,v13.2s,v8.s[0] -+ add x13,x3,x13,lsr#40 -+ umlal v19.2d,v13.2s,v2.s[0] -+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 -+ umlal v23.2d,v13.2s,v0.s[0] -+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 -+ umlal v20.2d,v13.2s,v4.s[0] -+ fmov d11,x8 -+ umlal v21.2d,v13.2s,v6.s[0] -+ fmov d12,x10 -+ fmov d13,x12 -+ -+ ///////////////////////////////////////////////////////////////// -+ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein -+ // and P. 
Schwabe -+ // -+ // [see discussion in poly1305-armv4 module] -+ -+ ushr v29.2d,v22.2d,#26 -+ xtn v27.2s,v22.2d -+ ushr v30.2d,v19.2d,#26 -+ and v19.16b,v19.16b,v31.16b -+ add v23.2d,v23.2d,v29.2d // h3 -> h4 -+ bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff -+ add v20.2d,v20.2d,v30.2d // h0 -> h1 -+ -+ ushr v29.2d,v23.2d,#26 -+ xtn v28.2s,v23.2d -+ ushr v30.2d,v20.2d,#26 -+ xtn v25.2s,v20.2d -+ bic v28.2s,#0xfc,lsl#24 -+ add v21.2d,v21.2d,v30.2d // h1 -> h2 -+ -+ add v19.2d,v19.2d,v29.2d -+ shl v29.2d,v29.2d,#2 -+ shrn v30.2s,v21.2d,#26 -+ xtn v26.2s,v21.2d -+ add v19.2d,v19.2d,v29.2d // h4 -> h0 -+ bic v25.2s,#0xfc,lsl#24 -+ add v27.2s,v27.2s,v30.2s // h2 -> h3 -+ bic v26.2s,#0xfc,lsl#24 -+ -+ shrn v29.2s,v19.2d,#26 -+ xtn v24.2s,v19.2d -+ ushr v30.2s,v27.2s,#26 -+ bic v27.2s,#0xfc,lsl#24 -+ bic v24.2s,#0xfc,lsl#24 -+ add v25.2s,v25.2s,v29.2s // h0 -> h1 -+ add v28.2s,v28.2s,v30.2s // h3 -> h4 -+ -+ b.hi .Loop_neon -+ -+.Lskip_loop: -+ dup v16.2d,v16.d[0] -+ add v11.2s,v11.2s,v26.2s -+ -+ //////////////////////////////////////////////////////////////// -+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 -+ -+ adds x2,x2,#32 -+ b.ne .Long_tail -+ -+ dup v16.2d,v11.d[0] -+ add v14.2s,v9.2s,v24.2s -+ add v17.2s,v12.2s,v27.2s -+ add v15.2s,v10.2s,v25.2s -+ add v18.2s,v13.2s,v28.2s -+ -+.Long_tail: -+ dup v14.2d,v14.d[0] -+ umull2 v19.2d,v16.4s,v6.4s -+ umull2 v22.2d,v16.4s,v1.4s -+ umull2 v23.2d,v16.4s,v3.4s -+ umull2 v21.2d,v16.4s,v0.4s -+ umull2 v20.2d,v16.4s,v8.4s -+ -+ dup v15.2d,v15.d[0] -+ umlal2 v19.2d,v14.4s,v0.4s -+ umlal2 v21.2d,v14.4s,v3.4s -+ umlal2 v22.2d,v14.4s,v5.4s -+ umlal2 v23.2d,v14.4s,v7.4s -+ umlal2 v20.2d,v14.4s,v1.4s -+ -+ dup v17.2d,v17.d[0] -+ umlal2 v19.2d,v15.4s,v8.4s -+ umlal2 v22.2d,v15.4s,v3.4s -+ umlal2 v21.2d,v15.4s,v1.4s -+ umlal2 v23.2d,v15.4s,v5.4s -+ umlal2 v20.2d,v15.4s,v0.4s -+ -+ dup v18.2d,v18.d[0] -+ umlal2 v22.2d,v17.4s,v0.4s -+ umlal2 v23.2d,v17.4s,v1.4s -+ umlal2 v19.2d,v17.4s,v4.4s -+ umlal2 v20.2d,v17.4s,v6.4s -+ umlal2 
v21.2d,v17.4s,v8.4s -+ -+ umlal2 v22.2d,v18.4s,v8.4s -+ umlal2 v19.2d,v18.4s,v2.4s -+ umlal2 v23.2d,v18.4s,v0.4s -+ umlal2 v20.2d,v18.4s,v4.4s -+ umlal2 v21.2d,v18.4s,v6.4s -+ -+ b.eq .Lshort_tail -+ -+ //////////////////////////////////////////////////////////////// -+ // (hash+inp[0:1])*r^4:r^3 and accumulate -+ -+ add v9.2s,v9.2s,v24.2s -+ umlal v22.2d,v11.2s,v1.2s -+ umlal v19.2d,v11.2s,v6.2s -+ umlal v23.2d,v11.2s,v3.2s -+ umlal v20.2d,v11.2s,v8.2s -+ umlal v21.2d,v11.2s,v0.2s -+ -+ add v10.2s,v10.2s,v25.2s -+ umlal v22.2d,v9.2s,v5.2s -+ umlal v19.2d,v9.2s,v0.2s -+ umlal v23.2d,v9.2s,v7.2s -+ umlal v20.2d,v9.2s,v1.2s -+ umlal v21.2d,v9.2s,v3.2s -+ -+ add v12.2s,v12.2s,v27.2s -+ umlal v22.2d,v10.2s,v3.2s -+ umlal v19.2d,v10.2s,v8.2s -+ umlal v23.2d,v10.2s,v5.2s -+ umlal v20.2d,v10.2s,v0.2s -+ umlal v21.2d,v10.2s,v1.2s -+ -+ add v13.2s,v13.2s,v28.2s -+ umlal v22.2d,v12.2s,v0.2s -+ umlal v19.2d,v12.2s,v4.2s -+ umlal v23.2d,v12.2s,v1.2s -+ umlal v20.2d,v12.2s,v6.2s -+ umlal v21.2d,v12.2s,v8.2s -+ -+ umlal v22.2d,v13.2s,v8.2s -+ umlal v19.2d,v13.2s,v2.2s -+ umlal v23.2d,v13.2s,v0.2s -+ umlal v20.2d,v13.2s,v4.2s -+ umlal v21.2d,v13.2s,v6.2s -+ -+.Lshort_tail: -+ //////////////////////////////////////////////////////////////// -+ // horizontal add -+ -+ addp v22.2d,v22.2d,v22.2d -+ ldp d8,d9,[sp,#16] // meet ABI requirements -+ addp v19.2d,v19.2d,v19.2d -+ ldp d10,d11,[sp,#32] -+ addp v23.2d,v23.2d,v23.2d -+ ldp d12,d13,[sp,#48] -+ addp v20.2d,v20.2d,v20.2d -+ ldp d14,d15,[sp,#64] -+ addp v21.2d,v21.2d,v21.2d -+ ldr x30,[sp,#8] -+ .inst 0xd50323bf // autiasp -+ -+ //////////////////////////////////////////////////////////////// -+ // lazy reduction, but without narrowing -+ -+ ushr v29.2d,v22.2d,#26 -+ and v22.16b,v22.16b,v31.16b -+ ushr v30.2d,v19.2d,#26 -+ and v19.16b,v19.16b,v31.16b -+ -+ add v23.2d,v23.2d,v29.2d // h3 -> h4 -+ add v20.2d,v20.2d,v30.2d // h0 -> h1 -+ -+ ushr v29.2d,v23.2d,#26 -+ and v23.16b,v23.16b,v31.16b -+ ushr v30.2d,v20.2d,#26 -+ and 
v20.16b,v20.16b,v31.16b -+ add v21.2d,v21.2d,v30.2d // h1 -> h2 -+ -+ add v19.2d,v19.2d,v29.2d -+ shl v29.2d,v29.2d,#2 -+ ushr v30.2d,v21.2d,#26 -+ and v21.16b,v21.16b,v31.16b -+ add v19.2d,v19.2d,v29.2d // h4 -> h0 -+ add v22.2d,v22.2d,v30.2d // h2 -> h3 -+ -+ ushr v29.2d,v19.2d,#26 -+ and v19.16b,v19.16b,v31.16b -+ ushr v30.2d,v22.2d,#26 -+ and v22.16b,v22.16b,v31.16b -+ add v20.2d,v20.2d,v29.2d // h0 -> h1 -+ add v23.2d,v23.2d,v30.2d // h3 -> h4 -+ -+ //////////////////////////////////////////////////////////////// -+ // write the result, can be partially reduced -+ -+ st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 -+ mov x4,#1 -+ st1 {v23.s}[0],[x0] -+ str x4,[x0,#8] // set is_base2_26 -+ -+ ldr x29,[sp],#80 -+ ret -+.size poly1305_blocks_neon,.-poly1305_blocks_neon -+ -+.align 5 -+.Lzeros: -+.long 0,0,0,0,0,0,0,0 -+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm" -+.align 2 -+#if !defined(__KERNEL__) && !defined(_WIN64) -+.comm OPENSSL_armcap_P,4,4 -+.hidden OPENSSL_armcap_P -+#endif ---- /dev/null -+++ b/arch/arm64/crypto/poly1305-glue.c -@@ -0,0 +1,237 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 -+ * -+ * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> -+ */ -+ -+#include <asm/hwcap.h> -+#include <asm/neon.h> -+#include <asm/simd.h> -+#include <asm/unaligned.h> -+#include <crypto/algapi.h> -+#include <crypto/internal/hash.h> -+#include <crypto/internal/poly1305.h> -+#include <crypto/internal/simd.h> -+#include <linux/cpufeature.h> -+#include <linux/crypto.h> -+#include <linux/jump_label.h> -+#include <linux/module.h> -+ -+asmlinkage void poly1305_init_arm64(void *state, const u8 *key); -+asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); -+asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); -+asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce); -+ -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -+ -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) -+{ -+ poly1305_init_arm64(&dctx->h, key); -+ dctx->s[0] = get_unaligned_le32(key + 16); -+ dctx->s[1] = get_unaligned_le32(key + 20); -+ dctx->s[2] = get_unaligned_le32(key + 24); -+ dctx->s[3] = get_unaligned_le32(key + 28); -+ dctx->buflen = 0; -+} -+EXPORT_SYMBOL(poly1305_init_arch); -+ -+static int neon_poly1305_init(struct shash_desc *desc) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ dctx->buflen = 0; -+ dctx->rset = 0; -+ dctx->sset = false; -+ -+ return 0; -+} -+ -+static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, -+ u32 len, u32 hibit, bool do_neon) -+{ -+ if (unlikely(!dctx->sset)) { -+ if (!dctx->rset) { -+ poly1305_init_arch(dctx, src); -+ src += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ dctx->rset = 1; -+ } -+ if (len >= POLY1305_BLOCK_SIZE) { -+ dctx->s[0] = get_unaligned_le32(src + 0); -+ dctx->s[1] = get_unaligned_le32(src + 4); -+ dctx->s[2] = get_unaligned_le32(src + 8); -+ dctx->s[3] = get_unaligned_le32(src + 12); -+ src += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ dctx->sset = true; -+ } -+ if (len < POLY1305_BLOCK_SIZE) 
-+ return; -+ } -+ -+ len &= ~(POLY1305_BLOCK_SIZE - 1); -+ -+ if (static_branch_likely(&have_neon) && likely(do_neon)) -+ poly1305_blocks_neon(&dctx->h, src, len, hibit); -+ else -+ poly1305_blocks(&dctx->h, src, len, hibit); -+} -+ -+static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, -+ const u8 *src, u32 len, bool do_neon) -+{ -+ if (unlikely(dctx->buflen)) { -+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); -+ -+ memcpy(dctx->buf + dctx->buflen, src, bytes); -+ src += bytes; -+ len -= bytes; -+ dctx->buflen += bytes; -+ -+ if (dctx->buflen == POLY1305_BLOCK_SIZE) { -+ neon_poly1305_blocks(dctx, dctx->buf, -+ POLY1305_BLOCK_SIZE, 1, false); -+ dctx->buflen = 0; -+ } -+ } -+ -+ if (likely(len >= POLY1305_BLOCK_SIZE)) { -+ neon_poly1305_blocks(dctx, src, len, 1, do_neon); -+ src += round_down(len, POLY1305_BLOCK_SIZE); -+ len %= POLY1305_BLOCK_SIZE; -+ } -+ -+ if (unlikely(len)) { -+ dctx->buflen = len; -+ memcpy(dctx->buf, src, len); -+ } -+} -+ -+static int neon_poly1305_update(struct shash_desc *desc, -+ const u8 *src, unsigned int srclen) -+{ -+ bool do_neon = crypto_simd_usable() && srclen > 128; -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ if (static_branch_likely(&have_neon) && do_neon) -+ kernel_neon_begin(); -+ neon_poly1305_do_update(dctx, src, srclen, do_neon); -+ if (static_branch_likely(&have_neon) && do_neon) -+ kernel_neon_end(); -+ return 0; -+} -+ -+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, -+ unsigned int nbytes) -+{ -+ if (unlikely(dctx->buflen)) { -+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); -+ -+ memcpy(dctx->buf + dctx->buflen, src, bytes); -+ src += bytes; -+ nbytes -= bytes; -+ dctx->buflen += bytes; -+ -+ if (dctx->buflen == POLY1305_BLOCK_SIZE) { -+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); -+ dctx->buflen = 0; -+ } -+ } -+ -+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { -+ unsigned int len = round_down(nbytes, 
POLY1305_BLOCK_SIZE); -+ -+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) { -+ kernel_neon_begin(); -+ poly1305_blocks_neon(&dctx->h, src, len, 1); -+ kernel_neon_end(); -+ } else { -+ poly1305_blocks(&dctx->h, src, len, 1); -+ } -+ src += len; -+ nbytes %= POLY1305_BLOCK_SIZE; -+ } -+ -+ if (unlikely(nbytes)) { -+ dctx->buflen = nbytes; -+ memcpy(dctx->buf, src, nbytes); -+ } -+} -+EXPORT_SYMBOL(poly1305_update_arch); -+ -+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -+{ -+ __le32 digest[4]; -+ u64 f = 0; -+ -+ if (unlikely(dctx->buflen)) { -+ dctx->buf[dctx->buflen++] = 1; -+ memset(dctx->buf + dctx->buflen, 0, -+ POLY1305_BLOCK_SIZE - dctx->buflen); -+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); -+ } -+ -+ poly1305_emit(&dctx->h, digest, dctx->s); -+ -+ /* mac = (h + s) % (2^128) */ -+ f = (f >> 32) + le32_to_cpu(digest[0]); -+ put_unaligned_le32(f, dst); -+ f = (f >> 32) + le32_to_cpu(digest[1]); -+ put_unaligned_le32(f, dst + 4); -+ f = (f >> 32) + le32_to_cpu(digest[2]); -+ put_unaligned_le32(f, dst + 8); -+ f = (f >> 32) + le32_to_cpu(digest[3]); -+ put_unaligned_le32(f, dst + 12); -+ -+ *dctx = (struct poly1305_desc_ctx){}; -+} -+EXPORT_SYMBOL(poly1305_final_arch); -+ -+static int neon_poly1305_final(struct shash_desc *desc, u8 *dst) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ if (unlikely(!dctx->sset)) -+ return -ENOKEY; -+ -+ poly1305_final_arch(dctx, dst); -+ return 0; -+} -+ -+static struct shash_alg neon_poly1305_alg = { -+ .init = neon_poly1305_init, -+ .update = neon_poly1305_update, -+ .final = neon_poly1305_final, -+ .digestsize = POLY1305_DIGEST_SIZE, -+ .descsize = sizeof(struct poly1305_desc_ctx), -+ -+ .base.cra_name = "poly1305", -+ .base.cra_driver_name = "poly1305-neon", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = POLY1305_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+}; -+ -+static int __init neon_poly1305_mod_init(void) -+{ -+ if 
(!cpu_have_named_feature(ASIMD)) -+ return 0; -+ -+ static_branch_enable(&have_neon); -+ -+ return crypto_register_shash(&neon_poly1305_alg); -+} -+ -+static void __exit neon_poly1305_mod_exit(void) -+{ -+ if (cpu_have_named_feature(ASIMD)) -+ crypto_unregister_shash(&neon_poly1305_alg); -+} -+ -+module_init(neon_poly1305_mod_init); -+module_exit(neon_poly1305_mod_exit); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_ALIAS_CRYPTO("poly1305"); -+MODULE_ALIAS_CRYPTO("poly1305-neon"); ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -40,6 +40,7 @@ config CRYPTO_LIB_DES - config CRYPTO_LIB_POLY1305_RSIZE - int - default 4 if X86_64 -+ default 9 if ARM64 - default 1 - - config CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0019-crypto-arm-poly1305-incorporate-OpenSSL-CRYPTOGAMS-N.patch b/target/linux/generic/backport-5.4/080-wireguard-0019-crypto-arm-poly1305-incorporate-OpenSSL-CRYPTOGAMS-N.patch deleted file mode 100644 index 367b20fc3a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0019-crypto-arm-poly1305-incorporate-OpenSSL-CRYPTOGAMS-N.patch +++ /dev/null @@ -1,2776 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:25 +0100 -Subject: [PATCH] crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON - implementation - -commit a6b803b3ddc793d6db0c16f12fc12d30d20fa9cc upstream. - -This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation -for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL -project. The file 'poly1305-armv4.pl' is taken straight from this upstream -GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f, -and already contains all the changes required to build it as part of a -Linux kernel module. 
- -[0] https://github.com/dot-asm/cryptogams - -Co-developed-by: Andy Polyakov <appro@cryptogams.org> -Signed-off-by: Andy Polyakov <appro@cryptogams.org> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/Kconfig | 5 + - arch/arm/crypto/Makefile | 12 +- - arch/arm/crypto/poly1305-armv4.pl | 1236 +++++++++++++++++++++++ - arch/arm/crypto/poly1305-core.S_shipped | 1158 +++++++++++++++++++++ - arch/arm/crypto/poly1305-glue.c | 276 +++++ - lib/crypto/Kconfig | 2 +- - 6 files changed, 2687 insertions(+), 2 deletions(-) - create mode 100644 arch/arm/crypto/poly1305-armv4.pl - create mode 100644 arch/arm/crypto/poly1305-core.S_shipped - create mode 100644 arch/arm/crypto/poly1305-glue.c - ---- a/arch/arm/crypto/Kconfig -+++ b/arch/arm/crypto/Kconfig -@@ -131,6 +131,11 @@ config CRYPTO_CHACHA20_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_ARCH_HAVE_LIB_CHACHA - -+config CRYPTO_POLY1305_ARM -+ tristate "Accelerated scalar and SIMD Poly1305 hash implementations" -+ select CRYPTO_HASH -+ select CRYPTO_ARCH_HAVE_LIB_POLY1305 -+ - config CRYPTO_NHPOLY1305_NEON - tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" - depends on KERNEL_MODE_NEON ---- a/arch/arm/crypto/Makefile -+++ b/arch/arm/crypto/Makefile -@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sh - obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o - obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o - obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o -+obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o - obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o - - ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -@@ -55,12 +56,16 @@ crct10dif-arm-ce-y := crct10dif-ce-core. 
- crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o - chacha-neon-y := chacha-scalar-core.o chacha-glue.o - chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o -+poly1305-arm-y := poly1305-core.o poly1305-glue.o - nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o - - ifdef REGENERATE_ARM_CRYPTO - quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $(<) > $(@) - -+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl -+ $(call cmd,perl) -+ - $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl - $(call cmd,perl) - -@@ -68,4 +73,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha - $(call cmd,perl) - endif - --clean-files += sha256-core.S sha512-core.S -+clean-files += poly1305-core.S sha256-core.S sha512-core.S -+ -+# massage the perlasm code a bit so we only get the NEON routine if we need it -+poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 -+poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 -+AFLAGS_poly1305-core.o += $(poly1305-aflags-y) ---- /dev/null -+++ b/arch/arm/crypto/poly1305-armv4.pl -@@ -0,0 +1,1236 @@ -+#!/usr/bin/env perl -+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause -+# -+# ==================================================================== -+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL -+# project. -+# ==================================================================== -+# -+# IALU(*)/gcc-4.4 NEON -+# -+# ARM11xx(ARMv6) 7.78/+100% - -+# Cortex-A5 6.35/+130% 3.00 -+# Cortex-A8 6.25/+115% 2.36 -+# Cortex-A9 5.10/+95% 2.55 -+# Cortex-A15 3.85/+85% 1.25(**) -+# Snapdragon S4 5.70/+100% 1.48(**) -+# -+# (*) this is for -march=armv6, i.e. 
with bunch of ldrb loading data; -+# (**) these are trade-off results, they can be improved by ~8% but at -+# the cost of 15/12% regression on Cortex-A5/A7, it's even possible -+# to improve Cortex-A9 result, but then A5/A7 loose more than 20%; -+ -+$flavour = shift; -+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } -+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } -+ -+if ($flavour && $flavour ne "void") { -+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or -+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or -+ die "can't locate arm-xlate.pl"; -+ -+ open STDOUT,"| \"$^X\" $xlate $flavour $output"; -+} else { -+ open STDOUT,">$output"; -+} -+ -+($ctx,$inp,$len,$padbit)=map("r$_",(0..3)); -+ -+$code.=<<___; -+#ifndef __KERNEL__ -+# include "arm_arch.h" -+#else -+# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -+# define poly1305_init poly1305_init_arm -+# define poly1305_blocks poly1305_blocks_arm -+# define poly1305_emit poly1305_emit_arm -+.globl poly1305_blocks_neon -+#endif -+ -+#if defined(__thumb2__) -+.syntax unified -+.thumb -+#else -+.code 32 -+#endif -+ -+.text -+ -+.globl poly1305_emit -+.globl poly1305_blocks -+.globl poly1305_init -+.type poly1305_init,%function -+.align 5 -+poly1305_init: -+.Lpoly1305_init: -+ stmdb sp!,{r4-r11} -+ -+ eor r3,r3,r3 -+ cmp $inp,#0 -+ str r3,[$ctx,#0] @ zero hash value -+ str r3,[$ctx,#4] -+ str r3,[$ctx,#8] -+ str r3,[$ctx,#12] -+ str r3,[$ctx,#16] -+ str r3,[$ctx,#36] @ clear is_base2_26 -+ add $ctx,$ctx,#20 -+ -+#ifdef __thumb2__ -+ it eq -+#endif -+ moveq r0,#0 -+ beq .Lno_key -+ -+#if __ARM_MAX_ARCH__>=7 -+ mov r3,#-1 -+ str r3,[$ctx,#28] @ impossible key power value -+# ifndef __KERNEL__ -+ adr r11,.Lpoly1305_init -+ ldr r12,.LOPENSSL_armcap -+# endif -+#endif -+ ldrb r4,[$inp,#0] -+ mov r10,#0x0fffffff -+ ldrb r5,[$inp,#1] -+ and r3,r10,#-4 @ 0x0ffffffc -+ ldrb r6,[$inp,#2] -+ 
ldrb r7,[$inp,#3] -+ orr r4,r4,r5,lsl#8 -+ ldrb r5,[$inp,#4] -+ orr r4,r4,r6,lsl#16 -+ ldrb r6,[$inp,#5] -+ orr r4,r4,r7,lsl#24 -+ ldrb r7,[$inp,#6] -+ and r4,r4,r10 -+ -+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -+# if !defined(_WIN32) -+ ldr r12,[r11,r12] @ OPENSSL_armcap_P -+# endif -+# if defined(__APPLE__) || defined(_WIN32) -+ ldr r12,[r12] -+# endif -+#endif -+ ldrb r8,[$inp,#7] -+ orr r5,r5,r6,lsl#8 -+ ldrb r6,[$inp,#8] -+ orr r5,r5,r7,lsl#16 -+ ldrb r7,[$inp,#9] -+ orr r5,r5,r8,lsl#24 -+ ldrb r8,[$inp,#10] -+ and r5,r5,r3 -+ -+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -+ tst r12,#ARMV7_NEON @ check for NEON -+# ifdef __thumb2__ -+ adr r9,.Lpoly1305_blocks_neon -+ adr r11,.Lpoly1305_blocks -+ it ne -+ movne r11,r9 -+ adr r12,.Lpoly1305_emit -+ orr r11,r11,#1 @ thumb-ify addresses -+ orr r12,r12,#1 -+# else -+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) -+ ite eq -+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) -+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) -+# endif -+#endif -+ ldrb r9,[$inp,#11] -+ orr r6,r6,r7,lsl#8 -+ ldrb r7,[$inp,#12] -+ orr r6,r6,r8,lsl#16 -+ ldrb r8,[$inp,#13] -+ orr r6,r6,r9,lsl#24 -+ ldrb r9,[$inp,#14] -+ and r6,r6,r3 -+ -+ ldrb r10,[$inp,#15] -+ orr r7,r7,r8,lsl#8 -+ str r4,[$ctx,#0] -+ orr r7,r7,r9,lsl#16 -+ str r5,[$ctx,#4] -+ orr r7,r7,r10,lsl#24 -+ str r6,[$ctx,#8] -+ and r7,r7,r3 -+ str r7,[$ctx,#12] -+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -+ stmia r2,{r11,r12} @ fill functions table -+ mov r0,#1 -+#else -+ mov r0,#0 -+#endif -+.Lno_key: -+ ldmia sp!,{r4-r11} -+#if __ARM_ARCH__>=5 -+ ret @ bx lr -+#else -+ tst lr,#1 -+ moveq pc,lr @ be binary compatible with V4, yet -+ bx lr @ interoperable with Thumb ISA:-) -+#endif -+.size poly1305_init,.-poly1305_init -+___ -+{ -+my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12)); -+my ($s1,$s2,$s3)=($r1,$r2,$r3); -+ -+$code.=<<___; -+.type poly1305_blocks,%function -+.align 5 -+poly1305_blocks: -+.Lpoly1305_blocks: -+ stmdb 
sp!,{r3-r11,lr} -+ -+ ands $len,$len,#-16 -+ beq .Lno_data -+ -+ add $len,$len,$inp @ end pointer -+ sub sp,sp,#32 -+ -+#if __ARM_ARCH__<7 -+ ldmia $ctx,{$h0-$r3} @ load context -+ add $ctx,$ctx,#20 -+ str $len,[sp,#16] @ offload stuff -+ str $ctx,[sp,#12] -+#else -+ ldr lr,[$ctx,#36] @ is_base2_26 -+ ldmia $ctx!,{$h0-$h4} @ load hash value -+ str $len,[sp,#16] @ offload stuff -+ str $ctx,[sp,#12] -+ -+ adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 -+ mov $r1,$h1,lsr#6 -+ adcs $r1,$r1,$h2,lsl#20 -+ mov $r2,$h2,lsr#12 -+ adcs $r2,$r2,$h3,lsl#14 -+ mov $r3,$h3,lsr#18 -+ adcs $r3,$r3,$h4,lsl#8 -+ mov $len,#0 -+ teq lr,#0 -+ str $len,[$ctx,#16] @ clear is_base2_26 -+ adc $len,$len,$h4,lsr#24 -+ -+ itttt ne -+ movne $h0,$r0 @ choose between radixes -+ movne $h1,$r1 -+ movne $h2,$r2 -+ movne $h3,$r3 -+ ldmia $ctx,{$r0-$r3} @ load key -+ it ne -+ movne $h4,$len -+#endif -+ -+ mov lr,$inp -+ cmp $padbit,#0 -+ str $r1,[sp,#20] -+ str $r2,[sp,#24] -+ str $r3,[sp,#28] -+ b .Loop -+ -+.align 4 -+.Loop: -+#if __ARM_ARCH__<7 -+ ldrb r0,[lr],#16 @ load input -+# ifdef __thumb2__ -+ it hi -+# endif -+ addhi $h4,$h4,#1 @ 1<<128 -+ ldrb r1,[lr,#-15] -+ ldrb r2,[lr,#-14] -+ ldrb r3,[lr,#-13] -+ orr r1,r0,r1,lsl#8 -+ ldrb r0,[lr,#-12] -+ orr r2,r1,r2,lsl#16 -+ ldrb r1,[lr,#-11] -+ orr r3,r2,r3,lsl#24 -+ ldrb r2,[lr,#-10] -+ adds $h0,$h0,r3 @ accumulate input -+ -+ ldrb r3,[lr,#-9] -+ orr r1,r0,r1,lsl#8 -+ ldrb r0,[lr,#-8] -+ orr r2,r1,r2,lsl#16 -+ ldrb r1,[lr,#-7] -+ orr r3,r2,r3,lsl#24 -+ ldrb r2,[lr,#-6] -+ adcs $h1,$h1,r3 -+ -+ ldrb r3,[lr,#-5] -+ orr r1,r0,r1,lsl#8 -+ ldrb r0,[lr,#-4] -+ orr r2,r1,r2,lsl#16 -+ ldrb r1,[lr,#-3] -+ orr r3,r2,r3,lsl#24 -+ ldrb r2,[lr,#-2] -+ adcs $h2,$h2,r3 -+ -+ ldrb r3,[lr,#-1] -+ orr r1,r0,r1,lsl#8 -+ str lr,[sp,#8] @ offload input pointer -+ orr r2,r1,r2,lsl#16 -+ add $s1,$r1,$r1,lsr#2 -+ orr r3,r2,r3,lsl#24 -+#else -+ ldr r0,[lr],#16 @ load input -+ it hi -+ addhi $h4,$h4,#1 @ padbit -+ ldr r1,[lr,#-12] -+ ldr r2,[lr,#-8] -+ ldr r3,[lr,#-4] 
-+# ifdef __ARMEB__ -+ rev r0,r0 -+ rev r1,r1 -+ rev r2,r2 -+ rev r3,r3 -+# endif -+ adds $h0,$h0,r0 @ accumulate input -+ str lr,[sp,#8] @ offload input pointer -+ adcs $h1,$h1,r1 -+ add $s1,$r1,$r1,lsr#2 -+ adcs $h2,$h2,r2 -+#endif -+ add $s2,$r2,$r2,lsr#2 -+ adcs $h3,$h3,r3 -+ add $s3,$r3,$r3,lsr#2 -+ -+ umull r2,r3,$h1,$r0 -+ adc $h4,$h4,#0 -+ umull r0,r1,$h0,$r0 -+ umlal r2,r3,$h4,$s1 -+ umlal r0,r1,$h3,$s1 -+ ldr $r1,[sp,#20] @ reload $r1 -+ umlal r2,r3,$h2,$s3 -+ umlal r0,r1,$h1,$s3 -+ umlal r2,r3,$h3,$s2 -+ umlal r0,r1,$h2,$s2 -+ umlal r2,r3,$h0,$r1 -+ str r0,[sp,#0] @ future $h0 -+ mul r0,$s2,$h4 -+ ldr $r2,[sp,#24] @ reload $r2 -+ adds r2,r2,r1 @ d1+=d0>>32 -+ eor r1,r1,r1 -+ adc lr,r3,#0 @ future $h2 -+ str r2,[sp,#4] @ future $h1 -+ -+ mul r2,$s3,$h4 -+ eor r3,r3,r3 -+ umlal r0,r1,$h3,$s3 -+ ldr $r3,[sp,#28] @ reload $r3 -+ umlal r2,r3,$h3,$r0 -+ umlal r0,r1,$h2,$r0 -+ umlal r2,r3,$h2,$r1 -+ umlal r0,r1,$h1,$r1 -+ umlal r2,r3,$h1,$r2 -+ umlal r0,r1,$h0,$r2 -+ umlal r2,r3,$h0,$r3 -+ ldr $h0,[sp,#0] -+ mul $h4,$r0,$h4 -+ ldr $h1,[sp,#4] -+ -+ adds $h2,lr,r0 @ d2+=d1>>32 -+ ldr lr,[sp,#8] @ reload input pointer -+ adc r1,r1,#0 -+ adds $h3,r2,r1 @ d3+=d2>>32 -+ ldr r0,[sp,#16] @ reload end pointer -+ adc r3,r3,#0 -+ add $h4,$h4,r3 @ h4+=d3>>32 -+ -+ and r1,$h4,#-4 -+ and $h4,$h4,#3 -+ add r1,r1,r1,lsr#2 @ *=5 -+ adds $h0,$h0,r1 -+ adcs $h1,$h1,#0 -+ adcs $h2,$h2,#0 -+ adcs $h3,$h3,#0 -+ adc $h4,$h4,#0 -+ -+ cmp r0,lr @ done yet? 
-+ bhi .Loop -+ -+ ldr $ctx,[sp,#12] -+ add sp,sp,#32 -+ stmdb $ctx,{$h0-$h4} @ store the result -+ -+.Lno_data: -+#if __ARM_ARCH__>=5 -+ ldmia sp!,{r3-r11,pc} -+#else -+ ldmia sp!,{r3-r11,lr} -+ tst lr,#1 -+ moveq pc,lr @ be binary compatible with V4, yet -+ bx lr @ interoperable with Thumb ISA:-) -+#endif -+.size poly1305_blocks,.-poly1305_blocks -+___ -+} -+{ -+my ($ctx,$mac,$nonce)=map("r$_",(0..2)); -+my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11)); -+my $g4=$ctx; -+ -+$code.=<<___; -+.type poly1305_emit,%function -+.align 5 -+poly1305_emit: -+.Lpoly1305_emit: -+ stmdb sp!,{r4-r11} -+ -+ ldmia $ctx,{$h0-$h4} -+ -+#if __ARM_ARCH__>=7 -+ ldr ip,[$ctx,#36] @ is_base2_26 -+ -+ adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 -+ mov $g1,$h1,lsr#6 -+ adcs $g1,$g1,$h2,lsl#20 -+ mov $g2,$h2,lsr#12 -+ adcs $g2,$g2,$h3,lsl#14 -+ mov $g3,$h3,lsr#18 -+ adcs $g3,$g3,$h4,lsl#8 -+ mov $g4,#0 -+ adc $g4,$g4,$h4,lsr#24 -+ -+ tst ip,ip -+ itttt ne -+ movne $h0,$g0 -+ movne $h1,$g1 -+ movne $h2,$g2 -+ movne $h3,$g3 -+ it ne -+ movne $h4,$g4 -+#endif -+ -+ adds $g0,$h0,#5 @ compare to modulus -+ adcs $g1,$h1,#0 -+ adcs $g2,$h2,#0 -+ adcs $g3,$h3,#0 -+ adc $g4,$h4,#0 -+ tst $g4,#4 @ did it carry/borrow? 
-+ -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne $h0,$g0 -+ ldr $g0,[$nonce,#0] -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne $h1,$g1 -+ ldr $g1,[$nonce,#4] -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne $h2,$g2 -+ ldr $g2,[$nonce,#8] -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne $h3,$g3 -+ ldr $g3,[$nonce,#12] -+ -+ adds $h0,$h0,$g0 -+ adcs $h1,$h1,$g1 -+ adcs $h2,$h2,$g2 -+ adc $h3,$h3,$g3 -+ -+#if __ARM_ARCH__>=7 -+# ifdef __ARMEB__ -+ rev $h0,$h0 -+ rev $h1,$h1 -+ rev $h2,$h2 -+ rev $h3,$h3 -+# endif -+ str $h0,[$mac,#0] -+ str $h1,[$mac,#4] -+ str $h2,[$mac,#8] -+ str $h3,[$mac,#12] -+#else -+ strb $h0,[$mac,#0] -+ mov $h0,$h0,lsr#8 -+ strb $h1,[$mac,#4] -+ mov $h1,$h1,lsr#8 -+ strb $h2,[$mac,#8] -+ mov $h2,$h2,lsr#8 -+ strb $h3,[$mac,#12] -+ mov $h3,$h3,lsr#8 -+ -+ strb $h0,[$mac,#1] -+ mov $h0,$h0,lsr#8 -+ strb $h1,[$mac,#5] -+ mov $h1,$h1,lsr#8 -+ strb $h2,[$mac,#9] -+ mov $h2,$h2,lsr#8 -+ strb $h3,[$mac,#13] -+ mov $h3,$h3,lsr#8 -+ -+ strb $h0,[$mac,#2] -+ mov $h0,$h0,lsr#8 -+ strb $h1,[$mac,#6] -+ mov $h1,$h1,lsr#8 -+ strb $h2,[$mac,#10] -+ mov $h2,$h2,lsr#8 -+ strb $h3,[$mac,#14] -+ mov $h3,$h3,lsr#8 -+ -+ strb $h0,[$mac,#3] -+ strb $h1,[$mac,#7] -+ strb $h2,[$mac,#11] -+ strb $h3,[$mac,#15] -+#endif -+ ldmia sp!,{r4-r11} -+#if __ARM_ARCH__>=5 -+ ret @ bx lr -+#else -+ tst lr,#1 -+ moveq pc,lr @ be binary compatible with V4, yet -+ bx lr @ interoperable with Thumb ISA:-) -+#endif -+.size poly1305_emit,.-poly1305_emit -+___ -+{ -+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9)); -+my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14)); -+my ($T0,$T1,$MASK) = map("q$_",(15,4,0)); -+ -+my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7)); -+ -+$code.=<<___; -+#if __ARM_MAX_ARCH__>=7 -+.fpu neon -+ -+.type poly1305_init_neon,%function -+.align 5 -+poly1305_init_neon: -+.Lpoly1305_init_neon: -+ ldr r3,[$ctx,#48] @ first table element -+ cmp r3,#-1 @ is value impossible? 
-+ bne .Lno_init_neon -+ -+ ldr r4,[$ctx,#20] @ load key base 2^32 -+ ldr r5,[$ctx,#24] -+ ldr r6,[$ctx,#28] -+ ldr r7,[$ctx,#32] -+ -+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 -+ mov r3,r4,lsr#26 -+ mov r4,r5,lsr#20 -+ orr r3,r3,r5,lsl#6 -+ mov r5,r6,lsr#14 -+ orr r4,r4,r6,lsl#12 -+ mov r6,r7,lsr#8 -+ orr r5,r5,r7,lsl#18 -+ and r3,r3,#0x03ffffff -+ and r4,r4,#0x03ffffff -+ and r5,r5,#0x03ffffff -+ -+ vdup.32 $R0,r2 @ r^1 in both lanes -+ add r2,r3,r3,lsl#2 @ *5 -+ vdup.32 $R1,r3 -+ add r3,r4,r4,lsl#2 -+ vdup.32 $S1,r2 -+ vdup.32 $R2,r4 -+ add r4,r5,r5,lsl#2 -+ vdup.32 $S2,r3 -+ vdup.32 $R3,r5 -+ add r5,r6,r6,lsl#2 -+ vdup.32 $S3,r4 -+ vdup.32 $R4,r6 -+ vdup.32 $S4,r5 -+ -+ mov $zeros,#2 @ counter -+ -+.Lsquare_neon: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ -+ vmull.u32 $D0,$R0,${R0}[1] -+ vmull.u32 $D1,$R1,${R0}[1] -+ vmull.u32 $D2,$R2,${R0}[1] -+ vmull.u32 $D3,$R3,${R0}[1] -+ vmull.u32 $D4,$R4,${R0}[1] -+ -+ vmlal.u32 $D0,$R4,${S1}[1] -+ vmlal.u32 $D1,$R0,${R1}[1] -+ vmlal.u32 $D2,$R1,${R1}[1] -+ vmlal.u32 $D3,$R2,${R1}[1] -+ vmlal.u32 $D4,$R3,${R1}[1] -+ -+ vmlal.u32 $D0,$R3,${S2}[1] -+ vmlal.u32 $D1,$R4,${S2}[1] -+ vmlal.u32 $D3,$R1,${R2}[1] -+ vmlal.u32 $D2,$R0,${R2}[1] -+ vmlal.u32 $D4,$R2,${R2}[1] -+ -+ vmlal.u32 $D0,$R2,${S3}[1] -+ vmlal.u32 $D3,$R0,${R3}[1] -+ vmlal.u32 $D1,$R3,${S3}[1] -+ vmlal.u32 $D2,$R4,${S3}[1] -+ vmlal.u32 $D4,$R1,${R3}[1] -+ -+ vmlal.u32 $D3,$R4,${S4}[1] -+ vmlal.u32 $D0,$R1,${S4}[1] -+ vmlal.u32 $D1,$R2,${S4}[1] -+ vmlal.u32 $D2,$R3,${S4}[1] -+ vmlal.u32 $D4,$R0,${R4}[1] -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein -+ @ and P. 
Schwabe -+ @ -+ @ H0>>+H1>>+H2>>+H3>>+H4 -+ @ H3>>+H4>>*5+H0>>+H1 -+ @ -+ @ Trivia. -+ @ -+ @ Result of multiplication of n-bit number by m-bit number is -+ @ n+m bits wide. However! Even though 2^n is an n+1-bit number, -+ @ m-bit number multiplied by 2^n is still n+m bits wide. -+ @ -+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, -+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit -+ @ one is n+1 bits wide. -+ @ -+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that -+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 -+ @ can be 27. However! In cases when their width exceeds 26 bits -+ @ they are limited by 2^26+2^6. This in turn means that *sum* -+ @ of the products with these values can still be viewed as sum -+ @ of 52-bit numbers as long as the amount of addends is not a -+ @ power of 2. For example, -+ @ -+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, -+ @ -+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or -+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than -+ @ 8 * (2^52) or 2^55. However, the value is then multiplied -+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), -+ @ which is less than 32 * (2^52) or 2^57. And when processing -+ @ data we are looking at triple as many addends... -+ @ -+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and -+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the -+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while -+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 -+ @ instruction accepts 2x32-bit input and writes 2x64-bit result. -+ @ This means that the result of reduction has to be compressed upon -+ @ loop wrap-around. 
This can be done in the process of reduction -+ @ to minimize amount of instructions [as well as amount of -+ @ 128-bit instructions, which benefits low-end processors], but -+ @ one has to watch for H2 (which is narrower than H0) and 5*H4 -+ @ not being wider than 58 bits, so that result of right shift -+ @ by 26 bits fits in 32 bits. This is also useful on x86, -+ @ because it allows to use paddd in place for paddq, which -+ @ benefits Atom, where paddq is ridiculously slow. -+ -+ vshr.u64 $T0,$D3,#26 -+ vmovn.i64 $D3#lo,$D3 -+ vshr.u64 $T1,$D0,#26 -+ vmovn.i64 $D0#lo,$D0 -+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4 -+ vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff -+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1 -+ vbic.i32 $D0#lo,#0xfc000000 -+ -+ vshrn.u64 $T0#lo,$D4,#26 -+ vmovn.i64 $D4#lo,$D4 -+ vshr.u64 $T1,$D1,#26 -+ vmovn.i64 $D1#lo,$D1 -+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2 -+ vbic.i32 $D4#lo,#0xfc000000 -+ vbic.i32 $D1#lo,#0xfc000000 -+ -+ vadd.i32 $D0#lo,$D0#lo,$T0#lo -+ vshl.u32 $T0#lo,$T0#lo,#2 -+ vshrn.u64 $T1#lo,$D2,#26 -+ vmovn.i64 $D2#lo,$D2 -+ vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0 -+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 -+ vbic.i32 $D2#lo,#0xfc000000 -+ -+ vshr.u32 $T0#lo,$D0#lo,#26 -+ vbic.i32 $D0#lo,#0xfc000000 -+ vshr.u32 $T1#lo,$D3#lo,#26 -+ vbic.i32 $D3#lo,#0xfc000000 -+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 -+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 -+ -+ subs $zeros,$zeros,#1 -+ beq .Lsquare_break_neon -+ -+ add $tbl0,$ctx,#(48+0*9*4) -+ add $tbl1,$ctx,#(48+1*9*4) -+ -+ vtrn.32 $R0,$D0#lo @ r^2:r^1 -+ vtrn.32 $R2,$D2#lo -+ vtrn.32 $R3,$D3#lo -+ vtrn.32 $R1,$D1#lo -+ vtrn.32 $R4,$D4#lo -+ -+ vshl.u32 $S2,$R2,#2 @ *5 -+ vshl.u32 $S3,$R3,#2 -+ vshl.u32 $S1,$R1,#2 -+ vshl.u32 $S4,$R4,#2 -+ vadd.i32 $S2,$S2,$R2 -+ vadd.i32 $S1,$S1,$R1 -+ vadd.i32 $S3,$S3,$R3 -+ vadd.i32 $S4,$S4,$R4 -+ -+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! -+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! -+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! 
-+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! -+ vst1.32 {${S4}[0]},[$tbl0,:32] -+ vst1.32 {${S4}[1]},[$tbl1,:32] -+ -+ b .Lsquare_neon -+ -+.align 4 -+.Lsquare_break_neon: -+ add $tbl0,$ctx,#(48+2*4*9) -+ add $tbl1,$ctx,#(48+3*4*9) -+ -+ vmov $R0,$D0#lo @ r^4:r^3 -+ vshl.u32 $S1,$D1#lo,#2 @ *5 -+ vmov $R1,$D1#lo -+ vshl.u32 $S2,$D2#lo,#2 -+ vmov $R2,$D2#lo -+ vshl.u32 $S3,$D3#lo,#2 -+ vmov $R3,$D3#lo -+ vshl.u32 $S4,$D4#lo,#2 -+ vmov $R4,$D4#lo -+ vadd.i32 $S1,$S1,$D1#lo -+ vadd.i32 $S2,$S2,$D2#lo -+ vadd.i32 $S3,$S3,$D3#lo -+ vadd.i32 $S4,$S4,$D4#lo -+ -+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! -+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! -+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! -+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! -+ vst1.32 {${S4}[0]},[$tbl0] -+ vst1.32 {${S4}[1]},[$tbl1] -+ -+.Lno_init_neon: -+ ret @ bx lr -+.size poly1305_init_neon,.-poly1305_init_neon -+ -+.type poly1305_blocks_neon,%function -+.align 5 -+poly1305_blocks_neon: -+.Lpoly1305_blocks_neon: -+ ldr ip,[$ctx,#36] @ is_base2_26 -+ -+ cmp $len,#64 -+ blo .Lpoly1305_blocks -+ -+ stmdb sp!,{r4-r7} -+ vstmdb sp!,{d8-d15} @ ABI specification says so -+ -+ tst ip,ip @ is_base2_26? 
-+ bne .Lbase2_26_neon -+ -+ stmdb sp!,{r1-r3,lr} -+ bl .Lpoly1305_init_neon -+ -+ ldr r4,[$ctx,#0] @ load hash value base 2^32 -+ ldr r5,[$ctx,#4] -+ ldr r6,[$ctx,#8] -+ ldr r7,[$ctx,#12] -+ ldr ip,[$ctx,#16] -+ -+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 -+ mov r3,r4,lsr#26 -+ veor $D0#lo,$D0#lo,$D0#lo -+ mov r4,r5,lsr#20 -+ orr r3,r3,r5,lsl#6 -+ veor $D1#lo,$D1#lo,$D1#lo -+ mov r5,r6,lsr#14 -+ orr r4,r4,r6,lsl#12 -+ veor $D2#lo,$D2#lo,$D2#lo -+ mov r6,r7,lsr#8 -+ orr r5,r5,r7,lsl#18 -+ veor $D3#lo,$D3#lo,$D3#lo -+ and r3,r3,#0x03ffffff -+ orr r6,r6,ip,lsl#24 -+ veor $D4#lo,$D4#lo,$D4#lo -+ and r4,r4,#0x03ffffff -+ mov r1,#1 -+ and r5,r5,#0x03ffffff -+ str r1,[$ctx,#36] @ set is_base2_26 -+ -+ vmov.32 $D0#lo[0],r2 -+ vmov.32 $D1#lo[0],r3 -+ vmov.32 $D2#lo[0],r4 -+ vmov.32 $D3#lo[0],r5 -+ vmov.32 $D4#lo[0],r6 -+ adr $zeros,.Lzeros -+ -+ ldmia sp!,{r1-r3,lr} -+ b .Lhash_loaded -+ -+.align 4 -+.Lbase2_26_neon: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ load hash value -+ -+ veor $D0#lo,$D0#lo,$D0#lo -+ veor $D1#lo,$D1#lo,$D1#lo -+ veor $D2#lo,$D2#lo,$D2#lo -+ veor $D3#lo,$D3#lo,$D3#lo -+ veor $D4#lo,$D4#lo,$D4#lo -+ vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! -+ adr $zeros,.Lzeros -+ vld1.32 {$D4#lo[0]},[$ctx] -+ sub $ctx,$ctx,#16 @ rewind -+ -+.Lhash_loaded: -+ add $in2,$inp,#32 -+ mov $padbit,$padbit,lsl#24 -+ tst $len,#31 -+ beq .Leven -+ -+ vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]! 
-+ vmov.32 $H4#lo[0],$padbit -+ sub $len,$len,#16 -+ add $in2,$inp,#32 -+ -+# ifdef __ARMEB__ -+ vrev32.8 $H0,$H0 -+ vrev32.8 $H3,$H3 -+ vrev32.8 $H1,$H1 -+ vrev32.8 $H2,$H2 -+# endif -+ vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26 -+ vshl.u32 $H3#lo,$H3#lo,#18 -+ -+ vsri.u32 $H3#lo,$H2#lo,#14 -+ vshl.u32 $H2#lo,$H2#lo,#12 -+ vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi -+ -+ vbic.i32 $H3#lo,#0xfc000000 -+ vsri.u32 $H2#lo,$H1#lo,#20 -+ vshl.u32 $H1#lo,$H1#lo,#6 -+ -+ vbic.i32 $H2#lo,#0xfc000000 -+ vsri.u32 $H1#lo,$H0#lo,#26 -+ vadd.i32 $H3#hi,$H3#lo,$D3#lo -+ -+ vbic.i32 $H0#lo,#0xfc000000 -+ vbic.i32 $H1#lo,#0xfc000000 -+ vadd.i32 $H2#hi,$H2#lo,$D2#lo -+ -+ vadd.i32 $H0#hi,$H0#lo,$D0#lo -+ vadd.i32 $H1#hi,$H1#lo,$D1#lo -+ -+ mov $tbl1,$zeros -+ add $tbl0,$ctx,#48 -+ -+ cmp $len,$len -+ b .Long_tail -+ -+.align 4 -+.Leven: -+ subs $len,$len,#64 -+ it lo -+ movlo $in2,$zeros -+ -+ vmov.i32 $H4,#1<<24 @ padbit, yes, always -+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] -+ add $inp,$inp,#64 -+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) -+ add $in2,$in2,#64 -+ itt hi -+ addhi $tbl1,$ctx,#(48+1*9*4) -+ addhi $tbl0,$ctx,#(48+3*9*4) -+ -+# ifdef __ARMEB__ -+ vrev32.8 $H0,$H0 -+ vrev32.8 $H3,$H3 -+ vrev32.8 $H1,$H1 -+ vrev32.8 $H2,$H2 -+# endif -+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 -+ vshl.u32 $H3,$H3,#18 -+ -+ vsri.u32 $H3,$H2,#14 -+ vshl.u32 $H2,$H2,#12 -+ -+ vbic.i32 $H3,#0xfc000000 -+ vsri.u32 $H2,$H1,#20 -+ vshl.u32 $H1,$H1,#6 -+ -+ vbic.i32 $H2,#0xfc000000 -+ vsri.u32 $H1,$H0,#26 -+ -+ vbic.i32 $H0,#0xfc000000 -+ vbic.i32 $H1,#0xfc000000 -+ -+ bls .Lskip_loop -+ -+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2 -+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 -+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! -+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! 
-+ b .Loop_neon -+ -+.align 5 -+.Loop_neon: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 -+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r -+ @ \___________________/ -+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 -+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r -+ @ \___________________/ \____________________/ -+ @ -+ @ Note that we start with inp[2:3]*r^2. This is because it -+ @ doesn't depend on reduction in previous iteration. -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ inp[2:3]*r^2 -+ -+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1] -+ vmull.u32 $D2,$H2#hi,${R0}[1] -+ vadd.i32 $H0#lo,$H0#lo,$D0#lo -+ vmull.u32 $D0,$H0#hi,${R0}[1] -+ vadd.i32 $H3#lo,$H3#lo,$D3#lo -+ vmull.u32 $D3,$H3#hi,${R0}[1] -+ vmlal.u32 $D2,$H1#hi,${R1}[1] -+ vadd.i32 $H1#lo,$H1#lo,$D1#lo -+ vmull.u32 $D1,$H1#hi,${R0}[1] -+ -+ vadd.i32 $H4#lo,$H4#lo,$D4#lo -+ vmull.u32 $D4,$H4#hi,${R0}[1] -+ subs $len,$len,#64 -+ vmlal.u32 $D0,$H4#hi,${S1}[1] -+ it lo -+ movlo $in2,$zeros -+ vmlal.u32 $D3,$H2#hi,${R1}[1] -+ vld1.32 ${S4}[1],[$tbl1,:32] -+ vmlal.u32 $D1,$H0#hi,${R1}[1] -+ vmlal.u32 $D4,$H3#hi,${R1}[1] -+ -+ vmlal.u32 $D0,$H3#hi,${S2}[1] -+ vmlal.u32 $D3,$H1#hi,${R2}[1] -+ vmlal.u32 $D4,$H2#hi,${R2}[1] -+ vmlal.u32 $D1,$H4#hi,${S2}[1] -+ vmlal.u32 $D2,$H0#hi,${R2}[1] -+ -+ vmlal.u32 $D3,$H0#hi,${R3}[1] -+ vmlal.u32 $D0,$H2#hi,${S3}[1] -+ vmlal.u32 $D4,$H1#hi,${R3}[1] -+ vmlal.u32 $D1,$H3#hi,${S3}[1] -+ vmlal.u32 $D2,$H4#hi,${S3}[1] -+ -+ vmlal.u32 $D3,$H4#hi,${S4}[1] -+ vmlal.u32 $D0,$H1#hi,${S4}[1] -+ vmlal.u32 
$D4,$H0#hi,${R4}[1] -+ vmlal.u32 $D1,$H2#hi,${S4}[1] -+ vmlal.u32 $D2,$H3#hi,${S4}[1] -+ -+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) -+ add $in2,$in2,#64 -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ (hash+inp[0:1])*r^4 and accumulate -+ -+ vmlal.u32 $D3,$H3#lo,${R0}[0] -+ vmlal.u32 $D0,$H0#lo,${R0}[0] -+ vmlal.u32 $D4,$H4#lo,${R0}[0] -+ vmlal.u32 $D1,$H1#lo,${R0}[0] -+ vmlal.u32 $D2,$H2#lo,${R0}[0] -+ vld1.32 ${S4}[0],[$tbl0,:32] -+ -+ vmlal.u32 $D3,$H2#lo,${R1}[0] -+ vmlal.u32 $D0,$H4#lo,${S1}[0] -+ vmlal.u32 $D4,$H3#lo,${R1}[0] -+ vmlal.u32 $D1,$H0#lo,${R1}[0] -+ vmlal.u32 $D2,$H1#lo,${R1}[0] -+ -+ vmlal.u32 $D3,$H1#lo,${R2}[0] -+ vmlal.u32 $D0,$H3#lo,${S2}[0] -+ vmlal.u32 $D4,$H2#lo,${R2}[0] -+ vmlal.u32 $D1,$H4#lo,${S2}[0] -+ vmlal.u32 $D2,$H0#lo,${R2}[0] -+ -+ vmlal.u32 $D3,$H0#lo,${R3}[0] -+ vmlal.u32 $D0,$H2#lo,${S3}[0] -+ vmlal.u32 $D4,$H1#lo,${R3}[0] -+ vmlal.u32 $D1,$H3#lo,${S3}[0] -+ vmlal.u32 $D3,$H4#lo,${S4}[0] -+ -+ vmlal.u32 $D2,$H4#lo,${S3}[0] -+ vmlal.u32 $D0,$H1#lo,${S4}[0] -+ vmlal.u32 $D4,$H0#lo,${R4}[0] -+ vmov.i32 $H4,#1<<24 @ padbit, yes, always -+ vmlal.u32 $D1,$H2#lo,${S4}[0] -+ vmlal.u32 $D2,$H3#lo,${S4}[0] -+ -+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] -+ add $inp,$inp,#64 -+# ifdef __ARMEB__ -+ vrev32.8 $H0,$H0 -+ vrev32.8 $H1,$H1 -+ vrev32.8 $H2,$H2 -+ vrev32.8 $H3,$H3 -+# endif -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of -+ @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4. 
-+ -+ vshr.u64 $T0,$D3,#26 -+ vmovn.i64 $D3#lo,$D3 -+ vshr.u64 $T1,$D0,#26 -+ vmovn.i64 $D0#lo,$D0 -+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4 -+ vbic.i32 $D3#lo,#0xfc000000 -+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 -+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1 -+ vshl.u32 $H3,$H3,#18 -+ vbic.i32 $D0#lo,#0xfc000000 -+ -+ vshrn.u64 $T0#lo,$D4,#26 -+ vmovn.i64 $D4#lo,$D4 -+ vshr.u64 $T1,$D1,#26 -+ vmovn.i64 $D1#lo,$D1 -+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2 -+ vsri.u32 $H3,$H2,#14 -+ vbic.i32 $D4#lo,#0xfc000000 -+ vshl.u32 $H2,$H2,#12 -+ vbic.i32 $D1#lo,#0xfc000000 -+ -+ vadd.i32 $D0#lo,$D0#lo,$T0#lo -+ vshl.u32 $T0#lo,$T0#lo,#2 -+ vbic.i32 $H3,#0xfc000000 -+ vshrn.u64 $T1#lo,$D2,#26 -+ vmovn.i64 $D2#lo,$D2 -+ vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec] -+ vsri.u32 $H2,$H1,#20 -+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 -+ vshl.u32 $H1,$H1,#6 -+ vbic.i32 $D2#lo,#0xfc000000 -+ vbic.i32 $H2,#0xfc000000 -+ -+ vshrn.u64 $T0#lo,$D0,#26 @ re-narrow -+ vmovn.i64 $D0#lo,$D0 -+ vsri.u32 $H1,$H0,#26 -+ vbic.i32 $H0,#0xfc000000 -+ vshr.u32 $T1#lo,$D3#lo,#26 -+ vbic.i32 $D3#lo,#0xfc000000 -+ vbic.i32 $D0#lo,#0xfc000000 -+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 -+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 -+ vbic.i32 $H1,#0xfc000000 -+ -+ bhi .Loop_neon -+ -+.Lskip_loop: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 -+ -+ add $tbl1,$ctx,#(48+0*9*4) -+ add $tbl0,$ctx,#(48+1*9*4) -+ adds $len,$len,#32 -+ it ne -+ movne $len,#0 -+ bne .Long_tail -+ -+ vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi -+ vadd.i32 $H0#hi,$H0#lo,$D0#lo -+ vadd.i32 $H3#hi,$H3#lo,$D3#lo -+ vadd.i32 $H1#hi,$H1#lo,$D1#lo -+ vadd.i32 $H4#hi,$H4#lo,$D4#lo -+ -+.Long_tail: -+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1 -+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! 
@ load r^2 -+ -+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant -+ vmull.u32 $D2,$H2#hi,$R0 -+ vadd.i32 $H0#lo,$H0#lo,$D0#lo -+ vmull.u32 $D0,$H0#hi,$R0 -+ vadd.i32 $H3#lo,$H3#lo,$D3#lo -+ vmull.u32 $D3,$H3#hi,$R0 -+ vadd.i32 $H1#lo,$H1#lo,$D1#lo -+ vmull.u32 $D1,$H1#hi,$R0 -+ vadd.i32 $H4#lo,$H4#lo,$D4#lo -+ vmull.u32 $D4,$H4#hi,$R0 -+ -+ vmlal.u32 $D0,$H4#hi,$S1 -+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! -+ vmlal.u32 $D3,$H2#hi,$R1 -+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! -+ vmlal.u32 $D1,$H0#hi,$R1 -+ vmlal.u32 $D4,$H3#hi,$R1 -+ vmlal.u32 $D2,$H1#hi,$R1 -+ -+ vmlal.u32 $D3,$H1#hi,$R2 -+ vld1.32 ${S4}[1],[$tbl1,:32] -+ vmlal.u32 $D0,$H3#hi,$S2 -+ vld1.32 ${S4}[0],[$tbl0,:32] -+ vmlal.u32 $D4,$H2#hi,$R2 -+ vmlal.u32 $D1,$H4#hi,$S2 -+ vmlal.u32 $D2,$H0#hi,$R2 -+ -+ vmlal.u32 $D3,$H0#hi,$R3 -+ it ne -+ addne $tbl1,$ctx,#(48+2*9*4) -+ vmlal.u32 $D0,$H2#hi,$S3 -+ it ne -+ addne $tbl0,$ctx,#(48+3*9*4) -+ vmlal.u32 $D4,$H1#hi,$R3 -+ vmlal.u32 $D1,$H3#hi,$S3 -+ vmlal.u32 $D2,$H4#hi,$S3 -+ -+ vmlal.u32 $D3,$H4#hi,$S4 -+ vorn $MASK,$MASK,$MASK @ all-ones, can be redundant -+ vmlal.u32 $D0,$H1#hi,$S4 -+ vshr.u64 $MASK,$MASK,#38 -+ vmlal.u32 $D4,$H0#hi,$R4 -+ vmlal.u32 $D1,$H2#hi,$S4 -+ vmlal.u32 $D2,$H3#hi,$S4 -+ -+ beq .Lshort_tail -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ (hash+inp[0:1])*r^4:r^3 and accumulate -+ -+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3 -+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 -+ -+ vmlal.u32 $D2,$H2#lo,$R0 -+ vmlal.u32 $D0,$H0#lo,$R0 -+ vmlal.u32 $D3,$H3#lo,$R0 -+ vmlal.u32 $D1,$H1#lo,$R0 -+ vmlal.u32 $D4,$H4#lo,$R0 -+ -+ vmlal.u32 $D0,$H4#lo,$S1 -+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! -+ vmlal.u32 $D3,$H2#lo,$R1 -+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! 
-+ vmlal.u32 $D1,$H0#lo,$R1 -+ vmlal.u32 $D4,$H3#lo,$R1 -+ vmlal.u32 $D2,$H1#lo,$R1 -+ -+ vmlal.u32 $D3,$H1#lo,$R2 -+ vld1.32 ${S4}[1],[$tbl1,:32] -+ vmlal.u32 $D0,$H3#lo,$S2 -+ vld1.32 ${S4}[0],[$tbl0,:32] -+ vmlal.u32 $D4,$H2#lo,$R2 -+ vmlal.u32 $D1,$H4#lo,$S2 -+ vmlal.u32 $D2,$H0#lo,$R2 -+ -+ vmlal.u32 $D3,$H0#lo,$R3 -+ vmlal.u32 $D0,$H2#lo,$S3 -+ vmlal.u32 $D4,$H1#lo,$R3 -+ vmlal.u32 $D1,$H3#lo,$S3 -+ vmlal.u32 $D2,$H4#lo,$S3 -+ -+ vmlal.u32 $D3,$H4#lo,$S4 -+ vorn $MASK,$MASK,$MASK @ all-ones -+ vmlal.u32 $D0,$H1#lo,$S4 -+ vshr.u64 $MASK,$MASK,#38 -+ vmlal.u32 $D4,$H0#lo,$R4 -+ vmlal.u32 $D1,$H2#lo,$S4 -+ vmlal.u32 $D2,$H3#lo,$S4 -+ -+.Lshort_tail: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ horizontal addition -+ -+ vadd.i64 $D3#lo,$D3#lo,$D3#hi -+ vadd.i64 $D0#lo,$D0#lo,$D0#hi -+ vadd.i64 $D4#lo,$D4#lo,$D4#hi -+ vadd.i64 $D1#lo,$D1#lo,$D1#hi -+ vadd.i64 $D2#lo,$D2#lo,$D2#hi -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ lazy reduction, but without narrowing -+ -+ vshr.u64 $T0,$D3,#26 -+ vand.i64 $D3,$D3,$MASK -+ vshr.u64 $T1,$D0,#26 -+ vand.i64 $D0,$D0,$MASK -+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4 -+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1 -+ -+ vshr.u64 $T0,$D4,#26 -+ vand.i64 $D4,$D4,$MASK -+ vshr.u64 $T1,$D1,#26 -+ vand.i64 $D1,$D1,$MASK -+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2 -+ -+ vadd.i64 $D0,$D0,$T0 -+ vshl.u64 $T0,$T0,#2 -+ vshr.u64 $T1,$D2,#26 -+ vand.i64 $D2,$D2,$MASK -+ vadd.i64 $D0,$D0,$T0 @ h4 -> h0 -+ vadd.i64 $D3,$D3,$T1 @ h2 -> h3 -+ -+ vshr.u64 $T0,$D0,#26 -+ vand.i64 $D0,$D0,$MASK -+ vshr.u64 $T1,$D3,#26 -+ vand.i64 $D3,$D3,$MASK -+ vadd.i64 $D1,$D1,$T0 @ h0 -> h1 -+ vadd.i64 $D4,$D4,$T1 @ h3 -> h4 -+ -+ cmp $len,#0 -+ bne .Leven -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ store hash value -+ -+ vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! 
-+ vst1.32 {$D4#lo[0]},[$ctx] -+ -+ vldmia sp!,{d8-d15} @ epilogue -+ ldmia sp!,{r4-r7} -+ ret @ bx lr -+.size poly1305_blocks_neon,.-poly1305_blocks_neon -+ -+.align 5 -+.Lzeros: -+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -+#ifndef __KERNEL__ -+.LOPENSSL_armcap: -+# ifdef _WIN32 -+.word OPENSSL_armcap_P -+# else -+.word OPENSSL_armcap_P-.Lpoly1305_init -+# endif -+.comm OPENSSL_armcap_P,4,4 -+.hidden OPENSSL_armcap_P -+#endif -+#endif -+___ -+} } -+$code.=<<___; -+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm" -+.align 2 -+___ -+ -+foreach (split("\n",$code)) { -+ s/\`([^\`]*)\`/eval $1/geo; -+ -+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or -+ s/\bret\b/bx lr/go or -+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 -+ -+ print $_,"\n"; -+} -+close STDOUT; # enforce flush ---- /dev/null -+++ b/arch/arm/crypto/poly1305-core.S_shipped -@@ -0,0 +1,1158 @@ -+#ifndef __KERNEL__ -+# include "arm_arch.h" -+#else -+# define __ARM_ARCH__ __LINUX_ARM_ARCH__ -+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -+# define poly1305_init poly1305_init_arm -+# define poly1305_blocks poly1305_blocks_arm -+# define poly1305_emit poly1305_emit_arm -+.globl poly1305_blocks_neon -+#endif -+ -+#if defined(__thumb2__) -+.syntax unified -+.thumb -+#else -+.code 32 -+#endif -+ -+.text -+ -+.globl poly1305_emit -+.globl poly1305_blocks -+.globl poly1305_init -+.type poly1305_init,%function -+.align 5 -+poly1305_init: -+.Lpoly1305_init: -+ stmdb sp!,{r4-r11} -+ -+ eor r3,r3,r3 -+ cmp r1,#0 -+ str r3,[r0,#0] @ zero hash value -+ str r3,[r0,#4] -+ str r3,[r0,#8] -+ str r3,[r0,#12] -+ str r3,[r0,#16] -+ str r3,[r0,#36] @ clear is_base2_26 -+ add r0,r0,#20 -+ -+#ifdef __thumb2__ -+ it eq -+#endif -+ moveq r0,#0 -+ beq .Lno_key -+ -+#if __ARM_MAX_ARCH__>=7 -+ mov r3,#-1 -+ str r3,[r0,#28] @ impossible key power value -+# ifndef __KERNEL__ -+ adr r11,.Lpoly1305_init -+ ldr r12,.LOPENSSL_armcap -+# endif -+#endif -+ ldrb r4,[r1,#0] -+ mov 
r10,#0x0fffffff -+ ldrb r5,[r1,#1] -+ and r3,r10,#-4 @ 0x0ffffffc -+ ldrb r6,[r1,#2] -+ ldrb r7,[r1,#3] -+ orr r4,r4,r5,lsl#8 -+ ldrb r5,[r1,#4] -+ orr r4,r4,r6,lsl#16 -+ ldrb r6,[r1,#5] -+ orr r4,r4,r7,lsl#24 -+ ldrb r7,[r1,#6] -+ and r4,r4,r10 -+ -+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -+# if !defined(_WIN32) -+ ldr r12,[r11,r12] @ OPENSSL_armcap_P -+# endif -+# if defined(__APPLE__) || defined(_WIN32) -+ ldr r12,[r12] -+# endif -+#endif -+ ldrb r8,[r1,#7] -+ orr r5,r5,r6,lsl#8 -+ ldrb r6,[r1,#8] -+ orr r5,r5,r7,lsl#16 -+ ldrb r7,[r1,#9] -+ orr r5,r5,r8,lsl#24 -+ ldrb r8,[r1,#10] -+ and r5,r5,r3 -+ -+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -+ tst r12,#ARMV7_NEON @ check for NEON -+# ifdef __thumb2__ -+ adr r9,.Lpoly1305_blocks_neon -+ adr r11,.Lpoly1305_blocks -+ it ne -+ movne r11,r9 -+ adr r12,.Lpoly1305_emit -+ orr r11,r11,#1 @ thumb-ify addresses -+ orr r12,r12,#1 -+# else -+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) -+ ite eq -+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) -+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) -+# endif -+#endif -+ ldrb r9,[r1,#11] -+ orr r6,r6,r7,lsl#8 -+ ldrb r7,[r1,#12] -+ orr r6,r6,r8,lsl#16 -+ ldrb r8,[r1,#13] -+ orr r6,r6,r9,lsl#24 -+ ldrb r9,[r1,#14] -+ and r6,r6,r3 -+ -+ ldrb r10,[r1,#15] -+ orr r7,r7,r8,lsl#8 -+ str r4,[r0,#0] -+ orr r7,r7,r9,lsl#16 -+ str r5,[r0,#4] -+ orr r7,r7,r10,lsl#24 -+ str r6,[r0,#8] -+ and r7,r7,r3 -+ str r7,[r0,#12] -+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) -+ stmia r2,{r11,r12} @ fill functions table -+ mov r0,#1 -+#else -+ mov r0,#0 -+#endif -+.Lno_key: -+ ldmia sp!,{r4-r11} -+#if __ARM_ARCH__>=5 -+ bx lr @ bx lr -+#else -+ tst lr,#1 -+ moveq pc,lr @ be binary compatible with V4, yet -+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) -+#endif -+.size poly1305_init,.-poly1305_init -+.type poly1305_blocks,%function -+.align 5 -+poly1305_blocks: -+.Lpoly1305_blocks: -+ stmdb sp!,{r3-r11,lr} -+ -+ ands r2,r2,#-16 -+ beq .Lno_data -+ -+ add r2,r2,r1 
@ end pointer -+ sub sp,sp,#32 -+ -+#if __ARM_ARCH__<7 -+ ldmia r0,{r4-r12} @ load context -+ add r0,r0,#20 -+ str r2,[sp,#16] @ offload stuff -+ str r0,[sp,#12] -+#else -+ ldr lr,[r0,#36] @ is_base2_26 -+ ldmia r0!,{r4-r8} @ load hash value -+ str r2,[sp,#16] @ offload stuff -+ str r0,[sp,#12] -+ -+ adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32 -+ mov r10,r5,lsr#6 -+ adcs r10,r10,r6,lsl#20 -+ mov r11,r6,lsr#12 -+ adcs r11,r11,r7,lsl#14 -+ mov r12,r7,lsr#18 -+ adcs r12,r12,r8,lsl#8 -+ mov r2,#0 -+ teq lr,#0 -+ str r2,[r0,#16] @ clear is_base2_26 -+ adc r2,r2,r8,lsr#24 -+ -+ itttt ne -+ movne r4,r9 @ choose between radixes -+ movne r5,r10 -+ movne r6,r11 -+ movne r7,r12 -+ ldmia r0,{r9-r12} @ load key -+ it ne -+ movne r8,r2 -+#endif -+ -+ mov lr,r1 -+ cmp r3,#0 -+ str r10,[sp,#20] -+ str r11,[sp,#24] -+ str r12,[sp,#28] -+ b .Loop -+ -+.align 4 -+.Loop: -+#if __ARM_ARCH__<7 -+ ldrb r0,[lr],#16 @ load input -+# ifdef __thumb2__ -+ it hi -+# endif -+ addhi r8,r8,#1 @ 1<<128 -+ ldrb r1,[lr,#-15] -+ ldrb r2,[lr,#-14] -+ ldrb r3,[lr,#-13] -+ orr r1,r0,r1,lsl#8 -+ ldrb r0,[lr,#-12] -+ orr r2,r1,r2,lsl#16 -+ ldrb r1,[lr,#-11] -+ orr r3,r2,r3,lsl#24 -+ ldrb r2,[lr,#-10] -+ adds r4,r4,r3 @ accumulate input -+ -+ ldrb r3,[lr,#-9] -+ orr r1,r0,r1,lsl#8 -+ ldrb r0,[lr,#-8] -+ orr r2,r1,r2,lsl#16 -+ ldrb r1,[lr,#-7] -+ orr r3,r2,r3,lsl#24 -+ ldrb r2,[lr,#-6] -+ adcs r5,r5,r3 -+ -+ ldrb r3,[lr,#-5] -+ orr r1,r0,r1,lsl#8 -+ ldrb r0,[lr,#-4] -+ orr r2,r1,r2,lsl#16 -+ ldrb r1,[lr,#-3] -+ orr r3,r2,r3,lsl#24 -+ ldrb r2,[lr,#-2] -+ adcs r6,r6,r3 -+ -+ ldrb r3,[lr,#-1] -+ orr r1,r0,r1,lsl#8 -+ str lr,[sp,#8] @ offload input pointer -+ orr r2,r1,r2,lsl#16 -+ add r10,r10,r10,lsr#2 -+ orr r3,r2,r3,lsl#24 -+#else -+ ldr r0,[lr],#16 @ load input -+ it hi -+ addhi r8,r8,#1 @ padbit -+ ldr r1,[lr,#-12] -+ ldr r2,[lr,#-8] -+ ldr r3,[lr,#-4] -+# ifdef __ARMEB__ -+ rev r0,r0 -+ rev r1,r1 -+ rev r2,r2 -+ rev r3,r3 -+# endif -+ adds r4,r4,r0 @ accumulate input -+ str lr,[sp,#8] @ offload input 
pointer -+ adcs r5,r5,r1 -+ add r10,r10,r10,lsr#2 -+ adcs r6,r6,r2 -+#endif -+ add r11,r11,r11,lsr#2 -+ adcs r7,r7,r3 -+ add r12,r12,r12,lsr#2 -+ -+ umull r2,r3,r5,r9 -+ adc r8,r8,#0 -+ umull r0,r1,r4,r9 -+ umlal r2,r3,r8,r10 -+ umlal r0,r1,r7,r10 -+ ldr r10,[sp,#20] @ reload r10 -+ umlal r2,r3,r6,r12 -+ umlal r0,r1,r5,r12 -+ umlal r2,r3,r7,r11 -+ umlal r0,r1,r6,r11 -+ umlal r2,r3,r4,r10 -+ str r0,[sp,#0] @ future r4 -+ mul r0,r11,r8 -+ ldr r11,[sp,#24] @ reload r11 -+ adds r2,r2,r1 @ d1+=d0>>32 -+ eor r1,r1,r1 -+ adc lr,r3,#0 @ future r6 -+ str r2,[sp,#4] @ future r5 -+ -+ mul r2,r12,r8 -+ eor r3,r3,r3 -+ umlal r0,r1,r7,r12 -+ ldr r12,[sp,#28] @ reload r12 -+ umlal r2,r3,r7,r9 -+ umlal r0,r1,r6,r9 -+ umlal r2,r3,r6,r10 -+ umlal r0,r1,r5,r10 -+ umlal r2,r3,r5,r11 -+ umlal r0,r1,r4,r11 -+ umlal r2,r3,r4,r12 -+ ldr r4,[sp,#0] -+ mul r8,r9,r8 -+ ldr r5,[sp,#4] -+ -+ adds r6,lr,r0 @ d2+=d1>>32 -+ ldr lr,[sp,#8] @ reload input pointer -+ adc r1,r1,#0 -+ adds r7,r2,r1 @ d3+=d2>>32 -+ ldr r0,[sp,#16] @ reload end pointer -+ adc r3,r3,#0 -+ add r8,r8,r3 @ h4+=d3>>32 -+ -+ and r1,r8,#-4 -+ and r8,r8,#3 -+ add r1,r1,r1,lsr#2 @ *=5 -+ adds r4,r4,r1 -+ adcs r5,r5,#0 -+ adcs r6,r6,#0 -+ adcs r7,r7,#0 -+ adc r8,r8,#0 -+ -+ cmp r0,lr @ done yet? 
-+ bhi .Loop -+ -+ ldr r0,[sp,#12] -+ add sp,sp,#32 -+ stmdb r0,{r4-r8} @ store the result -+ -+.Lno_data: -+#if __ARM_ARCH__>=5 -+ ldmia sp!,{r3-r11,pc} -+#else -+ ldmia sp!,{r3-r11,lr} -+ tst lr,#1 -+ moveq pc,lr @ be binary compatible with V4, yet -+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) -+#endif -+.size poly1305_blocks,.-poly1305_blocks -+.type poly1305_emit,%function -+.align 5 -+poly1305_emit: -+.Lpoly1305_emit: -+ stmdb sp!,{r4-r11} -+ -+ ldmia r0,{r3-r7} -+ -+#if __ARM_ARCH__>=7 -+ ldr ip,[r0,#36] @ is_base2_26 -+ -+ adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32 -+ mov r9,r4,lsr#6 -+ adcs r9,r9,r5,lsl#20 -+ mov r10,r5,lsr#12 -+ adcs r10,r10,r6,lsl#14 -+ mov r11,r6,lsr#18 -+ adcs r11,r11,r7,lsl#8 -+ mov r0,#0 -+ adc r0,r0,r7,lsr#24 -+ -+ tst ip,ip -+ itttt ne -+ movne r3,r8 -+ movne r4,r9 -+ movne r5,r10 -+ movne r6,r11 -+ it ne -+ movne r7,r0 -+#endif -+ -+ adds r8,r3,#5 @ compare to modulus -+ adcs r9,r4,#0 -+ adcs r10,r5,#0 -+ adcs r11,r6,#0 -+ adc r0,r7,#0 -+ tst r0,#4 @ did it carry/borrow? 
-+ -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne r3,r8 -+ ldr r8,[r2,#0] -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne r4,r9 -+ ldr r9,[r2,#4] -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne r5,r10 -+ ldr r10,[r2,#8] -+#ifdef __thumb2__ -+ it ne -+#endif -+ movne r6,r11 -+ ldr r11,[r2,#12] -+ -+ adds r3,r3,r8 -+ adcs r4,r4,r9 -+ adcs r5,r5,r10 -+ adc r6,r6,r11 -+ -+#if __ARM_ARCH__>=7 -+# ifdef __ARMEB__ -+ rev r3,r3 -+ rev r4,r4 -+ rev r5,r5 -+ rev r6,r6 -+# endif -+ str r3,[r1,#0] -+ str r4,[r1,#4] -+ str r5,[r1,#8] -+ str r6,[r1,#12] -+#else -+ strb r3,[r1,#0] -+ mov r3,r3,lsr#8 -+ strb r4,[r1,#4] -+ mov r4,r4,lsr#8 -+ strb r5,[r1,#8] -+ mov r5,r5,lsr#8 -+ strb r6,[r1,#12] -+ mov r6,r6,lsr#8 -+ -+ strb r3,[r1,#1] -+ mov r3,r3,lsr#8 -+ strb r4,[r1,#5] -+ mov r4,r4,lsr#8 -+ strb r5,[r1,#9] -+ mov r5,r5,lsr#8 -+ strb r6,[r1,#13] -+ mov r6,r6,lsr#8 -+ -+ strb r3,[r1,#2] -+ mov r3,r3,lsr#8 -+ strb r4,[r1,#6] -+ mov r4,r4,lsr#8 -+ strb r5,[r1,#10] -+ mov r5,r5,lsr#8 -+ strb r6,[r1,#14] -+ mov r6,r6,lsr#8 -+ -+ strb r3,[r1,#3] -+ strb r4,[r1,#7] -+ strb r5,[r1,#11] -+ strb r6,[r1,#15] -+#endif -+ ldmia sp!,{r4-r11} -+#if __ARM_ARCH__>=5 -+ bx lr @ bx lr -+#else -+ tst lr,#1 -+ moveq pc,lr @ be binary compatible with V4, yet -+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) -+#endif -+.size poly1305_emit,.-poly1305_emit -+#if __ARM_MAX_ARCH__>=7 -+.fpu neon -+ -+.type poly1305_init_neon,%function -+.align 5 -+poly1305_init_neon: -+.Lpoly1305_init_neon: -+ ldr r3,[r0,#48] @ first table element -+ cmp r3,#-1 @ is value impossible? 
-+ bne .Lno_init_neon -+ -+ ldr r4,[r0,#20] @ load key base 2^32 -+ ldr r5,[r0,#24] -+ ldr r6,[r0,#28] -+ ldr r7,[r0,#32] -+ -+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 -+ mov r3,r4,lsr#26 -+ mov r4,r5,lsr#20 -+ orr r3,r3,r5,lsl#6 -+ mov r5,r6,lsr#14 -+ orr r4,r4,r6,lsl#12 -+ mov r6,r7,lsr#8 -+ orr r5,r5,r7,lsl#18 -+ and r3,r3,#0x03ffffff -+ and r4,r4,#0x03ffffff -+ and r5,r5,#0x03ffffff -+ -+ vdup.32 d0,r2 @ r^1 in both lanes -+ add r2,r3,r3,lsl#2 @ *5 -+ vdup.32 d1,r3 -+ add r3,r4,r4,lsl#2 -+ vdup.32 d2,r2 -+ vdup.32 d3,r4 -+ add r4,r5,r5,lsl#2 -+ vdup.32 d4,r3 -+ vdup.32 d5,r5 -+ add r5,r6,r6,lsl#2 -+ vdup.32 d6,r4 -+ vdup.32 d7,r6 -+ vdup.32 d8,r5 -+ -+ mov r5,#2 @ counter -+ -+.Lsquare_neon: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ -+ vmull.u32 q5,d0,d0[1] -+ vmull.u32 q6,d1,d0[1] -+ vmull.u32 q7,d3,d0[1] -+ vmull.u32 q8,d5,d0[1] -+ vmull.u32 q9,d7,d0[1] -+ -+ vmlal.u32 q5,d7,d2[1] -+ vmlal.u32 q6,d0,d1[1] -+ vmlal.u32 q7,d1,d1[1] -+ vmlal.u32 q8,d3,d1[1] -+ vmlal.u32 q9,d5,d1[1] -+ -+ vmlal.u32 q5,d5,d4[1] -+ vmlal.u32 q6,d7,d4[1] -+ vmlal.u32 q8,d1,d3[1] -+ vmlal.u32 q7,d0,d3[1] -+ vmlal.u32 q9,d3,d3[1] -+ -+ vmlal.u32 q5,d3,d6[1] -+ vmlal.u32 q8,d0,d5[1] -+ vmlal.u32 q6,d5,d6[1] -+ vmlal.u32 q7,d7,d6[1] -+ vmlal.u32 q9,d1,d5[1] -+ -+ vmlal.u32 q8,d7,d8[1] -+ vmlal.u32 q5,d1,d8[1] -+ vmlal.u32 q6,d3,d8[1] -+ vmlal.u32 q7,d5,d8[1] -+ vmlal.u32 q9,d0,d7[1] -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein -+ @ and P. Schwabe -+ @ -+ @ H0>>+H1>>+H2>>+H3>>+H4 -+ @ H3>>+H4>>*5+H0>>+H1 -+ @ -+ @ Trivia. 
-+ @ -+ @ Result of multiplication of n-bit number by m-bit number is -+ @ n+m bits wide. However! Even though 2^n is a n+1-bit number, -+ @ m-bit number multiplied by 2^n is still n+m bits wide. -+ @ -+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, -+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit -+ @ one is n+1 bits wide. -+ @ -+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that -+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 -+ @ can be 27. However! In cases when their width exceeds 26 bits -+ @ they are limited by 2^26+2^6. This in turn means that *sum* -+ @ of the products with these values can still be viewed as sum -+ @ of 52-bit numbers as long as the number of addends is not a -+ @ power of 2. For example, -+ @ -+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, -+ @ -+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or -+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than -+ @ 8 * (2^52) or 2^55. However, the value is then multiplied -+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), -+ @ which is less than 32 * (2^52) or 2^57. And when processing -+ @ data we are looking at triple as many addends... -+ @ -+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and -+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the -+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while -+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 -+ @ instruction accepts 2x32-bit input and writes 2x64-bit result. -+ @ This means that the result of reduction has to be compressed upon -+ @ loop wrap-around. 
This can be done in the process of reduction -+ @ to minimize amount of instructions [as well as amount of -+ @ 128-bit instructions, which benefits low-end processors], but -+ @ one has to watch for H2 (which is narrower than H0) and 5*H4 -+ @ not being wider than 58 bits, so that result of right shift -+ @ by 26 bits fits in 32 bits. This is also useful on x86, -+ @ because it allows to use paddd in place for paddq, which -+ @ benefits Atom, where paddq is ridiculously slow. -+ -+ vshr.u64 q15,q8,#26 -+ vmovn.i64 d16,q8 -+ vshr.u64 q4,q5,#26 -+ vmovn.i64 d10,q5 -+ vadd.i64 q9,q9,q15 @ h3 -> h4 -+ vbic.i32 d16,#0xfc000000 @ &=0x03ffffff -+ vadd.i64 q6,q6,q4 @ h0 -> h1 -+ vbic.i32 d10,#0xfc000000 -+ -+ vshrn.u64 d30,q9,#26 -+ vmovn.i64 d18,q9 -+ vshr.u64 q4,q6,#26 -+ vmovn.i64 d12,q6 -+ vadd.i64 q7,q7,q4 @ h1 -> h2 -+ vbic.i32 d18,#0xfc000000 -+ vbic.i32 d12,#0xfc000000 -+ -+ vadd.i32 d10,d10,d30 -+ vshl.u32 d30,d30,#2 -+ vshrn.u64 d8,q7,#26 -+ vmovn.i64 d14,q7 -+ vadd.i32 d10,d10,d30 @ h4 -> h0 -+ vadd.i32 d16,d16,d8 @ h2 -> h3 -+ vbic.i32 d14,#0xfc000000 -+ -+ vshr.u32 d30,d10,#26 -+ vbic.i32 d10,#0xfc000000 -+ vshr.u32 d8,d16,#26 -+ vbic.i32 d16,#0xfc000000 -+ vadd.i32 d12,d12,d30 @ h0 -> h1 -+ vadd.i32 d18,d18,d8 @ h3 -> h4 -+ -+ subs r5,r5,#1 -+ beq .Lsquare_break_neon -+ -+ add r6,r0,#(48+0*9*4) -+ add r7,r0,#(48+1*9*4) -+ -+ vtrn.32 d0,d10 @ r^2:r^1 -+ vtrn.32 d3,d14 -+ vtrn.32 d5,d16 -+ vtrn.32 d1,d12 -+ vtrn.32 d7,d18 -+ -+ vshl.u32 d4,d3,#2 @ *5 -+ vshl.u32 d6,d5,#2 -+ vshl.u32 d2,d1,#2 -+ vshl.u32 d8,d7,#2 -+ vadd.i32 d4,d4,d3 -+ vadd.i32 d2,d2,d1 -+ vadd.i32 d6,d6,d5 -+ vadd.i32 d8,d8,d7 -+ -+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! -+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! -+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! -+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! 
-+ vst1.32 {d8[0]},[r6,:32] -+ vst1.32 {d8[1]},[r7,:32] -+ -+ b .Lsquare_neon -+ -+.align 4 -+.Lsquare_break_neon: -+ add r6,r0,#(48+2*4*9) -+ add r7,r0,#(48+3*4*9) -+ -+ vmov d0,d10 @ r^4:r^3 -+ vshl.u32 d2,d12,#2 @ *5 -+ vmov d1,d12 -+ vshl.u32 d4,d14,#2 -+ vmov d3,d14 -+ vshl.u32 d6,d16,#2 -+ vmov d5,d16 -+ vshl.u32 d8,d18,#2 -+ vmov d7,d18 -+ vadd.i32 d2,d2,d12 -+ vadd.i32 d4,d4,d14 -+ vadd.i32 d6,d6,d16 -+ vadd.i32 d8,d8,d18 -+ -+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! -+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! -+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! -+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! -+ vst1.32 {d8[0]},[r6] -+ vst1.32 {d8[1]},[r7] -+ -+.Lno_init_neon: -+ bx lr @ bx lr -+.size poly1305_init_neon,.-poly1305_init_neon -+ -+.type poly1305_blocks_neon,%function -+.align 5 -+poly1305_blocks_neon: -+.Lpoly1305_blocks_neon: -+ ldr ip,[r0,#36] @ is_base2_26 -+ -+ cmp r2,#64 -+ blo .Lpoly1305_blocks -+ -+ stmdb sp!,{r4-r7} -+ vstmdb sp!,{d8-d15} @ ABI specification says so -+ -+ tst ip,ip @ is_base2_26? 
-+ bne .Lbase2_26_neon -+ -+ stmdb sp!,{r1-r3,lr} -+ bl .Lpoly1305_init_neon -+ -+ ldr r4,[r0,#0] @ load hash value base 2^32 -+ ldr r5,[r0,#4] -+ ldr r6,[r0,#8] -+ ldr r7,[r0,#12] -+ ldr ip,[r0,#16] -+ -+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 -+ mov r3,r4,lsr#26 -+ veor d10,d10,d10 -+ mov r4,r5,lsr#20 -+ orr r3,r3,r5,lsl#6 -+ veor d12,d12,d12 -+ mov r5,r6,lsr#14 -+ orr r4,r4,r6,lsl#12 -+ veor d14,d14,d14 -+ mov r6,r7,lsr#8 -+ orr r5,r5,r7,lsl#18 -+ veor d16,d16,d16 -+ and r3,r3,#0x03ffffff -+ orr r6,r6,ip,lsl#24 -+ veor d18,d18,d18 -+ and r4,r4,#0x03ffffff -+ mov r1,#1 -+ and r5,r5,#0x03ffffff -+ str r1,[r0,#36] @ set is_base2_26 -+ -+ vmov.32 d10[0],r2 -+ vmov.32 d12[0],r3 -+ vmov.32 d14[0],r4 -+ vmov.32 d16[0],r5 -+ vmov.32 d18[0],r6 -+ adr r5,.Lzeros -+ -+ ldmia sp!,{r1-r3,lr} -+ b .Lhash_loaded -+ -+.align 4 -+.Lbase2_26_neon: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ load hash value -+ -+ veor d10,d10,d10 -+ veor d12,d12,d12 -+ veor d14,d14,d14 -+ veor d16,d16,d16 -+ veor d18,d18,d18 -+ vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! -+ adr r5,.Lzeros -+ vld1.32 {d18[0]},[r0] -+ sub r0,r0,#16 @ rewind -+ -+.Lhash_loaded: -+ add r4,r1,#32 -+ mov r3,r3,lsl#24 -+ tst r2,#31 -+ beq .Leven -+ -+ vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]! 
-+ vmov.32 d28[0],r3 -+ sub r2,r2,#16 -+ add r4,r1,#32 -+ -+# ifdef __ARMEB__ -+ vrev32.8 q10,q10 -+ vrev32.8 q13,q13 -+ vrev32.8 q11,q11 -+ vrev32.8 q12,q12 -+# endif -+ vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26 -+ vshl.u32 d26,d26,#18 -+ -+ vsri.u32 d26,d24,#14 -+ vshl.u32 d24,d24,#12 -+ vadd.i32 d29,d28,d18 @ add hash value and move to #hi -+ -+ vbic.i32 d26,#0xfc000000 -+ vsri.u32 d24,d22,#20 -+ vshl.u32 d22,d22,#6 -+ -+ vbic.i32 d24,#0xfc000000 -+ vsri.u32 d22,d20,#26 -+ vadd.i32 d27,d26,d16 -+ -+ vbic.i32 d20,#0xfc000000 -+ vbic.i32 d22,#0xfc000000 -+ vadd.i32 d25,d24,d14 -+ -+ vadd.i32 d21,d20,d10 -+ vadd.i32 d23,d22,d12 -+ -+ mov r7,r5 -+ add r6,r0,#48 -+ -+ cmp r2,r2 -+ b .Long_tail -+ -+.align 4 -+.Leven: -+ subs r2,r2,#64 -+ it lo -+ movlo r4,r5 -+ -+ vmov.i32 q14,#1<<24 @ padbit, yes, always -+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] -+ add r1,r1,#64 -+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) -+ add r4,r4,#64 -+ itt hi -+ addhi r7,r0,#(48+1*9*4) -+ addhi r6,r0,#(48+3*9*4) -+ -+# ifdef __ARMEB__ -+ vrev32.8 q10,q10 -+ vrev32.8 q13,q13 -+ vrev32.8 q11,q11 -+ vrev32.8 q12,q12 -+# endif -+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 -+ vshl.u32 q13,q13,#18 -+ -+ vsri.u32 q13,q12,#14 -+ vshl.u32 q12,q12,#12 -+ -+ vbic.i32 q13,#0xfc000000 -+ vsri.u32 q12,q11,#20 -+ vshl.u32 q11,q11,#6 -+ -+ vbic.i32 q12,#0xfc000000 -+ vsri.u32 q11,q10,#26 -+ -+ vbic.i32 q10,#0xfc000000 -+ vbic.i32 q11,#0xfc000000 -+ -+ bls .Lskip_loop -+ -+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2 -+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 -+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! -+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! 
-+ b .Loop_neon -+ -+.align 5 -+.Loop_neon: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 -+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r -+ @ ___________________/ -+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 -+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r -+ @ ___________________/ ____________________/ -+ @ -+ @ Note that we start with inp[2:3]*r^2. This is because it -+ @ doesn't depend on reduction in previous iteration. -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ inp[2:3]*r^2 -+ -+ vadd.i32 d24,d24,d14 @ accumulate inp[0:1] -+ vmull.u32 q7,d25,d0[1] -+ vadd.i32 d20,d20,d10 -+ vmull.u32 q5,d21,d0[1] -+ vadd.i32 d26,d26,d16 -+ vmull.u32 q8,d27,d0[1] -+ vmlal.u32 q7,d23,d1[1] -+ vadd.i32 d22,d22,d12 -+ vmull.u32 q6,d23,d0[1] -+ -+ vadd.i32 d28,d28,d18 -+ vmull.u32 q9,d29,d0[1] -+ subs r2,r2,#64 -+ vmlal.u32 q5,d29,d2[1] -+ it lo -+ movlo r4,r5 -+ vmlal.u32 q8,d25,d1[1] -+ vld1.32 d8[1],[r7,:32] -+ vmlal.u32 q6,d21,d1[1] -+ vmlal.u32 q9,d27,d1[1] -+ -+ vmlal.u32 q5,d27,d4[1] -+ vmlal.u32 q8,d23,d3[1] -+ vmlal.u32 q9,d25,d3[1] -+ vmlal.u32 q6,d29,d4[1] -+ vmlal.u32 q7,d21,d3[1] -+ -+ vmlal.u32 q8,d21,d5[1] -+ vmlal.u32 q5,d25,d6[1] -+ vmlal.u32 q9,d23,d5[1] -+ vmlal.u32 q6,d27,d6[1] -+ vmlal.u32 q7,d29,d6[1] -+ -+ vmlal.u32 q8,d29,d8[1] -+ vmlal.u32 q5,d23,d8[1] -+ vmlal.u32 q9,d21,d7[1] -+ vmlal.u32 q6,d25,d8[1] -+ vmlal.u32 q7,d27,d8[1] -+ -+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) -+ add r4,r4,#64 -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ (hash+inp[0:1])*r^4 
and accumulate -+ -+ vmlal.u32 q8,d26,d0[0] -+ vmlal.u32 q5,d20,d0[0] -+ vmlal.u32 q9,d28,d0[0] -+ vmlal.u32 q6,d22,d0[0] -+ vmlal.u32 q7,d24,d0[0] -+ vld1.32 d8[0],[r6,:32] -+ -+ vmlal.u32 q8,d24,d1[0] -+ vmlal.u32 q5,d28,d2[0] -+ vmlal.u32 q9,d26,d1[0] -+ vmlal.u32 q6,d20,d1[0] -+ vmlal.u32 q7,d22,d1[0] -+ -+ vmlal.u32 q8,d22,d3[0] -+ vmlal.u32 q5,d26,d4[0] -+ vmlal.u32 q9,d24,d3[0] -+ vmlal.u32 q6,d28,d4[0] -+ vmlal.u32 q7,d20,d3[0] -+ -+ vmlal.u32 q8,d20,d5[0] -+ vmlal.u32 q5,d24,d6[0] -+ vmlal.u32 q9,d22,d5[0] -+ vmlal.u32 q6,d26,d6[0] -+ vmlal.u32 q8,d28,d8[0] -+ -+ vmlal.u32 q7,d28,d6[0] -+ vmlal.u32 q5,d22,d8[0] -+ vmlal.u32 q9,d20,d7[0] -+ vmov.i32 q14,#1<<24 @ padbit, yes, always -+ vmlal.u32 q6,d24,d8[0] -+ vmlal.u32 q7,d26,d8[0] -+ -+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] -+ add r1,r1,#64 -+# ifdef __ARMEB__ -+ vrev32.8 q10,q10 -+ vrev32.8 q11,q11 -+ vrev32.8 q12,q12 -+ vrev32.8 q13,q13 -+# endif -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of -+ @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14. 
-+ -+ vshr.u64 q15,q8,#26 -+ vmovn.i64 d16,q8 -+ vshr.u64 q4,q5,#26 -+ vmovn.i64 d10,q5 -+ vadd.i64 q9,q9,q15 @ h3 -> h4 -+ vbic.i32 d16,#0xfc000000 -+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 -+ vadd.i64 q6,q6,q4 @ h0 -> h1 -+ vshl.u32 q13,q13,#18 -+ vbic.i32 d10,#0xfc000000 -+ -+ vshrn.u64 d30,q9,#26 -+ vmovn.i64 d18,q9 -+ vshr.u64 q4,q6,#26 -+ vmovn.i64 d12,q6 -+ vadd.i64 q7,q7,q4 @ h1 -> h2 -+ vsri.u32 q13,q12,#14 -+ vbic.i32 d18,#0xfc000000 -+ vshl.u32 q12,q12,#12 -+ vbic.i32 d12,#0xfc000000 -+ -+ vadd.i32 d10,d10,d30 -+ vshl.u32 d30,d30,#2 -+ vbic.i32 q13,#0xfc000000 -+ vshrn.u64 d8,q7,#26 -+ vmovn.i64 d14,q7 -+ vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec] -+ vsri.u32 q12,q11,#20 -+ vadd.i32 d16,d16,d8 @ h2 -> h3 -+ vshl.u32 q11,q11,#6 -+ vbic.i32 d14,#0xfc000000 -+ vbic.i32 q12,#0xfc000000 -+ -+ vshrn.u64 d30,q5,#26 @ re-narrow -+ vmovn.i64 d10,q5 -+ vsri.u32 q11,q10,#26 -+ vbic.i32 q10,#0xfc000000 -+ vshr.u32 d8,d16,#26 -+ vbic.i32 d16,#0xfc000000 -+ vbic.i32 d10,#0xfc000000 -+ vadd.i32 d12,d12,d30 @ h0 -> h1 -+ vadd.i32 d18,d18,d8 @ h3 -> h4 -+ vbic.i32 q11,#0xfc000000 -+ -+ bhi .Loop_neon -+ -+.Lskip_loop: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 -+ -+ add r7,r0,#(48+0*9*4) -+ add r6,r0,#(48+1*9*4) -+ adds r2,r2,#32 -+ it ne -+ movne r2,#0 -+ bne .Long_tail -+ -+ vadd.i32 d25,d24,d14 @ add hash value and move to #hi -+ vadd.i32 d21,d20,d10 -+ vadd.i32 d27,d26,d16 -+ vadd.i32 d23,d22,d12 -+ vadd.i32 d29,d28,d18 -+ -+.Long_tail: -+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1 -+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2 -+ -+ vadd.i32 d24,d24,d14 @ can be redundant -+ vmull.u32 q7,d25,d0 -+ vadd.i32 d20,d20,d10 -+ vmull.u32 q5,d21,d0 -+ vadd.i32 d26,d26,d16 -+ vmull.u32 q8,d27,d0 -+ vadd.i32 d22,d22,d12 -+ vmull.u32 q6,d23,d0 -+ vadd.i32 d28,d28,d18 -+ vmull.u32 q9,d29,d0 -+ -+ vmlal.u32 q5,d29,d2 -+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! 
-+ vmlal.u32 q8,d25,d1 -+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! -+ vmlal.u32 q6,d21,d1 -+ vmlal.u32 q9,d27,d1 -+ vmlal.u32 q7,d23,d1 -+ -+ vmlal.u32 q8,d23,d3 -+ vld1.32 d8[1],[r7,:32] -+ vmlal.u32 q5,d27,d4 -+ vld1.32 d8[0],[r6,:32] -+ vmlal.u32 q9,d25,d3 -+ vmlal.u32 q6,d29,d4 -+ vmlal.u32 q7,d21,d3 -+ -+ vmlal.u32 q8,d21,d5 -+ it ne -+ addne r7,r0,#(48+2*9*4) -+ vmlal.u32 q5,d25,d6 -+ it ne -+ addne r6,r0,#(48+3*9*4) -+ vmlal.u32 q9,d23,d5 -+ vmlal.u32 q6,d27,d6 -+ vmlal.u32 q7,d29,d6 -+ -+ vmlal.u32 q8,d29,d8 -+ vorn q0,q0,q0 @ all-ones, can be redundant -+ vmlal.u32 q5,d23,d8 -+ vshr.u64 q0,q0,#38 -+ vmlal.u32 q9,d21,d7 -+ vmlal.u32 q6,d25,d8 -+ vmlal.u32 q7,d27,d8 -+ -+ beq .Lshort_tail -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ (hash+inp[0:1])*r^4:r^3 and accumulate -+ -+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3 -+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 -+ -+ vmlal.u32 q7,d24,d0 -+ vmlal.u32 q5,d20,d0 -+ vmlal.u32 q8,d26,d0 -+ vmlal.u32 q6,d22,d0 -+ vmlal.u32 q9,d28,d0 -+ -+ vmlal.u32 q5,d28,d2 -+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! -+ vmlal.u32 q8,d24,d1 -+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! 
-+ vmlal.u32 q6,d20,d1 -+ vmlal.u32 q9,d26,d1 -+ vmlal.u32 q7,d22,d1 -+ -+ vmlal.u32 q8,d22,d3 -+ vld1.32 d8[1],[r7,:32] -+ vmlal.u32 q5,d26,d4 -+ vld1.32 d8[0],[r6,:32] -+ vmlal.u32 q9,d24,d3 -+ vmlal.u32 q6,d28,d4 -+ vmlal.u32 q7,d20,d3 -+ -+ vmlal.u32 q8,d20,d5 -+ vmlal.u32 q5,d24,d6 -+ vmlal.u32 q9,d22,d5 -+ vmlal.u32 q6,d26,d6 -+ vmlal.u32 q7,d28,d6 -+ -+ vmlal.u32 q8,d28,d8 -+ vorn q0,q0,q0 @ all-ones -+ vmlal.u32 q5,d22,d8 -+ vshr.u64 q0,q0,#38 -+ vmlal.u32 q9,d20,d7 -+ vmlal.u32 q6,d24,d8 -+ vmlal.u32 q7,d26,d8 -+ -+.Lshort_tail: -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ horizontal addition -+ -+ vadd.i64 d16,d16,d17 -+ vadd.i64 d10,d10,d11 -+ vadd.i64 d18,d18,d19 -+ vadd.i64 d12,d12,d13 -+ vadd.i64 d14,d14,d15 -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ lazy reduction, but without narrowing -+ -+ vshr.u64 q15,q8,#26 -+ vand.i64 q8,q8,q0 -+ vshr.u64 q4,q5,#26 -+ vand.i64 q5,q5,q0 -+ vadd.i64 q9,q9,q15 @ h3 -> h4 -+ vadd.i64 q6,q6,q4 @ h0 -> h1 -+ -+ vshr.u64 q15,q9,#26 -+ vand.i64 q9,q9,q0 -+ vshr.u64 q4,q6,#26 -+ vand.i64 q6,q6,q0 -+ vadd.i64 q7,q7,q4 @ h1 -> h2 -+ -+ vadd.i64 q5,q5,q15 -+ vshl.u64 q15,q15,#2 -+ vshr.u64 q4,q7,#26 -+ vand.i64 q7,q7,q0 -+ vadd.i64 q5,q5,q15 @ h4 -> h0 -+ vadd.i64 q8,q8,q4 @ h2 -> h3 -+ -+ vshr.u64 q15,q5,#26 -+ vand.i64 q5,q5,q0 -+ vshr.u64 q4,q8,#26 -+ vand.i64 q8,q8,q0 -+ vadd.i64 q6,q6,q15 @ h0 -> h1 -+ vadd.i64 q9,q9,q4 @ h3 -> h4 -+ -+ cmp r2,#0 -+ bne .Leven -+ -+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -+ @ store hash value -+ -+ vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! 
-+ vst1.32 {d18[0]},[r0] -+ -+ vldmia sp!,{d8-d15} @ epilogue -+ ldmia sp!,{r4-r7} -+ bx lr @ bx lr -+.size poly1305_blocks_neon,.-poly1305_blocks_neon -+ -+.align 5 -+.Lzeros: -+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -+#ifndef __KERNEL__ -+.LOPENSSL_armcap: -+# ifdef _WIN32 -+.word OPENSSL_armcap_P -+# else -+.word OPENSSL_armcap_P-.Lpoly1305_init -+# endif -+.comm OPENSSL_armcap_P,4,4 -+.hidden OPENSSL_armcap_P -+#endif -+#endif -+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm" -+.align 2 ---- /dev/null -+++ b/arch/arm/crypto/poly1305-glue.c -@@ -0,0 +1,276 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM -+ * -+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> -+ */ -+ -+#include <asm/hwcap.h> -+#include <asm/neon.h> -+#include <asm/simd.h> -+#include <asm/unaligned.h> -+#include <crypto/algapi.h> -+#include <crypto/internal/hash.h> -+#include <crypto/internal/poly1305.h> -+#include <crypto/internal/simd.h> -+#include <linux/cpufeature.h> -+#include <linux/crypto.h> -+#include <linux/jump_label.h> -+#include <linux/module.h> -+ -+void poly1305_init_arm(void *state, const u8 *key); -+void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); -+void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); -+ -+void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) -+{ -+} -+ -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -+ -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) -+{ -+ poly1305_init_arm(&dctx->h, key); -+ dctx->s[0] = get_unaligned_le32(key + 16); -+ dctx->s[1] = get_unaligned_le32(key + 20); -+ dctx->s[2] = get_unaligned_le32(key + 24); -+ dctx->s[3] = get_unaligned_le32(key + 28); -+ dctx->buflen = 0; -+} -+EXPORT_SYMBOL(poly1305_init_arch); -+ -+static int arm_poly1305_init(struct shash_desc *desc) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ dctx->buflen = 0; -+ 
dctx->rset = 0; -+ dctx->sset = false; -+ -+ return 0; -+} -+ -+static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, -+ u32 len, u32 hibit, bool do_neon) -+{ -+ if (unlikely(!dctx->sset)) { -+ if (!dctx->rset) { -+ poly1305_init_arm(&dctx->h, src); -+ src += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ dctx->rset = 1; -+ } -+ if (len >= POLY1305_BLOCK_SIZE) { -+ dctx->s[0] = get_unaligned_le32(src + 0); -+ dctx->s[1] = get_unaligned_le32(src + 4); -+ dctx->s[2] = get_unaligned_le32(src + 8); -+ dctx->s[3] = get_unaligned_le32(src + 12); -+ src += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ dctx->sset = true; -+ } -+ if (len < POLY1305_BLOCK_SIZE) -+ return; -+ } -+ -+ len &= ~(POLY1305_BLOCK_SIZE - 1); -+ -+ if (static_branch_likely(&have_neon) && likely(do_neon)) -+ poly1305_blocks_neon(&dctx->h, src, len, hibit); -+ else -+ poly1305_blocks_arm(&dctx->h, src, len, hibit); -+} -+ -+static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, -+ const u8 *src, u32 len, bool do_neon) -+{ -+ if (unlikely(dctx->buflen)) { -+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); -+ -+ memcpy(dctx->buf + dctx->buflen, src, bytes); -+ src += bytes; -+ len -= bytes; -+ dctx->buflen += bytes; -+ -+ if (dctx->buflen == POLY1305_BLOCK_SIZE) { -+ arm_poly1305_blocks(dctx, dctx->buf, -+ POLY1305_BLOCK_SIZE, 1, false); -+ dctx->buflen = 0; -+ } -+ } -+ -+ if (likely(len >= POLY1305_BLOCK_SIZE)) { -+ arm_poly1305_blocks(dctx, src, len, 1, do_neon); -+ src += round_down(len, POLY1305_BLOCK_SIZE); -+ len %= POLY1305_BLOCK_SIZE; -+ } -+ -+ if (unlikely(len)) { -+ dctx->buflen = len; -+ memcpy(dctx->buf, src, len); -+ } -+} -+ -+static int arm_poly1305_update(struct shash_desc *desc, -+ const u8 *src, unsigned int srclen) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ arm_poly1305_do_update(dctx, src, srclen, false); -+ return 0; -+} -+ -+static int __maybe_unused arm_poly1305_update_neon(struct shash_desc 
*desc, -+ const u8 *src, -+ unsigned int srclen) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ bool do_neon = crypto_simd_usable() && srclen > 128; -+ -+ if (static_branch_likely(&have_neon) && do_neon) -+ kernel_neon_begin(); -+ arm_poly1305_do_update(dctx, src, srclen, do_neon); -+ if (static_branch_likely(&have_neon) && do_neon) -+ kernel_neon_end(); -+ return 0; -+} -+ -+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, -+ unsigned int nbytes) -+{ -+ bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && -+ crypto_simd_usable(); -+ -+ if (unlikely(dctx->buflen)) { -+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); -+ -+ memcpy(dctx->buf + dctx->buflen, src, bytes); -+ src += bytes; -+ nbytes -= bytes; -+ dctx->buflen += bytes; -+ -+ if (dctx->buflen == POLY1305_BLOCK_SIZE) { -+ poly1305_blocks_arm(&dctx->h, dctx->buf, -+ POLY1305_BLOCK_SIZE, 1); -+ dctx->buflen = 0; -+ } -+ } -+ -+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { -+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); -+ -+ if (static_branch_likely(&have_neon) && do_neon) { -+ kernel_neon_begin(); -+ poly1305_blocks_neon(&dctx->h, src, len, 1); -+ kernel_neon_end(); -+ } else { -+ poly1305_blocks_arm(&dctx->h, src, len, 1); -+ } -+ src += len; -+ nbytes %= POLY1305_BLOCK_SIZE; -+ } -+ -+ if (unlikely(nbytes)) { -+ dctx->buflen = nbytes; -+ memcpy(dctx->buf, src, nbytes); -+ } -+} -+EXPORT_SYMBOL(poly1305_update_arch); -+ -+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -+{ -+ __le32 digest[4]; -+ u64 f = 0; -+ -+ if (unlikely(dctx->buflen)) { -+ dctx->buf[dctx->buflen++] = 1; -+ memset(dctx->buf + dctx->buflen, 0, -+ POLY1305_BLOCK_SIZE - dctx->buflen); -+ poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); -+ } -+ -+ poly1305_emit_arm(&dctx->h, digest, dctx->s); -+ -+ /* mac = (h + s) % (2^128) */ -+ f = (f >> 32) + le32_to_cpu(digest[0]); -+ put_unaligned_le32(f, dst); -+ f = (f >> 32) + 
le32_to_cpu(digest[1]); -+ put_unaligned_le32(f, dst + 4); -+ f = (f >> 32) + le32_to_cpu(digest[2]); -+ put_unaligned_le32(f, dst + 8); -+ f = (f >> 32) + le32_to_cpu(digest[3]); -+ put_unaligned_le32(f, dst + 12); -+ -+ *dctx = (struct poly1305_desc_ctx){}; -+} -+EXPORT_SYMBOL(poly1305_final_arch); -+ -+static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ if (unlikely(!dctx->sset)) -+ return -ENOKEY; -+ -+ poly1305_final_arch(dctx, dst); -+ return 0; -+} -+ -+static struct shash_alg arm_poly1305_algs[] = {{ -+ .init = arm_poly1305_init, -+ .update = arm_poly1305_update, -+ .final = arm_poly1305_final, -+ .digestsize = POLY1305_DIGEST_SIZE, -+ .descsize = sizeof(struct poly1305_desc_ctx), -+ -+ .base.cra_name = "poly1305", -+ .base.cra_driver_name = "poly1305-arm", -+ .base.cra_priority = 150, -+ .base.cra_blocksize = POLY1305_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+#ifdef CONFIG_KERNEL_MODE_NEON -+}, { -+ .init = arm_poly1305_init, -+ .update = arm_poly1305_update_neon, -+ .final = arm_poly1305_final, -+ .digestsize = POLY1305_DIGEST_SIZE, -+ .descsize = sizeof(struct poly1305_desc_ctx), -+ -+ .base.cra_name = "poly1305", -+ .base.cra_driver_name = "poly1305-neon", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = POLY1305_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+#endif -+}}; -+ -+static int __init arm_poly1305_mod_init(void) -+{ -+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && -+ (elf_hwcap & HWCAP_NEON)) -+ static_branch_enable(&have_neon); -+ else -+ /* register only the first entry */ -+ return crypto_register_shash(&arm_poly1305_algs[0]); -+ -+ return crypto_register_shashes(arm_poly1305_algs, -+ ARRAY_SIZE(arm_poly1305_algs)); -+} -+ -+static void __exit arm_poly1305_mod_exit(void) -+{ -+ if (!static_branch_likely(&have_neon)) { -+ crypto_unregister_shash(&arm_poly1305_algs[0]); -+ return; -+ } -+ crypto_unregister_shashes(arm_poly1305_algs, -+ 
ARRAY_SIZE(arm_poly1305_algs)); -+} -+ -+module_init(arm_poly1305_mod_init); -+module_exit(arm_poly1305_mod_exit); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_ALIAS_CRYPTO("poly1305"); -+MODULE_ALIAS_CRYPTO("poly1305-arm"); -+MODULE_ALIAS_CRYPTO("poly1305-neon"); ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -40,7 +40,7 @@ config CRYPTO_LIB_DES - config CRYPTO_LIB_POLY1305_RSIZE - int - default 4 if X86_64 -- default 9 if ARM64 -+ default 9 if ARM || ARM64 - default 1 - - config CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0020-crypto-mips-poly1305-incorporate-OpenSSL-CRYPTOGAMS-.patch b/target/linux/generic/backport-5.4/080-wireguard-0020-crypto-mips-poly1305-incorporate-OpenSSL-CRYPTOGAMS-.patch deleted file mode 100644 index 272e1797da..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0020-crypto-mips-poly1305-incorporate-OpenSSL-CRYPTOGAMS-.patch +++ /dev/null @@ -1,1563 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:26 +0100 -Subject: [PATCH] crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS - optimized implementation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit a11d055e7a64ac34a5e99b6fe731299449cbcd58 upstream. - -This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for -MIPS authored by Andy Polyakov, a prior 64-bit only version of which has been -contributed by him to the OpenSSL project. The file 'poly1305-mips.pl' is taken -straight from this upstream GitHub repository [0] at commit -d22ade312a7af958ec955620b0d241cf42c37feb, and already contains all the changes -required to build it as part of a Linux kernel module. 
- -[0] https://github.com/dot-asm/cryptogams - -Co-developed-by: Andy Polyakov <appro@cryptogams.org> -Signed-off-by: Andy Polyakov <appro@cryptogams.org> -Co-developed-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/mips/crypto/Makefile | 14 + - arch/mips/crypto/poly1305-glue.c | 203 +++++ - arch/mips/crypto/poly1305-mips.pl | 1273 +++++++++++++++++++++++++++++ - crypto/Kconfig | 5 + - lib/crypto/Kconfig | 1 + - 5 files changed, 1496 insertions(+) - create mode 100644 arch/mips/crypto/poly1305-glue.c - create mode 100644 arch/mips/crypto/poly1305-mips.pl - ---- a/arch/mips/crypto/Makefile -+++ b/arch/mips/crypto/Makefile -@@ -8,3 +8,17 @@ obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32 - obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o - chacha-mips-y := chacha-core.o chacha-glue.o - AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots -+ -+obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o -+poly1305-mips-y := poly1305-core.o poly1305-glue.o -+ -+perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 -+perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 -+ -+quiet_cmd_perlasm = PERLASM $@ -+ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) -+ -+$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE -+ $(call if_changed,perlasm) -+ -+targets += poly1305-core.S ---- /dev/null -+++ b/arch/mips/crypto/poly1305-glue.c -@@ -0,0 +1,203 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS -+ * -+ * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> -+ */ -+ -+#include <asm/unaligned.h> -+#include <crypto/algapi.h> -+#include <crypto/internal/hash.h> -+#include <crypto/internal/poly1305.h> -+#include <linux/cpufeature.h> -+#include <linux/crypto.h> -+#include <linux/module.h> -+ -+asmlinkage void poly1305_init_mips(void *state, const u8 *key); -+asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); -+asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce); -+ -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) -+{ -+ poly1305_init_mips(&dctx->h, key); -+ dctx->s[0] = get_unaligned_le32(key + 16); -+ dctx->s[1] = get_unaligned_le32(key + 20); -+ dctx->s[2] = get_unaligned_le32(key + 24); -+ dctx->s[3] = get_unaligned_le32(key + 28); -+ dctx->buflen = 0; -+} -+EXPORT_SYMBOL(poly1305_init_arch); -+ -+static int mips_poly1305_init(struct shash_desc *desc) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ dctx->buflen = 0; -+ dctx->rset = 0; -+ dctx->sset = false; -+ -+ return 0; -+} -+ -+static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, -+ u32 len, u32 hibit) -+{ -+ if (unlikely(!dctx->sset)) { -+ if (!dctx->rset) { -+ poly1305_init_mips(&dctx->h, src); -+ src += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ dctx->rset = 1; -+ } -+ if (len >= POLY1305_BLOCK_SIZE) { -+ dctx->s[0] = get_unaligned_le32(src + 0); -+ dctx->s[1] = get_unaligned_le32(src + 4); -+ dctx->s[2] = get_unaligned_le32(src + 8); -+ dctx->s[3] = get_unaligned_le32(src + 12); -+ src += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ dctx->sset = true; -+ } -+ if (len < POLY1305_BLOCK_SIZE) -+ return; -+ } -+ -+ len &= ~(POLY1305_BLOCK_SIZE - 1); -+ -+ poly1305_blocks_mips(&dctx->h, src, len, hibit); -+} -+ -+static int mips_poly1305_update(struct shash_desc *desc, const u8 *src, -+ unsigned int len) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ if 
(unlikely(dctx->buflen)) { -+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); -+ -+ memcpy(dctx->buf + dctx->buflen, src, bytes); -+ src += bytes; -+ len -= bytes; -+ dctx->buflen += bytes; -+ -+ if (dctx->buflen == POLY1305_BLOCK_SIZE) { -+ mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1); -+ dctx->buflen = 0; -+ } -+ } -+ -+ if (likely(len >= POLY1305_BLOCK_SIZE)) { -+ mips_poly1305_blocks(dctx, src, len, 1); -+ src += round_down(len, POLY1305_BLOCK_SIZE); -+ len %= POLY1305_BLOCK_SIZE; -+ } -+ -+ if (unlikely(len)) { -+ dctx->buflen = len; -+ memcpy(dctx->buf, src, len); -+ } -+ return 0; -+} -+ -+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, -+ unsigned int nbytes) -+{ -+ if (unlikely(dctx->buflen)) { -+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); -+ -+ memcpy(dctx->buf + dctx->buflen, src, bytes); -+ src += bytes; -+ nbytes -= bytes; -+ dctx->buflen += bytes; -+ -+ if (dctx->buflen == POLY1305_BLOCK_SIZE) { -+ poly1305_blocks_mips(&dctx->h, dctx->buf, -+ POLY1305_BLOCK_SIZE, 1); -+ dctx->buflen = 0; -+ } -+ } -+ -+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { -+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); -+ -+ poly1305_blocks_mips(&dctx->h, src, len, 1); -+ src += len; -+ nbytes %= POLY1305_BLOCK_SIZE; -+ } -+ -+ if (unlikely(nbytes)) { -+ dctx->buflen = nbytes; -+ memcpy(dctx->buf, src, nbytes); -+ } -+} -+EXPORT_SYMBOL(poly1305_update_arch); -+ -+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) -+{ -+ __le32 digest[4]; -+ u64 f = 0; -+ -+ if (unlikely(dctx->buflen)) { -+ dctx->buf[dctx->buflen++] = 1; -+ memset(dctx->buf + dctx->buflen, 0, -+ POLY1305_BLOCK_SIZE - dctx->buflen); -+ poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); -+ } -+ -+ poly1305_emit_mips(&dctx->h, digest, dctx->s); -+ -+ /* mac = (h + s) % (2^128) */ -+ f = (f >> 32) + le32_to_cpu(digest[0]); -+ put_unaligned_le32(f, dst); -+ f = (f >> 32) + le32_to_cpu(digest[1]); -+ 
put_unaligned_le32(f, dst + 4); -+ f = (f >> 32) + le32_to_cpu(digest[2]); -+ put_unaligned_le32(f, dst + 8); -+ f = (f >> 32) + le32_to_cpu(digest[3]); -+ put_unaligned_le32(f, dst + 12); -+ -+ *dctx = (struct poly1305_desc_ctx){}; -+} -+EXPORT_SYMBOL(poly1305_final_arch); -+ -+static int mips_poly1305_final(struct shash_desc *desc, u8 *dst) -+{ -+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); -+ -+ if (unlikely(!dctx->sset)) -+ return -ENOKEY; -+ -+ poly1305_final_arch(dctx, dst); -+ return 0; -+} -+ -+static struct shash_alg mips_poly1305_alg = { -+ .init = mips_poly1305_init, -+ .update = mips_poly1305_update, -+ .final = mips_poly1305_final, -+ .digestsize = POLY1305_DIGEST_SIZE, -+ .descsize = sizeof(struct poly1305_desc_ctx), -+ -+ .base.cra_name = "poly1305", -+ .base.cra_driver_name = "poly1305-mips", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = POLY1305_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+}; -+ -+static int __init mips_poly1305_mod_init(void) -+{ -+ return crypto_register_shash(&mips_poly1305_alg); -+} -+ -+static void __exit mips_poly1305_mod_exit(void) -+{ -+ crypto_unregister_shash(&mips_poly1305_alg); -+} -+ -+module_init(mips_poly1305_mod_init); -+module_exit(mips_poly1305_mod_exit); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_ALIAS_CRYPTO("poly1305"); -+MODULE_ALIAS_CRYPTO("poly1305-mips"); ---- /dev/null -+++ b/arch/mips/crypto/poly1305-mips.pl -@@ -0,0 +1,1273 @@ -+#!/usr/bin/env perl -+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause -+# -+# ==================================================================== -+# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL -+# project. -+# ==================================================================== -+ -+# Poly1305 hash for MIPS. -+# -+# May 2016 -+# -+# Numbers are cycles per processed byte with poly1305_blocks alone. -+# -+# IALU/gcc -+# R1x000 ~5.5/+130% (big-endian) -+# Octeon II 2.50/+70% (little-endian) -+# -+# March 2019 -+# -+# Add 32-bit code path. 
-+# -+# October 2019 -+# -+# Modulo-scheduling reduction allows to omit dependency chain at the -+# end of inner loop and improve performance. Also optimize MIPS32R2 -+# code path for MIPS 1004K core. Per René von Dorst's suggestions. -+# -+# IALU/gcc -+# R1x000 ~9.8/? (big-endian) -+# Octeon II 3.65/+140% (little-endian) -+# MT7621/1004K 4.75/? (little-endian) -+# -+###################################################################### -+# There is a number of MIPS ABI in use, O32 and N32/64 are most -+# widely used. Then there is a new contender: NUBI. It appears that if -+# one picks the latter, it's possible to arrange code in ABI neutral -+# manner. Therefore let's stick to NUBI register layout: -+# -+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); -+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); -+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); -+# -+# The return value is placed in $a0. Following coding rules facilitate -+# interoperability: -+# -+# - never ever touch $tp, "thread pointer", former $gp [o32 can be -+# excluded from the rule, because it's specified volatile]; -+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting -+# old code]; -+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; -+# -+# For reference here is register layout for N32/64 MIPS ABIs: -+# -+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); -+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); -+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); -+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); -+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); -+# -+# <appro@openssl.org> -+# -+###################################################################### -+ -+$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64 -+ -+$v0 = ($flavour =~ /nubi/i) ? 
$a0 : $t0; -+ -+if ($flavour =~ /64|n32/i) {{{ -+###################################################################### -+# 64-bit code path -+# -+ -+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); -+my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); -+ -+$code.=<<___; -+#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ -+ defined(_MIPS_ARCH_MIPS64R6)) \\ -+ && !defined(_MIPS_ARCH_MIPS64R2) -+# define _MIPS_ARCH_MIPS64R2 -+#endif -+ -+#if defined(_MIPS_ARCH_MIPS64R6) -+# define dmultu(rs,rt) -+# define mflo(rd,rs,rt) dmulu rd,rs,rt -+# define mfhi(rd,rs,rt) dmuhu rd,rs,rt -+#else -+# define dmultu(rs,rt) dmultu rs,rt -+# define mflo(rd,rs,rt) mflo rd -+# define mfhi(rd,rs,rt) mfhi rd -+#endif -+ -+#ifdef __KERNEL__ -+# define poly1305_init poly1305_init_mips -+# define poly1305_blocks poly1305_blocks_mips -+# define poly1305_emit poly1305_emit_mips -+#endif -+ -+#if defined(__MIPSEB__) && !defined(MIPSEB) -+# define MIPSEB -+#endif -+ -+#ifdef MIPSEB -+# define MSB 0 -+# define LSB 7 -+#else -+# define MSB 7 -+# define LSB 0 -+#endif -+ -+.text -+.set noat -+.set noreorder -+ -+.align 5 -+.globl poly1305_init -+.ent poly1305_init -+poly1305_init: -+ .frame $sp,0,$ra -+ .set reorder -+ -+ sd $zero,0($ctx) -+ sd $zero,8($ctx) -+ sd $zero,16($ctx) -+ -+ beqz $inp,.Lno_key -+ -+#if defined(_MIPS_ARCH_MIPS64R6) -+ andi $tmp0,$inp,7 # $inp % 8 -+ dsubu $inp,$inp,$tmp0 # align $inp -+ sll $tmp0,$tmp0,3 # byte to bit offset -+ ld $in0,0($inp) -+ ld $in1,8($inp) -+ beqz $tmp0,.Laligned_key -+ ld $tmp2,16($inp) -+ -+ subu $tmp1,$zero,$tmp0 -+# ifdef MIPSEB -+ dsllv $in0,$in0,$tmp0 -+ dsrlv $tmp3,$in1,$tmp1 -+ dsllv $in1,$in1,$tmp0 -+ dsrlv $tmp2,$tmp2,$tmp1 -+# else -+ dsrlv $in0,$in0,$tmp0 -+ dsllv $tmp3,$in1,$tmp1 -+ dsrlv $in1,$in1,$tmp0 -+ dsllv $tmp2,$tmp2,$tmp1 -+# endif -+ or $in0,$in0,$tmp3 -+ or $in1,$in1,$tmp2 -+.Laligned_key: -+#else -+ ldl $in0,0+MSB($inp) -+ ldl $in1,8+MSB($inp) -+ ldr $in0,0+LSB($inp) -+ ldr 
$in1,8+LSB($inp) -+#endif -+#ifdef MIPSEB -+# if defined(_MIPS_ARCH_MIPS64R2) -+ dsbh $in0,$in0 # byte swap -+ dsbh $in1,$in1 -+ dshd $in0,$in0 -+ dshd $in1,$in1 -+# else -+ ori $tmp0,$zero,0xFF -+ dsll $tmp2,$tmp0,32 -+ or $tmp0,$tmp2 # 0x000000FF000000FF -+ -+ and $tmp1,$in0,$tmp0 # byte swap -+ and $tmp3,$in1,$tmp0 -+ dsrl $tmp2,$in0,24 -+ dsrl $tmp4,$in1,24 -+ dsll $tmp1,24 -+ dsll $tmp3,24 -+ and $tmp2,$tmp0 -+ and $tmp4,$tmp0 -+ dsll $tmp0,8 # 0x0000FF000000FF00 -+ or $tmp1,$tmp2 -+ or $tmp3,$tmp4 -+ and $tmp2,$in0,$tmp0 -+ and $tmp4,$in1,$tmp0 -+ dsrl $in0,8 -+ dsrl $in1,8 -+ dsll $tmp2,8 -+ dsll $tmp4,8 -+ and $in0,$tmp0 -+ and $in1,$tmp0 -+ or $tmp1,$tmp2 -+ or $tmp3,$tmp4 -+ or $in0,$tmp1 -+ or $in1,$tmp3 -+ dsrl $tmp1,$in0,32 -+ dsrl $tmp3,$in1,32 -+ dsll $in0,32 -+ dsll $in1,32 -+ or $in0,$tmp1 -+ or $in1,$tmp3 -+# endif -+#endif -+ li $tmp0,1 -+ dsll $tmp0,32 # 0x0000000100000000 -+ daddiu $tmp0,-63 # 0x00000000ffffffc1 -+ dsll $tmp0,28 # 0x0ffffffc10000000 -+ daddiu $tmp0,-1 # 0x0ffffffc0fffffff -+ -+ and $in0,$tmp0 -+ daddiu $tmp0,-3 # 0x0ffffffc0ffffffc -+ and $in1,$tmp0 -+ -+ sd $in0,24($ctx) -+ dsrl $tmp0,$in1,2 -+ sd $in1,32($ctx) -+ daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) -+ sd $tmp0,40($ctx) -+ -+.Lno_key: -+ li $v0,0 # return 0 -+ jr $ra -+.end poly1305_init -+___ -+{ -+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
"0x0003f000" : "0x00030000"; -+ -+my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = -+ ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); -+my ($shr,$shl) = ($s6,$s7); # used on R6 -+ -+$code.=<<___; -+.align 5 -+.globl poly1305_blocks -+.ent poly1305_blocks -+poly1305_blocks: -+ .set noreorder -+ dsrl $len,4 # number of complete blocks -+ bnez $len,poly1305_blocks_internal -+ nop -+ jr $ra -+ nop -+.end poly1305_blocks -+ -+.align 5 -+.ent poly1305_blocks_internal -+poly1305_blocks_internal: -+ .set noreorder -+#if defined(_MIPS_ARCH_MIPS64R6) -+ .frame $sp,8*8,$ra -+ .mask $SAVED_REGS_MASK|0x000c0000,-8 -+ dsubu $sp,8*8 -+ sd $s7,56($sp) -+ sd $s6,48($sp) -+#else -+ .frame $sp,6*8,$ra -+ .mask $SAVED_REGS_MASK,-8 -+ dsubu $sp,6*8 -+#endif -+ sd $s5,40($sp) -+ sd $s4,32($sp) -+___ -+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue -+ sd $s3,24($sp) -+ sd $s2,16($sp) -+ sd $s1,8($sp) -+ sd $s0,0($sp) -+___ -+$code.=<<___; -+ .set reorder -+ -+#if defined(_MIPS_ARCH_MIPS64R6) -+ andi $shr,$inp,7 -+ dsubu $inp,$inp,$shr # align $inp -+ sll $shr,$shr,3 # byte to bit offset -+ subu $shl,$zero,$shr -+#endif -+ -+ ld $h0,0($ctx) # load hash value -+ ld $h1,8($ctx) -+ ld $h2,16($ctx) -+ -+ ld $r0,24($ctx) # load key -+ ld $r1,32($ctx) -+ ld $rs1,40($ctx) -+ -+ dsll $len,4 -+ daddu $len,$inp # end of buffer -+ b .Loop -+ -+.align 4 -+.Loop: -+#if defined(_MIPS_ARCH_MIPS64R6) -+ ld $in0,0($inp) # load input -+ ld $in1,8($inp) -+ beqz $shr,.Laligned_inp -+ -+ ld $tmp2,16($inp) -+# ifdef MIPSEB -+ dsllv $in0,$in0,$shr -+ dsrlv $tmp3,$in1,$shl -+ dsllv $in1,$in1,$shr -+ dsrlv $tmp2,$tmp2,$shl -+# else -+ dsrlv $in0,$in0,$shr -+ dsllv $tmp3,$in1,$shl -+ dsrlv $in1,$in1,$shr -+ dsllv $tmp2,$tmp2,$shl -+# endif -+ or $in0,$in0,$tmp3 -+ or $in1,$in1,$tmp2 -+.Laligned_inp: -+#else -+ ldl $in0,0+MSB($inp) # load input -+ ldl $in1,8+MSB($inp) -+ ldr $in0,0+LSB($inp) -+ ldr $in1,8+LSB($inp) -+#endif -+ daddiu $inp,16 -+#ifdef MIPSEB -+# if defined(_MIPS_ARCH_MIPS64R2) -+ dsbh 
$in0,$in0 # byte swap -+ dsbh $in1,$in1 -+ dshd $in0,$in0 -+ dshd $in1,$in1 -+# else -+ ori $tmp0,$zero,0xFF -+ dsll $tmp2,$tmp0,32 -+ or $tmp0,$tmp2 # 0x000000FF000000FF -+ -+ and $tmp1,$in0,$tmp0 # byte swap -+ and $tmp3,$in1,$tmp0 -+ dsrl $tmp2,$in0,24 -+ dsrl $tmp4,$in1,24 -+ dsll $tmp1,24 -+ dsll $tmp3,24 -+ and $tmp2,$tmp0 -+ and $tmp4,$tmp0 -+ dsll $tmp0,8 # 0x0000FF000000FF00 -+ or $tmp1,$tmp2 -+ or $tmp3,$tmp4 -+ and $tmp2,$in0,$tmp0 -+ and $tmp4,$in1,$tmp0 -+ dsrl $in0,8 -+ dsrl $in1,8 -+ dsll $tmp2,8 -+ dsll $tmp4,8 -+ and $in0,$tmp0 -+ and $in1,$tmp0 -+ or $tmp1,$tmp2 -+ or $tmp3,$tmp4 -+ or $in0,$tmp1 -+ or $in1,$tmp3 -+ dsrl $tmp1,$in0,32 -+ dsrl $tmp3,$in1,32 -+ dsll $in0,32 -+ dsll $in1,32 -+ or $in0,$tmp1 -+ or $in1,$tmp3 -+# endif -+#endif -+ dsrl $tmp1,$h2,2 # modulo-scheduled reduction -+ andi $h2,$h2,3 -+ dsll $tmp0,$tmp1,2 -+ -+ daddu $d0,$h0,$in0 # accumulate input -+ daddu $tmp1,$tmp0 -+ sltu $tmp0,$d0,$h0 -+ daddu $d0,$d0,$tmp1 # ... and residue -+ sltu $tmp1,$d0,$tmp1 -+ daddu $d1,$h1,$in1 -+ daddu $tmp0,$tmp1 -+ sltu $tmp1,$d1,$h1 -+ daddu $d1,$tmp0 -+ -+ dmultu ($r0,$d0) # h0*r0 -+ daddu $d2,$h2,$padbit -+ sltu $tmp0,$d1,$tmp0 -+ mflo ($h0,$r0,$d0) -+ mfhi ($h1,$r0,$d0) -+ -+ dmultu ($rs1,$d1) # h1*5*r1 -+ daddu $d2,$tmp1 -+ daddu $d2,$tmp0 -+ mflo ($tmp0,$rs1,$d1) -+ mfhi ($tmp1,$rs1,$d1) -+ -+ dmultu ($r1,$d0) # h0*r1 -+ mflo ($tmp2,$r1,$d0) -+ mfhi ($h2,$r1,$d0) -+ daddu $h0,$tmp0 -+ daddu $h1,$tmp1 -+ sltu $tmp0,$h0,$tmp0 -+ -+ dmultu ($r0,$d1) # h1*r0 -+ daddu $h1,$tmp0 -+ daddu $h1,$tmp2 -+ mflo ($tmp0,$r0,$d1) -+ mfhi ($tmp1,$r0,$d1) -+ -+ dmultu ($rs1,$d2) # h2*5*r1 -+ sltu $tmp2,$h1,$tmp2 -+ daddu $h2,$tmp2 -+ mflo ($tmp2,$rs1,$d2) -+ -+ dmultu ($r0,$d2) # h2*r0 -+ daddu $h1,$tmp0 -+ daddu $h2,$tmp1 -+ mflo ($tmp3,$r0,$d2) -+ sltu $tmp0,$h1,$tmp0 -+ daddu $h2,$tmp0 -+ -+ daddu $h1,$tmp2 -+ sltu $tmp2,$h1,$tmp2 -+ daddu $h2,$tmp2 -+ daddu $h2,$tmp3 -+ -+ bne $inp,$len,.Loop -+ -+ sd $h0,0($ctx) # store hash value -+ sd 
$h1,8($ctx) -+ sd $h2,16($ctx) -+ -+ .set noreorder -+#if defined(_MIPS_ARCH_MIPS64R6) -+ ld $s7,56($sp) -+ ld $s6,48($sp) -+#endif -+ ld $s5,40($sp) # epilogue -+ ld $s4,32($sp) -+___ -+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue -+ ld $s3,24($sp) -+ ld $s2,16($sp) -+ ld $s1,8($sp) -+ ld $s0,0($sp) -+___ -+$code.=<<___; -+ jr $ra -+#if defined(_MIPS_ARCH_MIPS64R6) -+ daddu $sp,8*8 -+#else -+ daddu $sp,6*8 -+#endif -+.end poly1305_blocks_internal -+___ -+} -+{ -+my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); -+ -+$code.=<<___; -+.align 5 -+.globl poly1305_emit -+.ent poly1305_emit -+poly1305_emit: -+ .frame $sp,0,$ra -+ .set reorder -+ -+ ld $tmp2,16($ctx) -+ ld $tmp0,0($ctx) -+ ld $tmp1,8($ctx) -+ -+ li $in0,-4 # final reduction -+ dsrl $in1,$tmp2,2 -+ and $in0,$tmp2 -+ andi $tmp2,$tmp2,3 -+ daddu $in0,$in1 -+ -+ daddu $tmp0,$tmp0,$in0 -+ sltu $in1,$tmp0,$in0 -+ daddiu $in0,$tmp0,5 # compare to modulus -+ daddu $tmp1,$tmp1,$in1 -+ sltiu $tmp3,$in0,5 -+ sltu $tmp4,$tmp1,$in1 -+ daddu $in1,$tmp1,$tmp3 -+ daddu $tmp2,$tmp2,$tmp4 -+ sltu $tmp3,$in1,$tmp3 -+ daddu $tmp2,$tmp2,$tmp3 -+ -+ dsrl $tmp2,2 # see if it carried/borrowed -+ dsubu $tmp2,$zero,$tmp2 -+ -+ xor $in0,$tmp0 -+ xor $in1,$tmp1 -+ and $in0,$tmp2 -+ and $in1,$tmp2 -+ xor $in0,$tmp0 -+ xor $in1,$tmp1 -+ -+ lwu $tmp0,0($nonce) # load nonce -+ lwu $tmp1,4($nonce) -+ lwu $tmp2,8($nonce) -+ lwu $tmp3,12($nonce) -+ dsll $tmp1,32 -+ dsll $tmp3,32 -+ or $tmp0,$tmp1 -+ or $tmp2,$tmp3 -+ -+ daddu $in0,$tmp0 # accumulate nonce -+ daddu $in1,$tmp2 -+ sltu $tmp0,$in0,$tmp0 -+ daddu $in1,$tmp0 -+ -+ dsrl $tmp0,$in0,8 # write mac value -+ dsrl $tmp1,$in0,16 -+ dsrl $tmp2,$in0,24 -+ sb $in0,0($mac) -+ dsrl $tmp3,$in0,32 -+ sb $tmp0,1($mac) -+ dsrl $tmp0,$in0,40 -+ sb $tmp1,2($mac) -+ dsrl $tmp1,$in0,48 -+ sb $tmp2,3($mac) -+ dsrl $tmp2,$in0,56 -+ sb $tmp3,4($mac) -+ dsrl $tmp3,$in1,8 -+ sb $tmp0,5($mac) -+ dsrl $tmp0,$in1,16 -+ sb $tmp1,6($mac) -+ dsrl $tmp1,$in1,24 -+ sb $tmp2,7($mac) -+ -+ sb 
$in1,8($mac) -+ dsrl $tmp2,$in1,32 -+ sb $tmp3,9($mac) -+ dsrl $tmp3,$in1,40 -+ sb $tmp0,10($mac) -+ dsrl $tmp0,$in1,48 -+ sb $tmp1,11($mac) -+ dsrl $tmp1,$in1,56 -+ sb $tmp2,12($mac) -+ sb $tmp3,13($mac) -+ sb $tmp0,14($mac) -+ sb $tmp1,15($mac) -+ -+ jr $ra -+.end poly1305_emit -+.rdata -+.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm" -+.align 2 -+___ -+} -+}}} else {{{ -+###################################################################### -+# 32-bit code path -+# -+ -+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); -+my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = -+ ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); -+ -+$code.=<<___; -+#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ -+ defined(_MIPS_ARCH_MIPS32R6)) \\ -+ && !defined(_MIPS_ARCH_MIPS32R2) -+# define _MIPS_ARCH_MIPS32R2 -+#endif -+ -+#if defined(_MIPS_ARCH_MIPS32R6) -+# define multu(rs,rt) -+# define mflo(rd,rs,rt) mulu rd,rs,rt -+# define mfhi(rd,rs,rt) muhu rd,rs,rt -+#else -+# define multu(rs,rt) multu rs,rt -+# define mflo(rd,rs,rt) mflo rd -+# define mfhi(rd,rs,rt) mfhi rd -+#endif -+ -+#ifdef __KERNEL__ -+# define poly1305_init poly1305_init_mips -+# define poly1305_blocks poly1305_blocks_mips -+# define poly1305_emit poly1305_emit_mips -+#endif -+ -+#if defined(__MIPSEB__) && !defined(MIPSEB) -+# define MIPSEB -+#endif -+ -+#ifdef MIPSEB -+# define MSB 0 -+# define LSB 3 -+#else -+# define MSB 3 -+# define LSB 0 -+#endif -+ -+.text -+.set noat -+.set noreorder -+ -+.align 5 -+.globl poly1305_init -+.ent poly1305_init -+poly1305_init: -+ .frame $sp,0,$ra -+ .set reorder -+ -+ sw $zero,0($ctx) -+ sw $zero,4($ctx) -+ sw $zero,8($ctx) -+ sw $zero,12($ctx) -+ sw $zero,16($ctx) -+ -+ beqz $inp,.Lno_key -+ -+#if defined(_MIPS_ARCH_MIPS32R6) -+ andi $tmp0,$inp,3 # $inp % 4 -+ subu $inp,$inp,$tmp0 # align $inp -+ sll $tmp0,$tmp0,3 # byte to bit offset -+ lw $in0,0($inp) -+ lw $in1,4($inp) -+ lw $in2,8($inp) -+ lw $in3,12($inp) -+ beqz $tmp0,.Laligned_key -+ -+ lw 
$tmp2,16($inp) -+ subu $tmp1,$zero,$tmp0 -+# ifdef MIPSEB -+ sllv $in0,$in0,$tmp0 -+ srlv $tmp3,$in1,$tmp1 -+ sllv $in1,$in1,$tmp0 -+ or $in0,$in0,$tmp3 -+ srlv $tmp3,$in2,$tmp1 -+ sllv $in2,$in2,$tmp0 -+ or $in1,$in1,$tmp3 -+ srlv $tmp3,$in3,$tmp1 -+ sllv $in3,$in3,$tmp0 -+ or $in2,$in2,$tmp3 -+ srlv $tmp2,$tmp2,$tmp1 -+ or $in3,$in3,$tmp2 -+# else -+ srlv $in0,$in0,$tmp0 -+ sllv $tmp3,$in1,$tmp1 -+ srlv $in1,$in1,$tmp0 -+ or $in0,$in0,$tmp3 -+ sllv $tmp3,$in2,$tmp1 -+ srlv $in2,$in2,$tmp0 -+ or $in1,$in1,$tmp3 -+ sllv $tmp3,$in3,$tmp1 -+ srlv $in3,$in3,$tmp0 -+ or $in2,$in2,$tmp3 -+ sllv $tmp2,$tmp2,$tmp1 -+ or $in3,$in3,$tmp2 -+# endif -+.Laligned_key: -+#else -+ lwl $in0,0+MSB($inp) -+ lwl $in1,4+MSB($inp) -+ lwl $in2,8+MSB($inp) -+ lwl $in3,12+MSB($inp) -+ lwr $in0,0+LSB($inp) -+ lwr $in1,4+LSB($inp) -+ lwr $in2,8+LSB($inp) -+ lwr $in3,12+LSB($inp) -+#endif -+#ifdef MIPSEB -+# if defined(_MIPS_ARCH_MIPS32R2) -+ wsbh $in0,$in0 # byte swap -+ wsbh $in1,$in1 -+ wsbh $in2,$in2 -+ wsbh $in3,$in3 -+ rotr $in0,$in0,16 -+ rotr $in1,$in1,16 -+ rotr $in2,$in2,16 -+ rotr $in3,$in3,16 -+# else -+ srl $tmp0,$in0,24 # byte swap -+ srl $tmp1,$in0,8 -+ andi $tmp2,$in0,0xFF00 -+ sll $in0,$in0,24 -+ andi $tmp1,0xFF00 -+ sll $tmp2,$tmp2,8 -+ or $in0,$tmp0 -+ srl $tmp0,$in1,24 -+ or $tmp1,$tmp2 -+ srl $tmp2,$in1,8 -+ or $in0,$tmp1 -+ andi $tmp1,$in1,0xFF00 -+ sll $in1,$in1,24 -+ andi $tmp2,0xFF00 -+ sll $tmp1,$tmp1,8 -+ or $in1,$tmp0 -+ srl $tmp0,$in2,24 -+ or $tmp2,$tmp1 -+ srl $tmp1,$in2,8 -+ or $in1,$tmp2 -+ andi $tmp2,$in2,0xFF00 -+ sll $in2,$in2,24 -+ andi $tmp1,0xFF00 -+ sll $tmp2,$tmp2,8 -+ or $in2,$tmp0 -+ srl $tmp0,$in3,24 -+ or $tmp1,$tmp2 -+ srl $tmp2,$in3,8 -+ or $in2,$tmp1 -+ andi $tmp1,$in3,0xFF00 -+ sll $in3,$in3,24 -+ andi $tmp2,0xFF00 -+ sll $tmp1,$tmp1,8 -+ or $in3,$tmp0 -+ or $tmp2,$tmp1 -+ or $in3,$tmp2 -+# endif -+#endif -+ lui $tmp0,0x0fff -+ ori $tmp0,0xffff # 0x0fffffff -+ and $in0,$in0,$tmp0 -+ subu $tmp0,3 # 0x0ffffffc -+ and $in1,$in1,$tmp0 -+ and 
$in2,$in2,$tmp0 -+ and $in3,$in3,$tmp0 -+ -+ sw $in0,20($ctx) -+ sw $in1,24($ctx) -+ sw $in2,28($ctx) -+ sw $in3,32($ctx) -+ -+ srl $tmp1,$in1,2 -+ srl $tmp2,$in2,2 -+ srl $tmp3,$in3,2 -+ addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) -+ addu $in2,$in2,$tmp2 -+ addu $in3,$in3,$tmp3 -+ sw $in1,36($ctx) -+ sw $in2,40($ctx) -+ sw $in3,44($ctx) -+.Lno_key: -+ li $v0,0 -+ jr $ra -+.end poly1305_init -+___ -+{ -+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000"; -+ -+my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = -+ ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); -+my ($d0,$d1,$d2,$d3) = -+ ($a4,$a5,$a6,$a7); -+my $shr = $t2; # used on R6 -+my $one = $t2; # used on R2 -+ -+$code.=<<___; -+.globl poly1305_blocks -+.align 5 -+.ent poly1305_blocks -+poly1305_blocks: -+ .frame $sp,16*4,$ra -+ .mask $SAVED_REGS_MASK,-4 -+ .set noreorder -+ subu $sp, $sp,4*12 -+ sw $s11,4*11($sp) -+ sw $s10,4*10($sp) -+ sw $s9, 4*9($sp) -+ sw $s8, 4*8($sp) -+ sw $s7, 4*7($sp) -+ sw $s6, 4*6($sp) -+ sw $s5, 4*5($sp) -+ sw $s4, 4*4($sp) -+___ -+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue -+ sw $s3, 4*3($sp) -+ sw $s2, 4*2($sp) -+ sw $s1, 4*1($sp) -+ sw $s0, 4*0($sp) -+___ -+$code.=<<___; -+ .set reorder -+ -+ srl $len,4 # number of complete blocks -+ li $one,1 -+ beqz $len,.Labort -+ -+#if defined(_MIPS_ARCH_MIPS32R6) -+ andi $shr,$inp,3 -+ subu $inp,$inp,$shr # align $inp -+ sll $shr,$shr,3 # byte to bit offset -+#endif -+ -+ lw $h0,0($ctx) # load hash value -+ lw $h1,4($ctx) -+ lw $h2,8($ctx) -+ lw $h3,12($ctx) -+ lw $h4,16($ctx) -+ -+ lw $r0,20($ctx) # load key -+ lw $r1,24($ctx) -+ lw $r2,28($ctx) -+ lw $r3,32($ctx) -+ lw $rs1,36($ctx) -+ lw $rs2,40($ctx) -+ lw $rs3,44($ctx) -+ -+ sll $len,4 -+ addu $len,$len,$inp # end of buffer -+ b .Loop -+ -+.align 4 -+.Loop: -+#if defined(_MIPS_ARCH_MIPS32R6) -+ lw $d0,0($inp) # load input -+ lw $d1,4($inp) -+ lw $d2,8($inp) -+ lw $d3,12($inp) -+ beqz $shr,.Laligned_inp -+ -+ lw 
$t0,16($inp) -+ subu $t1,$zero,$shr -+# ifdef MIPSEB -+ sllv $d0,$d0,$shr -+ srlv $at,$d1,$t1 -+ sllv $d1,$d1,$shr -+ or $d0,$d0,$at -+ srlv $at,$d2,$t1 -+ sllv $d2,$d2,$shr -+ or $d1,$d1,$at -+ srlv $at,$d3,$t1 -+ sllv $d3,$d3,$shr -+ or $d2,$d2,$at -+ srlv $t0,$t0,$t1 -+ or $d3,$d3,$t0 -+# else -+ srlv $d0,$d0,$shr -+ sllv $at,$d1,$t1 -+ srlv $d1,$d1,$shr -+ or $d0,$d0,$at -+ sllv $at,$d2,$t1 -+ srlv $d2,$d2,$shr -+ or $d1,$d1,$at -+ sllv $at,$d3,$t1 -+ srlv $d3,$d3,$shr -+ or $d2,$d2,$at -+ sllv $t0,$t0,$t1 -+ or $d3,$d3,$t0 -+# endif -+.Laligned_inp: -+#else -+ lwl $d0,0+MSB($inp) # load input -+ lwl $d1,4+MSB($inp) -+ lwl $d2,8+MSB($inp) -+ lwl $d3,12+MSB($inp) -+ lwr $d0,0+LSB($inp) -+ lwr $d1,4+LSB($inp) -+ lwr $d2,8+LSB($inp) -+ lwr $d3,12+LSB($inp) -+#endif -+#ifdef MIPSEB -+# if defined(_MIPS_ARCH_MIPS32R2) -+ wsbh $d0,$d0 # byte swap -+ wsbh $d1,$d1 -+ wsbh $d2,$d2 -+ wsbh $d3,$d3 -+ rotr $d0,$d0,16 -+ rotr $d1,$d1,16 -+ rotr $d2,$d2,16 -+ rotr $d3,$d3,16 -+# else -+ srl $at,$d0,24 # byte swap -+ srl $t0,$d0,8 -+ andi $t1,$d0,0xFF00 -+ sll $d0,$d0,24 -+ andi $t0,0xFF00 -+ sll $t1,$t1,8 -+ or $d0,$at -+ srl $at,$d1,24 -+ or $t0,$t1 -+ srl $t1,$d1,8 -+ or $d0,$t0 -+ andi $t0,$d1,0xFF00 -+ sll $d1,$d1,24 -+ andi $t1,0xFF00 -+ sll $t0,$t0,8 -+ or $d1,$at -+ srl $at,$d2,24 -+ or $t1,$t0 -+ srl $t0,$d2,8 -+ or $d1,$t1 -+ andi $t1,$d2,0xFF00 -+ sll $d2,$d2,24 -+ andi $t0,0xFF00 -+ sll $t1,$t1,8 -+ or $d2,$at -+ srl $at,$d3,24 -+ or $t0,$t1 -+ srl $t1,$d3,8 -+ or $d2,$t0 -+ andi $t0,$d3,0xFF00 -+ sll $d3,$d3,24 -+ andi $t1,0xFF00 -+ sll $t0,$t0,8 -+ or $d3,$at -+ or $t1,$t0 -+ or $d3,$t1 -+# endif -+#endif -+ srl $t0,$h4,2 # modulo-scheduled reduction -+ andi $h4,$h4,3 -+ sll $at,$t0,2 -+ -+ addu $d0,$d0,$h0 # accumulate input -+ addu $t0,$t0,$at -+ sltu $h0,$d0,$h0 -+ addu $d0,$d0,$t0 # ... 
and residue -+ sltu $at,$d0,$t0 -+ -+ addu $d1,$d1,$h1 -+ addu $h0,$h0,$at # carry -+ sltu $h1,$d1,$h1 -+ addu $d1,$d1,$h0 -+ sltu $h0,$d1,$h0 -+ -+ addu $d2,$d2,$h2 -+ addu $h1,$h1,$h0 # carry -+ sltu $h2,$d2,$h2 -+ addu $d2,$d2,$h1 -+ sltu $h1,$d2,$h1 -+ -+ addu $d3,$d3,$h3 -+ addu $h2,$h2,$h1 # carry -+ sltu $h3,$d3,$h3 -+ addu $d3,$d3,$h2 -+ -+#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) -+ multu $r0,$d0 # d0*r0 -+ sltu $h2,$d3,$h2 -+ maddu $rs3,$d1 # d1*s3 -+ addu $h3,$h3,$h2 # carry -+ maddu $rs2,$d2 # d2*s2 -+ addu $h4,$h4,$padbit -+ maddu $rs1,$d3 # d3*s1 -+ addu $h4,$h4,$h3 -+ mfhi $at -+ mflo $h0 -+ -+ multu $r1,$d0 # d0*r1 -+ maddu $r0,$d1 # d1*r0 -+ maddu $rs3,$d2 # d2*s3 -+ maddu $rs2,$d3 # d3*s2 -+ maddu $rs1,$h4 # h4*s1 -+ maddu $at,$one # hi*1 -+ mfhi $at -+ mflo $h1 -+ -+ multu $r2,$d0 # d0*r2 -+ maddu $r1,$d1 # d1*r1 -+ maddu $r0,$d2 # d2*r0 -+ maddu $rs3,$d3 # d3*s3 -+ maddu $rs2,$h4 # h4*s2 -+ maddu $at,$one # hi*1 -+ mfhi $at -+ mflo $h2 -+ -+ mul $t0,$r0,$h4 # h4*r0 -+ -+ multu $r3,$d0 # d0*r3 -+ maddu $r2,$d1 # d1*r2 -+ maddu $r1,$d2 # d2*r1 -+ maddu $r0,$d3 # d3*r0 -+ maddu $rs3,$h4 # h4*s3 -+ maddu $at,$one # hi*1 -+ mfhi $at -+ mflo $h3 -+ -+ addiu $inp,$inp,16 -+ -+ addu $h4,$t0,$at -+#else -+ multu ($r0,$d0) # d0*r0 -+ mflo ($h0,$r0,$d0) -+ mfhi ($h1,$r0,$d0) -+ -+ sltu $h2,$d3,$h2 -+ addu $h3,$h3,$h2 # carry -+ -+ multu ($rs3,$d1) # d1*s3 -+ mflo ($at,$rs3,$d1) -+ mfhi ($t0,$rs3,$d1) -+ -+ addu $h4,$h4,$padbit -+ addiu $inp,$inp,16 -+ addu $h4,$h4,$h3 -+ -+ multu ($rs2,$d2) # d2*s2 -+ mflo ($a3,$rs2,$d2) -+ mfhi ($t1,$rs2,$d2) -+ addu $h0,$h0,$at -+ addu $h1,$h1,$t0 -+ multu ($rs1,$d3) # d3*s1 -+ sltu $at,$h0,$at -+ addu $h1,$h1,$at -+ -+ mflo ($at,$rs1,$d3) -+ mfhi ($t0,$rs1,$d3) -+ addu $h0,$h0,$a3 -+ addu $h1,$h1,$t1 -+ multu ($r1,$d0) # d0*r1 -+ sltu $a3,$h0,$a3 -+ addu $h1,$h1,$a3 -+ -+ -+ mflo ($a3,$r1,$d0) -+ mfhi ($h2,$r1,$d0) -+ addu $h0,$h0,$at -+ addu $h1,$h1,$t0 -+ multu ($r0,$d1) # d1*r0 -+ sltu 
$at,$h0,$at -+ addu $h1,$h1,$at -+ -+ mflo ($at,$r0,$d1) -+ mfhi ($t0,$r0,$d1) -+ addu $h1,$h1,$a3 -+ sltu $a3,$h1,$a3 -+ multu ($rs3,$d2) # d2*s3 -+ addu $h2,$h2,$a3 -+ -+ mflo ($a3,$rs3,$d2) -+ mfhi ($t1,$rs3,$d2) -+ addu $h1,$h1,$at -+ addu $h2,$h2,$t0 -+ multu ($rs2,$d3) # d3*s2 -+ sltu $at,$h1,$at -+ addu $h2,$h2,$at -+ -+ mflo ($at,$rs2,$d3) -+ mfhi ($t0,$rs2,$d3) -+ addu $h1,$h1,$a3 -+ addu $h2,$h2,$t1 -+ multu ($rs1,$h4) # h4*s1 -+ sltu $a3,$h1,$a3 -+ addu $h2,$h2,$a3 -+ -+ mflo ($a3,$rs1,$h4) -+ addu $h1,$h1,$at -+ addu $h2,$h2,$t0 -+ multu ($r2,$d0) # d0*r2 -+ sltu $at,$h1,$at -+ addu $h2,$h2,$at -+ -+ -+ mflo ($at,$r2,$d0) -+ mfhi ($h3,$r2,$d0) -+ addu $h1,$h1,$a3 -+ sltu $a3,$h1,$a3 -+ multu ($r1,$d1) # d1*r1 -+ addu $h2,$h2,$a3 -+ -+ mflo ($a3,$r1,$d1) -+ mfhi ($t1,$r1,$d1) -+ addu $h2,$h2,$at -+ sltu $at,$h2,$at -+ multu ($r0,$d2) # d2*r0 -+ addu $h3,$h3,$at -+ -+ mflo ($at,$r0,$d2) -+ mfhi ($t0,$r0,$d2) -+ addu $h2,$h2,$a3 -+ addu $h3,$h3,$t1 -+ multu ($rs3,$d3) # d3*s3 -+ sltu $a3,$h2,$a3 -+ addu $h3,$h3,$a3 -+ -+ mflo ($a3,$rs3,$d3) -+ mfhi ($t1,$rs3,$d3) -+ addu $h2,$h2,$at -+ addu $h3,$h3,$t0 -+ multu ($rs2,$h4) # h4*s2 -+ sltu $at,$h2,$at -+ addu $h3,$h3,$at -+ -+ mflo ($at,$rs2,$h4) -+ addu $h2,$h2,$a3 -+ addu $h3,$h3,$t1 -+ multu ($r3,$d0) # d0*r3 -+ sltu $a3,$h2,$a3 -+ addu $h3,$h3,$a3 -+ -+ -+ mflo ($a3,$r3,$d0) -+ mfhi ($t1,$r3,$d0) -+ addu $h2,$h2,$at -+ sltu $at,$h2,$at -+ multu ($r2,$d1) # d1*r2 -+ addu $h3,$h3,$at -+ -+ mflo ($at,$r2,$d1) -+ mfhi ($t0,$r2,$d1) -+ addu $h3,$h3,$a3 -+ sltu $a3,$h3,$a3 -+ multu ($r0,$d3) # d3*r0 -+ addu $t1,$t1,$a3 -+ -+ mflo ($a3,$r0,$d3) -+ mfhi ($d3,$r0,$d3) -+ addu $h3,$h3,$at -+ addu $t1,$t1,$t0 -+ multu ($r1,$d2) # d2*r1 -+ sltu $at,$h3,$at -+ addu $t1,$t1,$at -+ -+ mflo ($at,$r1,$d2) -+ mfhi ($t0,$r1,$d2) -+ addu $h3,$h3,$a3 -+ addu $t1,$t1,$d3 -+ multu ($rs3,$h4) # h4*s3 -+ sltu $a3,$h3,$a3 -+ addu $t1,$t1,$a3 -+ -+ mflo ($a3,$rs3,$h4) -+ addu $h3,$h3,$at -+ addu $t1,$t1,$t0 -+ multu ($r0,$h4) # 
h4*r0 -+ sltu $at,$h3,$at -+ addu $t1,$t1,$at -+ -+ -+ mflo ($h4,$r0,$h4) -+ addu $h3,$h3,$a3 -+ sltu $a3,$h3,$a3 -+ addu $t1,$t1,$a3 -+ addu $h4,$h4,$t1 -+ -+ li $padbit,1 # if we loop, padbit is 1 -+#endif -+ bne $inp,$len,.Loop -+ -+ sw $h0,0($ctx) # store hash value -+ sw $h1,4($ctx) -+ sw $h2,8($ctx) -+ sw $h3,12($ctx) -+ sw $h4,16($ctx) -+ -+ .set noreorder -+.Labort: -+ lw $s11,4*11($sp) -+ lw $s10,4*10($sp) -+ lw $s9, 4*9($sp) -+ lw $s8, 4*8($sp) -+ lw $s7, 4*7($sp) -+ lw $s6, 4*6($sp) -+ lw $s5, 4*5($sp) -+ lw $s4, 4*4($sp) -+___ -+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue -+ lw $s3, 4*3($sp) -+ lw $s2, 4*2($sp) -+ lw $s1, 4*1($sp) -+ lw $s0, 4*0($sp) -+___ -+$code.=<<___; -+ jr $ra -+ addu $sp,$sp,4*12 -+.end poly1305_blocks -+___ -+} -+{ -+my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); -+ -+$code.=<<___; -+.align 5 -+.globl poly1305_emit -+.ent poly1305_emit -+poly1305_emit: -+ .frame $sp,0,$ra -+ .set reorder -+ -+ lw $tmp4,16($ctx) -+ lw $tmp0,0($ctx) -+ lw $tmp1,4($ctx) -+ lw $tmp2,8($ctx) -+ lw $tmp3,12($ctx) -+ -+ li $in0,-4 # final reduction -+ srl $ctx,$tmp4,2 -+ and $in0,$in0,$tmp4 -+ andi $tmp4,$tmp4,3 -+ addu $ctx,$ctx,$in0 -+ -+ addu $tmp0,$tmp0,$ctx -+ sltu $ctx,$tmp0,$ctx -+ addiu $in0,$tmp0,5 # compare to modulus -+ addu $tmp1,$tmp1,$ctx -+ sltiu $in1,$in0,5 -+ sltu $ctx,$tmp1,$ctx -+ addu $in1,$in1,$tmp1 -+ addu $tmp2,$tmp2,$ctx -+ sltu $in2,$in1,$tmp1 -+ sltu $ctx,$tmp2,$ctx -+ addu $in2,$in2,$tmp2 -+ addu $tmp3,$tmp3,$ctx -+ sltu $in3,$in2,$tmp2 -+ sltu $ctx,$tmp3,$ctx -+ addu $in3,$in3,$tmp3 -+ addu $tmp4,$tmp4,$ctx -+ sltu $ctx,$in3,$tmp3 -+ addu $ctx,$tmp4 -+ -+ srl $ctx,2 # see if it carried/borrowed -+ subu $ctx,$zero,$ctx -+ -+ xor $in0,$tmp0 -+ xor $in1,$tmp1 -+ xor $in2,$tmp2 -+ xor $in3,$tmp3 -+ and $in0,$ctx -+ and $in1,$ctx -+ and $in2,$ctx -+ and $in3,$ctx -+ xor $in0,$tmp0 -+ xor $in1,$tmp1 -+ xor $in2,$tmp2 -+ xor $in3,$tmp3 -+ -+ lw $tmp0,0($nonce) # load nonce -+ lw $tmp1,4($nonce) -+ lw 
$tmp2,8($nonce) -+ lw $tmp3,12($nonce) -+ -+ addu $in0,$tmp0 # accumulate nonce -+ sltu $ctx,$in0,$tmp0 -+ -+ addu $in1,$tmp1 -+ sltu $tmp1,$in1,$tmp1 -+ addu $in1,$ctx -+ sltu $ctx,$in1,$ctx -+ addu $ctx,$tmp1 -+ -+ addu $in2,$tmp2 -+ sltu $tmp2,$in2,$tmp2 -+ addu $in2,$ctx -+ sltu $ctx,$in2,$ctx -+ addu $ctx,$tmp2 -+ -+ addu $in3,$tmp3 -+ addu $in3,$ctx -+ -+ srl $tmp0,$in0,8 # write mac value -+ srl $tmp1,$in0,16 -+ srl $tmp2,$in0,24 -+ sb $in0, 0($mac) -+ sb $tmp0,1($mac) -+ srl $tmp0,$in1,8 -+ sb $tmp1,2($mac) -+ srl $tmp1,$in1,16 -+ sb $tmp2,3($mac) -+ srl $tmp2,$in1,24 -+ sb $in1, 4($mac) -+ sb $tmp0,5($mac) -+ srl $tmp0,$in2,8 -+ sb $tmp1,6($mac) -+ srl $tmp1,$in2,16 -+ sb $tmp2,7($mac) -+ srl $tmp2,$in2,24 -+ sb $in2, 8($mac) -+ sb $tmp0,9($mac) -+ srl $tmp0,$in3,8 -+ sb $tmp1,10($mac) -+ srl $tmp1,$in3,16 -+ sb $tmp2,11($mac) -+ srl $tmp2,$in3,24 -+ sb $in3, 12($mac) -+ sb $tmp0,13($mac) -+ sb $tmp1,14($mac) -+ sb $tmp2,15($mac) -+ -+ jr $ra -+.end poly1305_emit -+.rdata -+.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" -+.align 2 -+___ -+} -+}}} -+ -+$output=pop and open STDOUT,">$output"; -+print $code; -+close STDOUT; ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -707,6 +707,11 @@ config CRYPTO_POLY1305_X86_64 - in IETF protocols. This is the x86_64 assembler implementation using SIMD - instructions. 
- -+config CRYPTO_POLY1305_MIPS -+ tristate "Poly1305 authenticator algorithm (MIPS optimized)" -+ depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) -+ select CRYPTO_ARCH_HAVE_LIB_POLY1305 -+ - config CRYPTO_MD4 - tristate "MD4 digest algorithm" - select CRYPTO_HASH ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -39,6 +39,7 @@ config CRYPTO_LIB_DES - - config CRYPTO_LIB_POLY1305_RSIZE - int -+ default 2 if MIPS - default 4 if X86_64 - default 9 if ARM || ARM64 - default 1 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0021-crypto-blake2s-generic-C-library-implementation-and-.patch b/target/linux/generic/backport-5.4/080-wireguard-0021-crypto-blake2s-generic-C-library-implementation-and-.patch deleted file mode 100644 index 97f73b983a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0021-crypto-blake2s-generic-C-library-implementation-and-.patch +++ /dev/null @@ -1,1097 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:28 +0100 -Subject: [PATCH] crypto: blake2s - generic C library implementation and - selftest - -commit 66d7fb94e4ffe5acc589e0b2b4710aecc1f07a28 upstream. - -The C implementation was originally based on Samuel Neves' public -domain reference implementation but has since been heavily modified -for the kernel. We're able to do compile-time optimizations by moving -some scaffolding around the final function into the header file. - -Information: https://blake2.net/ - -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -Signed-off-by: Samuel Neves <sneves@dei.uc.pt> -Co-developed-by: Samuel Neves <sneves@dei.uc.pt> -[ardb: - move from lib/zinc to lib/crypto - - remove simd handling - - rewrote selftest for better coverage - - use fixed digest length for blake2s_hmac() and rename to - blake2s256_hmac() ] -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - include/crypto/blake2s.h | 106 +++++ - include/crypto/internal/blake2s.h | 19 + - lib/crypto/Kconfig | 25 ++ - lib/crypto/Makefile | 10 + - lib/crypto/blake2s-generic.c | 111 ++++++ - lib/crypto/blake2s-selftest.c | 622 ++++++++++++++++++++++++++++++ - lib/crypto/blake2s.c | 126 ++++++ - 7 files changed, 1019 insertions(+) - create mode 100644 include/crypto/blake2s.h - create mode 100644 include/crypto/internal/blake2s.h - create mode 100644 lib/crypto/blake2s-generic.c - create mode 100644 lib/crypto/blake2s-selftest.c - create mode 100644 lib/crypto/blake2s.c - ---- /dev/null -+++ b/include/crypto/blake2s.h -@@ -0,0 +1,106 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef BLAKE2S_H -+#define BLAKE2S_H -+ -+#include <linux/types.h> -+#include <linux/kernel.h> -+#include <linux/string.h> -+ -+#include <asm/bug.h> -+ -+enum blake2s_lengths { -+ BLAKE2S_BLOCK_SIZE = 64, -+ BLAKE2S_HASH_SIZE = 32, -+ BLAKE2S_KEY_SIZE = 32, -+ -+ BLAKE2S_128_HASH_SIZE = 16, -+ BLAKE2S_160_HASH_SIZE = 20, -+ BLAKE2S_224_HASH_SIZE = 28, -+ BLAKE2S_256_HASH_SIZE = 32, -+}; -+ -+struct blake2s_state { -+ u32 h[8]; -+ u32 t[2]; -+ u32 f[2]; -+ u8 buf[BLAKE2S_BLOCK_SIZE]; -+ unsigned int buflen; -+ unsigned int outlen; -+}; -+ -+enum blake2s_iv { -+ BLAKE2S_IV0 = 0x6A09E667UL, -+ BLAKE2S_IV1 = 0xBB67AE85UL, -+ BLAKE2S_IV2 = 0x3C6EF372UL, -+ BLAKE2S_IV3 = 0xA54FF53AUL, -+ BLAKE2S_IV4 = 0x510E527FUL, -+ BLAKE2S_IV5 = 0x9B05688CUL, -+ BLAKE2S_IV6 = 0x1F83D9ABUL, -+ BLAKE2S_IV7 = 0x5BE0CD19UL, -+}; -+ -+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen); -+void blake2s_final(struct blake2s_state *state, u8 *out); -+ -+static inline void blake2s_init_param(struct blake2s_state *state, -+ const u32 param) -+{ -+ *state = (struct blake2s_state){{ -+ BLAKE2S_IV0 ^ param, -+ BLAKE2S_IV1, -+ BLAKE2S_IV2, -+ BLAKE2S_IV3, -+ BLAKE2S_IV4, -+ BLAKE2S_IV5, -+ BLAKE2S_IV6, -+ BLAKE2S_IV7, -+ }}; -+} -+ -+static inline void blake2s_init(struct blake2s_state *state, -+ const size_t outlen) -+{ -+ blake2s_init_param(state, 0x01010000 | outlen); -+ state->outlen = outlen; -+} -+ -+static inline void blake2s_init_key(struct blake2s_state *state, -+ const size_t outlen, const void *key, -+ const size_t keylen) -+{ -+ WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE || -+ !key || !keylen || keylen > BLAKE2S_KEY_SIZE)); -+ -+ blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen); -+ memcpy(state->buf, key, keylen); -+ state->buflen = BLAKE2S_BLOCK_SIZE; -+ state->outlen = outlen; -+} -+ -+static inline void blake2s(u8 *out, const u8 *in, const u8 *key, -+ const size_t outlen, const size_t inlen, -+ const size_t 
keylen) -+{ -+ struct blake2s_state state; -+ -+ WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen || -+ outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE || -+ (!key && keylen))); -+ -+ if (keylen) -+ blake2s_init_key(&state, outlen, key, keylen); -+ else -+ blake2s_init(&state, outlen); -+ -+ blake2s_update(&state, in, inlen); -+ blake2s_final(&state, out); -+} -+ -+void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, -+ const size_t keylen); -+ -+#endif /* BLAKE2S_H */ ---- /dev/null -+++ b/include/crypto/internal/blake2s.h -@@ -0,0 +1,19 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -+ -+#ifndef BLAKE2S_INTERNAL_H -+#define BLAKE2S_INTERNAL_H -+ -+#include <crypto/blake2s.h> -+ -+void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, -+ size_t nblocks, const u32 inc); -+ -+void blake2s_compress_arch(struct blake2s_state *state,const u8 *block, -+ size_t nblocks, const u32 inc); -+ -+static inline void blake2s_set_lastblock(struct blake2s_state *state) -+{ -+ state->f[0] = -1; -+} -+ -+#endif /* BLAKE2S_INTERNAL_H */ ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -8,6 +8,31 @@ config CRYPTO_LIB_AES - config CRYPTO_LIB_ARC4 - tristate - -+config CRYPTO_ARCH_HAVE_LIB_BLAKE2S -+ tristate -+ help -+ Declares whether the architecture provides an arch-specific -+ accelerated implementation of the Blake2s library interface, -+ either builtin or as a module. -+ -+config CRYPTO_LIB_BLAKE2S_GENERIC -+ tristate -+ help -+ This symbol can be depended upon by arch implementations of the -+ Blake2s library interface that require the generic code as a -+ fallback, e.g., for SIMD implementations. If no arch specific -+ implementation is enabled, this implementation serves the users -+ of CRYPTO_LIB_BLAKE2S. 
-+ -+config CRYPTO_LIB_BLAKE2S -+ tristate "BLAKE2s hash function library" -+ depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S -+ select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n -+ help -+ Enable the Blake2s library interface. This interface may be fulfilled -+ by either the generic implementation or an arch-specific one, if one -+ is available and enabled. -+ - config CRYPTO_ARCH_HAVE_LIB_CHACHA - tristate - help ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -10,6 +10,12 @@ libaes-y := aes.o - obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o - libarc4-y := arc4.o - -+obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o -+libblake2s-generic-y += blake2s-generic.o -+ -+obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o -+libblake2s-y += blake2s.o -+ - obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o - libdes-y := des.o - -@@ -18,3 +24,7 @@ libpoly1305-y := poly1305.o - - obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o - libsha256-y := sha256.o -+ -+ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) -+libblake2s-y += blake2s-selftest.o -+endif ---- /dev/null -+++ b/lib/crypto/blake2s-generic.c -@@ -0,0 +1,111 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is an implementation of the BLAKE2s hash and PRF functions. 
-+ * -+ * Information: https://blake2.net/ -+ * -+ */ -+ -+#include <crypto/internal/blake2s.h> -+#include <linux/types.h> -+#include <linux/string.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/bug.h> -+#include <asm/unaligned.h> -+ -+static const u8 blake2s_sigma[10][16] = { -+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, -+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, -+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, -+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, -+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, -+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, -+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, -+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, -+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, -+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, -+}; -+ -+static inline void blake2s_increment_counter(struct blake2s_state *state, -+ const u32 inc) -+{ -+ state->t[0] += inc; -+ state->t[1] += (state->t[0] < inc); -+} -+ -+void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, -+ size_t nblocks, const u32 inc) -+{ -+ u32 m[16]; -+ u32 v[16]; -+ int i; -+ -+ WARN_ON(IS_ENABLED(DEBUG) && -+ (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); -+ -+ while (nblocks > 0) { -+ blake2s_increment_counter(state, inc); -+ memcpy(m, block, BLAKE2S_BLOCK_SIZE); -+ le32_to_cpu_array(m, ARRAY_SIZE(m)); -+ memcpy(v, state->h, 32); -+ v[ 8] = BLAKE2S_IV0; -+ v[ 9] = BLAKE2S_IV1; -+ v[10] = BLAKE2S_IV2; -+ v[11] = BLAKE2S_IV3; -+ v[12] = BLAKE2S_IV4 ^ state->t[0]; -+ v[13] = BLAKE2S_IV5 ^ state->t[1]; -+ v[14] = BLAKE2S_IV6 ^ state->f[0]; -+ v[15] = BLAKE2S_IV7 ^ state->f[1]; -+ -+#define G(r, i, a, b, c, d) do { \ -+ a += b + m[blake2s_sigma[r][2 * i + 0]]; \ -+ d = ror32(d ^ a, 16); \ -+ c += d; \ -+ b = ror32(b ^ c, 12); \ -+ a += b + m[blake2s_sigma[r][2 * i + 1]]; \ -+ d = ror32(d ^ 
a, 8); \ -+ c += d; \ -+ b = ror32(b ^ c, 7); \ -+} while (0) -+ -+#define ROUND(r) do { \ -+ G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ -+ G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ -+ G(r, 2, v[2], v[ 6], v[10], v[14]); \ -+ G(r, 3, v[3], v[ 7], v[11], v[15]); \ -+ G(r, 4, v[0], v[ 5], v[10], v[15]); \ -+ G(r, 5, v[1], v[ 6], v[11], v[12]); \ -+ G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ -+ G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ -+} while (0) -+ ROUND(0); -+ ROUND(1); -+ ROUND(2); -+ ROUND(3); -+ ROUND(4); -+ ROUND(5); -+ ROUND(6); -+ ROUND(7); -+ ROUND(8); -+ ROUND(9); -+ -+#undef G -+#undef ROUND -+ -+ for (i = 0; i < 8; ++i) -+ state->h[i] ^= v[i] ^ v[i + 8]; -+ -+ block += BLAKE2S_BLOCK_SIZE; -+ --nblocks; -+ } -+} -+ -+EXPORT_SYMBOL(blake2s_compress_generic); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_DESCRIPTION("BLAKE2s hash function"); -+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); ---- /dev/null -+++ b/lib/crypto/blake2s-selftest.c -@@ -0,0 +1,622 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include <crypto/blake2s.h> -+#include <linux/string.h> -+ -+/* -+ * blake2s_testvecs[] generated with the program below (using libb2-dev and -+ * libssl-dev [OpenSSL]) -+ * -+ * #include <blake2.h> -+ * #include <stdint.h> -+ * #include <stdio.h> -+ * -+ * #include <openssl/evp.h> -+ * #include <openssl/hmac.h> -+ * -+ * #define BLAKE2S_TESTVEC_COUNT 256 -+ * -+ * static void print_vec(const uint8_t vec[], int len) -+ * { -+ * int i; -+ * -+ * printf(" { "); -+ * for (i = 0; i < len; i++) { -+ * if (i && (i % 12) == 0) -+ * printf("\n "); -+ * printf("0x%02x, ", vec[i]); -+ * } -+ * printf("},\n"); -+ * } -+ * -+ * int main(void) -+ * { -+ * uint8_t key[BLAKE2S_KEYBYTES]; -+ * uint8_t buf[BLAKE2S_TESTVEC_COUNT]; -+ * uint8_t hash[BLAKE2S_OUTBYTES]; -+ * int i, j; -+ * -+ * key[0] = key[1] = 1; -+ * for (i = 2; i < BLAKE2S_KEYBYTES; ++i) -+ * key[i] = key[i - 2] + key[i - 1]; -+ * -+ * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) -+ * buf[i] = (uint8_t)i; -+ * -+ * printf("static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); -+ * -+ * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) { -+ * int outlen = 1 + i % BLAKE2S_OUTBYTES; -+ * int keylen = (13 * i) % (BLAKE2S_KEYBYTES + 1); -+ * -+ * blake2s(hash, buf, key + BLAKE2S_KEYBYTES - keylen, outlen, i, -+ * keylen); -+ * print_vec(hash, outlen); -+ * } -+ * printf("};\n\n"); -+ * -+ * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); -+ * -+ * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL); -+ * print_vec(hash, BLAKE2S_OUTBYTES); -+ * -+ * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL); -+ * print_vec(hash, BLAKE2S_OUTBYTES); -+ * -+ * printf("};\n"); -+ * -+ * return 0; -+ *} -+ */ -+static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { -+ { 0xa1, }, -+ { 0x7c, 0x89, }, -+ { 0x74, 0x0e, 0xd4, }, -+ { 0x47, 0x0c, 0x21, 0x15, }, -+ { 0x18, 0xd6, 0x9c, 0xa6, 0xc4, }, -+ { 0x13, 0x5d, 0x16, 
0x63, 0x2e, 0xf9, }, -+ { 0x2c, 0xb5, 0x04, 0xb7, 0x99, 0xe2, 0x73, }, -+ { 0x9a, 0x0f, 0xd2, 0x39, 0xd6, 0x68, 0x1b, 0x92, }, -+ { 0xc8, 0xde, 0x7a, 0xea, 0x2f, 0xf4, 0xd2, 0xe3, 0x2b, }, -+ { 0x5b, 0xf9, 0x43, 0x52, 0x0c, 0x12, 0xba, 0xb5, 0x93, 0x9f, }, -+ { 0xc6, 0x2c, 0x4e, 0x80, 0xfc, 0x32, 0x5b, 0x33, 0xb8, 0xb8, 0x0a, }, -+ { 0xa7, 0x5c, 0xfd, 0x3a, 0xcc, 0xbf, 0x90, 0xca, 0xb7, 0x97, 0xde, 0xd8, }, -+ { 0x66, 0xca, 0x3c, 0xc4, 0x19, 0xef, 0x92, 0x66, 0x3f, 0x21, 0x8f, 0xda, -+ 0xb7, }, -+ { 0xba, 0xe5, 0xbb, 0x30, 0x25, 0x94, 0x6d, 0xc3, 0x89, 0x09, 0xc4, 0x25, -+ 0x52, 0x3e, }, -+ { 0xa2, 0xef, 0x0e, 0x52, 0x0b, 0x5f, 0xa2, 0x01, 0x6d, 0x0a, 0x25, 0xbc, -+ 0x57, 0xe2, 0x27, }, -+ { 0x4f, 0xe0, 0xf9, 0x52, 0x12, 0xda, 0x84, 0xb7, 0xab, 0xae, 0xb0, 0xa6, -+ 0x47, 0x2a, 0xc7, 0xf5, }, -+ { 0x56, 0xe7, 0xa8, 0x1c, 0x4c, 0xca, 0xed, 0x90, 0x31, 0xec, 0x87, 0x43, -+ 0xe7, 0x72, 0x08, 0xec, 0xbe, }, -+ { 0x7e, 0xdf, 0x80, 0x1c, 0x93, 0x33, 0xfd, 0x53, 0x44, 0xba, 0xfd, 0x96, -+ 0xe1, 0xbb, 0xb5, 0x65, 0xa5, 0x00, }, -+ { 0xec, 0x6b, 0xed, 0xf7, 0x7b, 0x62, 0x1d, 0x7d, 0xf4, 0x82, 0xf3, 0x1e, -+ 0x18, 0xff, 0x2b, 0xc4, 0x06, 0x20, 0x2a, }, -+ { 0x74, 0x98, 0xd7, 0x68, 0x63, 0xed, 0x87, 0xe4, 0x5d, 0x8d, 0x9e, 0x1d, -+ 0xfd, 0x2a, 0xbb, 0x86, 0xac, 0xe9, 0x2a, 0x89, }, -+ { 0x89, 0xc3, 0x88, 0xce, 0x2b, 0x33, 0x1e, 0x10, 0xd1, 0x37, 0x20, 0x86, -+ 0x28, 0x43, 0x70, 0xd9, 0xfb, 0x96, 0xd9, 0xb5, 0xd3, }, -+ { 0xcb, 0x56, 0x74, 0x41, 0x8d, 0x80, 0x01, 0x9a, 0x6b, 0x38, 0xe1, 0x41, -+ 0xad, 0x9c, 0x62, 0x74, 0xce, 0x35, 0xd5, 0x6c, 0x89, 0x6e, }, -+ { 0x79, 0xaf, 0x94, 0x59, 0x99, 0x26, 0xe1, 0xc9, 0x34, 0xfe, 0x7c, 0x22, -+ 0xf7, 0x43, 0xd7, 0x65, 0xd4, 0x48, 0x18, 0xac, 0x3d, 0xfd, 0x93, }, -+ { 0x85, 0x0d, 0xff, 0xb8, 0x3e, 0x87, 0x41, 0xb0, 0x95, 0xd3, 0x3d, 0x00, -+ 0x47, 0x55, 0x9e, 0xd2, 0x69, 0xea, 0xbf, 0xe9, 0x7a, 0x2d, 0x61, 0x45, }, -+ { 0x03, 0xe0, 0x85, 0xec, 0x54, 0xb5, 0x16, 0x53, 0xa8, 0xc4, 0x71, 0xe9, -+ 0x6a, 0xe7, 0xcb, 0xc4, 0x15, 0x02, 0xfc, 
0x34, 0xa4, 0xa4, 0x28, 0x13, -+ 0xd1, }, -+ { 0xe3, 0x34, 0x4b, 0xe1, 0xd0, 0x4b, 0x55, 0x61, 0x8f, 0xc0, 0x24, 0x05, -+ 0xe6, 0xe0, 0x3d, 0x70, 0x24, 0x4d, 0xda, 0xb8, 0x91, 0x05, 0x29, 0x07, -+ 0x01, 0x3e, }, -+ { 0x61, 0xff, 0x01, 0x72, 0xb1, 0x4d, 0xf6, 0xfe, 0xd1, 0xd1, 0x08, 0x74, -+ 0xe6, 0x91, 0x44, 0xeb, 0x61, 0xda, 0x40, 0xaf, 0xfc, 0x8c, 0x91, 0x6b, -+ 0xec, 0x13, 0xed, }, -+ { 0xd4, 0x40, 0xd2, 0xa0, 0x7f, 0xc1, 0x58, 0x0c, 0x85, 0xa0, 0x86, 0xc7, -+ 0x86, 0xb9, 0x61, 0xc9, 0xea, 0x19, 0x86, 0x1f, 0xab, 0x07, 0xce, 0x37, -+ 0x72, 0x67, 0x09, 0xfc, }, -+ { 0x9e, 0xf8, 0x18, 0x67, 0x93, 0x10, 0x9b, 0x39, 0x75, 0xe8, 0x8b, 0x38, -+ 0x82, 0x7d, 0xb8, 0xb7, 0xa5, 0xaf, 0xe6, 0x6a, 0x22, 0x5e, 0x1f, 0x9c, -+ 0x95, 0x29, 0x19, 0xf2, 0x4b, }, -+ { 0xc8, 0x62, 0x25, 0xf5, 0x98, 0xc9, 0xea, 0xe5, 0x29, 0x3a, 0xd3, 0x22, -+ 0xeb, 0xeb, 0x07, 0x7c, 0x15, 0x07, 0xee, 0x15, 0x61, 0xbb, 0x05, 0x30, -+ 0x99, 0x7f, 0x11, 0xf6, 0x0a, 0x1d, }, -+ { 0x68, 0x70, 0xf7, 0x90, 0xa1, 0x8b, 0x1f, 0x0f, 0xbb, 0xce, 0xd2, 0x0e, -+ 0x33, 0x1f, 0x7f, 0xa9, 0x78, 0xa8, 0xa6, 0x81, 0x66, 0xab, 0x8d, 0xcd, -+ 0x58, 0x55, 0x3a, 0x0b, 0x7a, 0xdb, 0xb5, }, -+ { 0xdd, 0x35, 0xd2, 0xb4, 0xf6, 0xc7, 0xea, 0xab, 0x64, 0x24, 0x4e, 0xfe, -+ 0xe5, 0x3d, 0x4e, 0x95, 0x8b, 0x6d, 0x6c, 0xbc, 0xb0, 0xf8, 0x88, 0x61, -+ 0x09, 0xb7, 0x78, 0xa3, 0x31, 0xfe, 0xd9, 0x2f, }, -+ { 0x0a, }, -+ { 0x6e, 0xd4, }, -+ { 0x64, 0xe9, 0xd1, }, -+ { 0x30, 0xdd, 0x71, 0xef, }, -+ { 0x11, 0xb5, 0x0c, 0x87, 0xc9, }, -+ { 0x06, 0x1c, 0x6d, 0x04, 0x82, 0xd0, }, -+ { 0x5c, 0x42, 0x0b, 0xee, 0xc5, 0x9c, 0xb2, }, -+ { 0xe8, 0x29, 0xd6, 0xb4, 0x5d, 0xf7, 0x2b, 0x93, }, -+ { 0x18, 0xca, 0x27, 0x72, 0x43, 0x39, 0x16, 0xbc, 0x6a, }, -+ { 0x39, 0x8f, 0xfd, 0x64, 0xf5, 0x57, 0x23, 0xb0, 0x45, 0xf8, }, -+ { 0xbb, 0x3a, 0x78, 0x6b, 0x02, 0x1d, 0x0b, 0x16, 0xe3, 0xb2, 0x9a, }, -+ { 0xb8, 0xb4, 0x0b, 0xe5, 0xd4, 0x1d, 0x0d, 0x85, 0x49, 0x91, 0x35, 0xfa, }, -+ { 0x6d, 0x48, 0x2a, 0x0c, 0x42, 0x08, 0xbd, 0xa9, 0x78, 0x6f, 0x18, 0xaf, 
-+ 0xe2, }, -+ { 0x10, 0x45, 0xd4, 0x58, 0x88, 0xec, 0x4e, 0x1e, 0xf6, 0x14, 0x92, 0x64, -+ 0x7e, 0xb0, }, -+ { 0x8b, 0x0b, 0x95, 0xee, 0x92, 0xc6, 0x3b, 0x91, 0xf1, 0x1e, 0xeb, 0x51, -+ 0x98, 0x0a, 0x8d, }, -+ { 0xa3, 0x50, 0x4d, 0xa5, 0x1d, 0x03, 0x68, 0xe9, 0x57, 0x78, 0xd6, 0x04, -+ 0xf1, 0xc3, 0x94, 0xd8, }, -+ { 0xb8, 0x66, 0x6e, 0xdd, 0x46, 0x15, 0xae, 0x3d, 0x83, 0x7e, 0xcf, 0xe7, -+ 0x2c, 0xe8, 0x8f, 0xc7, 0x34, }, -+ { 0x2e, 0xc0, 0x1f, 0x29, 0xea, 0xf6, 0xb9, 0xe2, 0xc2, 0x93, 0xeb, 0x41, -+ 0x0d, 0xf0, 0x0a, 0x13, 0x0e, 0xa2, }, -+ { 0x71, 0xb8, 0x33, 0xa9, 0x1b, 0xac, 0xf1, 0xb5, 0x42, 0x8f, 0x5e, 0x81, -+ 0x34, 0x43, 0xb7, 0xa4, 0x18, 0x5c, 0x47, }, -+ { 0xda, 0x45, 0xb8, 0x2e, 0x82, 0x1e, 0xc0, 0x59, 0x77, 0x9d, 0xfa, 0xb4, -+ 0x1c, 0x5e, 0xa0, 0x2b, 0x33, 0x96, 0x5a, 0x58, }, -+ { 0xe3, 0x09, 0x05, 0xa9, 0xeb, 0x48, 0x13, 0xad, 0x71, 0x88, 0x81, 0x9a, -+ 0x3e, 0x2c, 0xe1, 0x23, 0x99, 0x13, 0x35, 0x9f, 0xb5, }, -+ { 0xb7, 0x86, 0x2d, 0x16, 0xe1, 0x04, 0x00, 0x47, 0x47, 0x61, 0x31, 0xfb, -+ 0x14, 0xac, 0xd8, 0xe9, 0xe3, 0x49, 0xbd, 0xf7, 0x9c, 0x3f, }, -+ { 0x7f, 0xd9, 0x95, 0xa8, 0xa7, 0xa0, 0xcc, 0xba, 0xef, 0xb1, 0x0a, 0xa9, -+ 0x21, 0x62, 0x08, 0x0f, 0x1b, 0xff, 0x7b, 0x9d, 0xae, 0xb2, 0x95, }, -+ { 0x85, 0x99, 0xea, 0x33, 0xe0, 0x56, 0xff, 0x13, 0xc6, 0x61, 0x8c, 0xf9, -+ 0x57, 0x05, 0x03, 0x11, 0xf9, 0xfb, 0x3a, 0xf7, 0xce, 0xbb, 0x52, 0x30, }, -+ { 0xb2, 0x72, 0x9c, 0xf8, 0x77, 0x4e, 0x8f, 0x6b, 0x01, 0x6c, 0xff, 0x4e, -+ 0x4f, 0x02, 0xd2, 0xbc, 0xeb, 0x51, 0x28, 0x99, 0x50, 0xab, 0xc4, 0x42, -+ 0xe3, }, -+ { 0x8b, 0x0a, 0xb5, 0x90, 0x8f, 0xf5, 0x7b, 0xdd, 0xba, 0x47, 0x37, 0xc9, -+ 0x2a, 0xd5, 0x4b, 0x25, 0x08, 0x8b, 0x02, 0x17, 0xa7, 0x9e, 0x6b, 0x6e, -+ 0xe3, 0x90, }, -+ { 0x90, 0xdd, 0xf7, 0x75, 0xa7, 0xa3, 0x99, 0x5e, 0x5b, 0x7d, 0x75, 0xc3, -+ 0x39, 0x6b, 0xa0, 0xe2, 0x44, 0x53, 0xb1, 0x9e, 0xc8, 0xf1, 0x77, 0x10, -+ 0x58, 0x06, 0x9a, }, -+ { 0x99, 0x52, 0xf0, 0x49, 0xa8, 0x8c, 0xec, 0xa6, 0x97, 0x32, 0x13, 0xb5, -+ 0xf7, 0xa3, 0x8e, 
0xfb, 0x4b, 0x59, 0x31, 0x3d, 0x01, 0x59, 0x98, 0x5d, -+ 0x53, 0x03, 0x1a, 0x39, }, -+ { 0x9f, 0xe0, 0xc2, 0xe5, 0x5d, 0x93, 0xd6, 0x9b, 0x47, 0x8f, 0x9b, 0xe0, -+ 0x26, 0x35, 0x84, 0x20, 0x1d, 0xc5, 0x53, 0x10, 0x0f, 0x22, 0xb9, 0xb5, -+ 0xd4, 0x36, 0xb1, 0xac, 0x73, }, -+ { 0x30, 0x32, 0x20, 0x3b, 0x10, 0x28, 0xec, 0x1f, 0x4f, 0x9b, 0x47, 0x59, -+ 0xeb, 0x7b, 0xee, 0x45, 0xfb, 0x0c, 0x49, 0xd8, 0x3d, 0x69, 0xbd, 0x90, -+ 0x2c, 0xf0, 0x9e, 0x8d, 0xbf, 0xd5, }, -+ { 0x2a, 0x37, 0x73, 0x7f, 0xf9, 0x96, 0x19, 0xaa, 0x25, 0xd8, 0x13, 0x28, -+ 0x01, 0x29, 0x89, 0xdf, 0x6e, 0x0c, 0x9b, 0x43, 0x44, 0x51, 0xe9, 0x75, -+ 0x26, 0x0c, 0xb7, 0x87, 0x66, 0x0b, 0x5f, }, -+ { 0x23, 0xdf, 0x96, 0x68, 0x91, 0x86, 0xd0, 0x93, 0x55, 0x33, 0x24, 0xf6, -+ 0xba, 0x08, 0x75, 0x5b, 0x59, 0x11, 0x69, 0xb8, 0xb9, 0xe5, 0x2c, 0x77, -+ 0x02, 0xf6, 0x47, 0xee, 0x81, 0xdd, 0xb9, 0x06, }, -+ { 0x9d, }, -+ { 0x9d, 0x7d, }, -+ { 0xfd, 0xc3, 0xda, }, -+ { 0xe8, 0x82, 0xcd, 0x21, }, -+ { 0xc3, 0x1d, 0x42, 0x4c, 0x74, }, -+ { 0xe9, 0xda, 0xf1, 0xa2, 0xe5, 0x7c, }, -+ { 0x52, 0xb8, 0x6f, 0x81, 0x5c, 0x3a, 0x4c, }, -+ { 0x5b, 0x39, 0x26, 0xfc, 0x92, 0x5e, 0xe0, 0x49, }, -+ { 0x59, 0xe4, 0x7c, 0x93, 0x1c, 0xf9, 0x28, 0x93, 0xde, }, -+ { 0xde, 0xdf, 0xb2, 0x43, 0x61, 0x0b, 0x86, 0x16, 0x4c, 0x2e, }, -+ { 0x14, 0x8f, 0x75, 0x51, 0xaf, 0xb9, 0xee, 0x51, 0x5a, 0xae, 0x23, }, -+ { 0x43, 0x5f, 0x50, 0xd5, 0x70, 0xb0, 0x5b, 0x87, 0xf5, 0xd9, 0xb3, 0x6d, }, -+ { 0x66, 0x0a, 0x64, 0x93, 0x79, 0x71, 0x94, 0x40, 0xb7, 0x68, 0x2d, 0xd3, -+ 0x63, }, -+ { 0x15, 0x00, 0xc4, 0x0c, 0x7d, 0x1b, 0x10, 0xa9, 0x73, 0x1b, 0x90, 0x6f, -+ 0xe6, 0xa9, }, -+ { 0x34, 0x75, 0xf3, 0x86, 0x8f, 0x56, 0xcf, 0x2a, 0x0a, 0xf2, 0x62, 0x0a, -+ 0xf6, 0x0e, 0x20, }, -+ { 0xb1, 0xde, 0xc9, 0xf5, 0xdb, 0xf3, 0x2f, 0x4c, 0xd6, 0x41, 0x7d, 0x39, -+ 0x18, 0x3e, 0xc7, 0xc3, }, -+ { 0xc5, 0x89, 0xb2, 0xf8, 0xb8, 0xc0, 0xa3, 0xb9, 0x3b, 0x10, 0x6d, 0x7c, -+ 0x92, 0xfc, 0x7f, 0x34, 0x41, }, -+ { 0xc4, 0xd8, 0xef, 0xba, 0xef, 0xd2, 0xaa, 0xc5, 0x6c, 
0x8e, 0x3e, 0xbb, -+ 0x12, 0xfc, 0x0f, 0x72, 0xbf, 0x0f, }, -+ { 0xdd, 0x91, 0xd1, 0x15, 0x9e, 0x7d, 0xf8, 0xc1, 0xb9, 0x14, 0x63, 0x96, -+ 0xb5, 0xcb, 0x83, 0x1d, 0x35, 0x1c, 0xec, }, -+ { 0xa9, 0xf8, 0x52, 0xc9, 0x67, 0x76, 0x2b, 0xad, 0xfb, 0xd8, 0x3a, 0xa6, -+ 0x74, 0x02, 0xae, 0xb8, 0x25, 0x2c, 0x63, 0x49, }, -+ { 0x77, 0x1f, 0x66, 0x70, 0xfd, 0x50, 0x29, 0xaa, 0xeb, 0xdc, 0xee, 0xba, -+ 0x75, 0x98, 0xdc, 0x93, 0x12, 0x3f, 0xdc, 0x7c, 0x38, }, -+ { 0xe2, 0xe1, 0x89, 0x5c, 0x37, 0x38, 0x6a, 0xa3, 0x40, 0xac, 0x3f, 0xb0, -+ 0xca, 0xfc, 0xa7, 0xf3, 0xea, 0xf9, 0x0f, 0x5d, 0x8e, 0x39, }, -+ { 0x0f, 0x67, 0xc8, 0x38, 0x01, 0xb1, 0xb7, 0xb8, 0xa2, 0xe7, 0x0a, 0x6d, -+ 0xd2, 0x63, 0x69, 0x9e, 0xcc, 0xf0, 0xf2, 0xbe, 0x9b, 0x98, 0xdd, }, -+ { 0x13, 0xe1, 0x36, 0x30, 0xfe, 0xc6, 0x01, 0x8a, 0xa1, 0x63, 0x96, 0x59, -+ 0xc2, 0xa9, 0x68, 0x3f, 0x58, 0xd4, 0x19, 0x0c, 0x40, 0xf3, 0xde, 0x02, }, -+ { 0xa3, 0x9e, 0xce, 0xda, 0x42, 0xee, 0x8c, 0x6c, 0x5a, 0x7d, 0xdc, 0x89, -+ 0x02, 0x77, 0xdd, 0xe7, 0x95, 0xbb, 0xff, 0x0d, 0xa4, 0xb5, 0x38, 0x1e, -+ 0xaf, }, -+ { 0x9a, 0xf6, 0xb5, 0x9a, 0x4f, 0xa9, 0x4f, 0x2c, 0x35, 0x3c, 0x24, 0xdc, -+ 0x97, 0x6f, 0xd9, 0xa1, 0x7d, 0x1a, 0x85, 0x0b, 0xf5, 0xda, 0x2e, 0xe7, -+ 0xb1, 0x1d, }, -+ { 0x84, 0x1e, 0x8e, 0x3d, 0x45, 0xa5, 0xf2, 0x27, 0xf3, 0x31, 0xfe, 0xb9, -+ 0xfb, 0xc5, 0x45, 0x99, 0x99, 0xdd, 0x93, 0x43, 0x02, 0xee, 0x58, 0xaf, -+ 0xee, 0x6a, 0xbe, }, -+ { 0x07, 0x2f, 0xc0, 0xa2, 0x04, 0xc4, 0xab, 0x7c, 0x26, 0xbb, 0xa8, 0xd8, -+ 0xe3, 0x1c, 0x75, 0x15, 0x64, 0x5d, 0x02, 0x6a, 0xf0, 0x86, 0xe9, 0xcd, -+ 0x5c, 0xef, 0xa3, 0x25, }, -+ { 0x2f, 0x3b, 0x1f, 0xb5, 0x91, 0x8f, 0x86, 0xe0, 0xdc, 0x31, 0x48, 0xb6, -+ 0xa1, 0x8c, 0xfd, 0x75, 0xbb, 0x7d, 0x3d, 0xc1, 0xf0, 0x10, 0x9a, 0xd8, -+ 0x4b, 0x0e, 0xe3, 0x94, 0x9f, }, -+ { 0x29, 0xbb, 0x8f, 0x6c, 0xd1, 0xf2, 0xb6, 0xaf, 0xe5, 0xe3, 0x2d, 0xdc, -+ 0x6f, 0xa4, 0x53, 0x88, 0xd8, 0xcf, 0x4d, 0x45, 0x42, 0x62, 0xdb, 0xdf, -+ 0xf8, 0x45, 0xc2, 0x13, 0xec, 0x35, }, -+ { 0x06, 0x3c, 0xe3, 
0x2c, 0x15, 0xc6, 0x43, 0x03, 0x81, 0xfb, 0x08, 0x76, -+ 0x33, 0xcb, 0x02, 0xc1, 0xba, 0x33, 0xe5, 0xe0, 0xd1, 0x92, 0xa8, 0x46, -+ 0x28, 0x3f, 0x3e, 0x9d, 0x2c, 0x44, 0x54, }, -+ { 0xea, 0xbb, 0x96, 0xf8, 0xd1, 0x8b, 0x04, 0x11, 0x40, 0x78, 0x42, 0x02, -+ 0x19, 0xd1, 0xbc, 0x65, 0x92, 0xd3, 0xc3, 0xd6, 0xd9, 0x19, 0xe7, 0xc3, -+ 0x40, 0x97, 0xbd, 0xd4, 0xed, 0xfa, 0x5e, 0x28, }, -+ { 0x02, }, -+ { 0x52, 0xa8, }, -+ { 0x38, 0x25, 0x0d, }, -+ { 0xe3, 0x04, 0xd4, 0x92, }, -+ { 0x97, 0xdb, 0xf7, 0x81, 0xca, }, -+ { 0x8a, 0x56, 0x9d, 0x62, 0x56, 0xcc, }, -+ { 0xa1, 0x8e, 0x3c, 0x72, 0x8f, 0x63, 0x03, }, -+ { 0xf7, 0xf3, 0x39, 0x09, 0x0a, 0xa1, 0xbb, 0x23, }, -+ { 0x6b, 0x03, 0xc0, 0xe9, 0xd9, 0x83, 0x05, 0x22, 0x01, }, -+ { 0x1b, 0x4b, 0xf5, 0xd6, 0x4f, 0x05, 0x75, 0x91, 0x4c, 0x7f, }, -+ { 0x4c, 0x8c, 0x25, 0x20, 0x21, 0xcb, 0xc2, 0x4b, 0x3a, 0x5b, 0x8d, }, -+ { 0x56, 0xe2, 0x77, 0xa0, 0xb6, 0x9f, 0x81, 0xec, 0x83, 0x75, 0xc4, 0xf9, }, -+ { 0x71, 0x70, 0x0f, 0xad, 0x4d, 0x35, 0x81, 0x9d, 0x88, 0x69, 0xf9, 0xaa, -+ 0xd3, }, -+ { 0x50, 0x6e, 0x86, 0x6e, 0x43, 0xc0, 0xc2, 0x44, 0xc2, 0xe2, 0xa0, 0x1c, -+ 0xb7, 0x9a, }, -+ { 0xe4, 0x7e, 0x72, 0xc6, 0x12, 0x8e, 0x7c, 0xfc, 0xbd, 0xe2, 0x08, 0x31, -+ 0x3d, 0x47, 0x3d, }, -+ { 0x08, 0x97, 0x5b, 0x80, 0xae, 0xc4, 0x1d, 0x50, 0x77, 0xdf, 0x1f, 0xd0, -+ 0x24, 0xf0, 0x17, 0xc0, }, -+ { 0x01, 0xb6, 0x29, 0xf4, 0xaf, 0x78, 0x5f, 0xb6, 0x91, 0xdd, 0x76, 0x76, -+ 0xd2, 0xfd, 0x0c, 0x47, 0x40, }, -+ { 0xa1, 0xd8, 0x09, 0x97, 0x7a, 0xa6, 0xc8, 0x94, 0xf6, 0x91, 0x7b, 0xae, -+ 0x2b, 0x9f, 0x0d, 0x83, 0x48, 0xf7, }, -+ { 0x12, 0xd5, 0x53, 0x7d, 0x9a, 0xb0, 0xbe, 0xd9, 0xed, 0xe9, 0x9e, 0xee, -+ 0x61, 0x5b, 0x42, 0xf2, 0xc0, 0x73, 0xc0, }, -+ { 0xd5, 0x77, 0xd6, 0x5c, 0x6e, 0xa5, 0x69, 0x2b, 0x3b, 0x8c, 0xd6, 0x7d, -+ 0x1d, 0xbe, 0x2c, 0xa1, 0x02, 0x21, 0xcd, 0x29, }, -+ { 0xa4, 0x98, 0x80, 0xca, 0x22, 0xcf, 0x6a, 0xab, 0x5e, 0x40, 0x0d, 0x61, -+ 0x08, 0x21, 0xef, 0xc0, 0x6c, 0x52, 0xb4, 0xb0, 0x53, }, -+ { 0xbf, 0xaf, 0x8f, 0x3b, 0x7a, 
0x97, 0x33, 0xe5, 0xca, 0x07, 0x37, 0xfd, -+ 0x15, 0xdf, 0xce, 0x26, 0x2a, 0xb1, 0xa7, 0x0b, 0xb3, 0xac, }, -+ { 0x16, 0x22, 0xe1, 0xbc, 0x99, 0x4e, 0x01, 0xf0, 0xfa, 0xff, 0x8f, 0xa5, -+ 0x0c, 0x61, 0xb0, 0xad, 0xcc, 0xb1, 0xe1, 0x21, 0x46, 0xfa, 0x2e, }, -+ { 0x11, 0x5b, 0x0b, 0x2b, 0xe6, 0x14, 0xc1, 0xd5, 0x4d, 0x71, 0x5e, 0x17, -+ 0xea, 0x23, 0xdd, 0x6c, 0xbd, 0x1d, 0xbe, 0x12, 0x1b, 0xee, 0x4c, 0x1a, }, -+ { 0x40, 0x88, 0x22, 0xf3, 0x20, 0x6c, 0xed, 0xe1, 0x36, 0x34, 0x62, 0x2c, -+ 0x98, 0x83, 0x52, 0xe2, 0x25, 0xee, 0xe9, 0xf5, 0xe1, 0x17, 0xf0, 0x5c, -+ 0xae, }, -+ { 0xc3, 0x76, 0x37, 0xde, 0x95, 0x8c, 0xca, 0x2b, 0x0c, 0x23, 0xe7, 0xb5, -+ 0x38, 0x70, 0x61, 0xcc, 0xff, 0xd3, 0x95, 0x7b, 0xf3, 0xff, 0x1f, 0x9d, -+ 0x59, 0x00, }, -+ { 0x0c, 0x19, 0x52, 0x05, 0x22, 0x53, 0xcb, 0x48, 0xd7, 0x10, 0x0e, 0x7e, -+ 0x14, 0x69, 0xb5, 0xa2, 0x92, 0x43, 0xa3, 0x9e, 0x4b, 0x8f, 0x51, 0x2c, -+ 0x5a, 0x2c, 0x3b, }, -+ { 0xe1, 0x9d, 0x70, 0x70, 0x28, 0xec, 0x86, 0x40, 0x55, 0x33, 0x56, 0xda, -+ 0x88, 0xca, 0xee, 0xc8, 0x6a, 0x20, 0xb1, 0xe5, 0x3d, 0x57, 0xf8, 0x3c, -+ 0x10, 0x07, 0x2a, 0xc4, }, -+ { 0x0b, 0xae, 0xf1, 0xc4, 0x79, 0xee, 0x1b, 0x3d, 0x27, 0x35, 0x8d, 0x14, -+ 0xd6, 0xae, 0x4e, 0x3c, 0xe9, 0x53, 0x50, 0xb5, 0xcc, 0x0c, 0xf7, 0xdf, -+ 0xee, 0xa1, 0x74, 0xd6, 0x71, }, -+ { 0xe6, 0xa4, 0xf4, 0x99, 0x98, 0xb9, 0x80, 0xea, 0x96, 0x7f, 0x4f, 0x33, -+ 0xcf, 0x74, 0x25, 0x6f, 0x17, 0x6c, 0xbf, 0xf5, 0x5c, 0x38, 0xd0, 0xff, -+ 0x96, 0xcb, 0x13, 0xf9, 0xdf, 0xfd, }, -+ { 0xbe, 0x92, 0xeb, 0xba, 0x44, 0x2c, 0x24, 0x74, 0xd4, 0x03, 0x27, 0x3c, -+ 0x5d, 0x5b, 0x03, 0x30, 0x87, 0x63, 0x69, 0xe0, 0xb8, 0x94, 0xf4, 0x44, -+ 0x7e, 0xad, 0xcd, 0x20, 0x12, 0x16, 0x79, }, -+ { 0x30, 0xf1, 0xc4, 0x8e, 0x05, 0x90, 0x2a, 0x97, 0x63, 0x94, 0x46, 0xff, -+ 0xce, 0xd8, 0x67, 0xa7, 0xac, 0x33, 0x8c, 0x95, 0xb7, 0xcd, 0xa3, 0x23, -+ 0x98, 0x9d, 0x76, 0x6c, 0x9d, 0xa8, 0xd6, 0x8a, }, -+ { 0xbe, }, -+ { 0x17, 0x6c, }, -+ { 0x1a, 0x42, 0x4f, }, -+ { 0xba, 0xaf, 0xb7, 0x65, }, -+ { 0xc2, 
0x63, 0x43, 0x6a, 0xea, }, -+ { 0xe4, 0x4d, 0xad, 0xf2, 0x0b, 0x02, }, -+ { 0x04, 0xc7, 0xc4, 0x7f, 0xa9, 0x2b, 0xce, }, -+ { 0x66, 0xf6, 0x67, 0xcb, 0x03, 0x53, 0xc8, 0xf1, }, -+ { 0x56, 0xa3, 0x60, 0x78, 0xc9, 0x5f, 0x70, 0x1b, 0x5e, }, -+ { 0x99, 0xff, 0x81, 0x7c, 0x13, 0x3c, 0x29, 0x79, 0x4b, 0x65, }, -+ { 0x51, 0x10, 0x50, 0x93, 0x01, 0x93, 0xb7, 0x01, 0xc9, 0x18, 0xb7, }, -+ { 0x8e, 0x3c, 0x42, 0x1e, 0x5e, 0x7d, 0xc1, 0x50, 0x70, 0x1f, 0x00, 0x98, }, -+ { 0x5f, 0xd9, 0x9b, 0xc8, 0xd7, 0xb2, 0x72, 0x62, 0x1a, 0x1e, 0xba, 0x92, -+ 0xe9, }, -+ { 0x70, 0x2b, 0xba, 0xfe, 0xad, 0x5d, 0x96, 0x3f, 0x27, 0xc2, 0x41, 0x6d, -+ 0xc4, 0xb3, }, -+ { 0xae, 0xe0, 0xd5, 0xd4, 0xc7, 0xae, 0x15, 0x5e, 0xdc, 0xdd, 0x33, 0x60, -+ 0xd7, 0xd3, 0x5e, }, -+ { 0x79, 0x8e, 0xbc, 0x9e, 0x20, 0xb9, 0x19, 0x4b, 0x63, 0x80, 0xf3, 0x16, -+ 0xaf, 0x39, 0xbd, 0x92, }, -+ { 0xc2, 0x0e, 0x85, 0xa0, 0x0b, 0x9a, 0xb0, 0xec, 0xde, 0x38, 0xd3, 0x10, -+ 0xd9, 0xa7, 0x66, 0x27, 0xcf, }, -+ { 0x0e, 0x3b, 0x75, 0x80, 0x67, 0x14, 0x0c, 0x02, 0x90, 0xd6, 0xb3, 0x02, -+ 0x81, 0xf6, 0xa6, 0x87, 0xce, 0x58, }, -+ { 0x79, 0xb5, 0xe9, 0x5d, 0x52, 0x4d, 0xf7, 0x59, 0xf4, 0x2e, 0x27, 0xdd, -+ 0xb3, 0xed, 0x57, 0x5b, 0x82, 0xea, 0x6f, }, -+ { 0xa2, 0x97, 0xf5, 0x80, 0x02, 0x3d, 0xde, 0xa3, 0xf9, 0xf6, 0xab, 0xe3, -+ 0x57, 0x63, 0x7b, 0x9b, 0x10, 0x42, 0x6f, 0xf2, }, -+ { 0x12, 0x7a, 0xfc, 0xb7, 0x67, 0x06, 0x0c, 0x78, 0x1a, 0xfe, 0x88, 0x4f, -+ 0xc6, 0xac, 0x52, 0x96, 0x64, 0x28, 0x97, 0x84, 0x06, }, -+ { 0xc5, 0x04, 0x44, 0x6b, 0xb2, 0xa5, 0xa4, 0x66, 0xe1, 0x76, 0xa2, 0x51, -+ 0xf9, 0x59, 0x69, 0x97, 0x56, 0x0b, 0xbf, 0x50, 0xb3, 0x34, }, -+ { 0x21, 0x32, 0x6b, 0x42, 0xb5, 0xed, 0x71, 0x8d, 0xf7, 0x5a, 0x35, 0xe3, -+ 0x90, 0xe2, 0xee, 0xaa, 0x89, 0xf6, 0xc9, 0x9c, 0x4d, 0x73, 0xf4, }, -+ { 0x4c, 0xa6, 0x09, 0xf4, 0x48, 0xe7, 0x46, 0xbc, 0x49, 0xfc, 0xe5, 0xda, -+ 0xd1, 0x87, 0x13, 0x17, 0x4c, 0x59, 0x71, 0x26, 0x5b, 0x2c, 0x42, 0xb7, }, -+ { 0x13, 0x63, 0xf3, 0x40, 0x02, 0xe5, 0xa3, 0x3a, 0x5e, 0x8e, 0xf8, 
0xb6, -+ 0x8a, 0x49, 0x60, 0x76, 0x34, 0x72, 0x94, 0x73, 0xf6, 0xd9, 0x21, 0x6a, -+ 0x26, }, -+ { 0xdf, 0x75, 0x16, 0x10, 0x1b, 0x5e, 0x81, 0xc3, 0xc8, 0xde, 0x34, 0x24, -+ 0xb0, 0x98, 0xeb, 0x1b, 0x8f, 0xa1, 0x9b, 0x05, 0xee, 0xa5, 0xe9, 0x35, -+ 0xf4, 0x1d, }, -+ { 0xcd, 0x21, 0x93, 0x6e, 0x5b, 0xa0, 0x26, 0x2b, 0x21, 0x0e, 0xa0, 0xb9, -+ 0x1c, 0xb5, 0xbb, 0xb8, 0xf8, 0x1e, 0xff, 0x5c, 0xa8, 0xf9, 0x39, 0x46, -+ 0x4e, 0x29, 0x26, }, -+ { 0x73, 0x7f, 0x0e, 0x3b, 0x0b, 0x5c, 0xf9, 0x60, 0xaa, 0x88, 0xa1, 0x09, -+ 0xb1, 0x5d, 0x38, 0x7b, 0x86, 0x8f, 0x13, 0x7a, 0x8d, 0x72, 0x7a, 0x98, -+ 0x1a, 0x5b, 0xff, 0xc9, }, -+ { 0xd3, 0x3c, 0x61, 0x71, 0x44, 0x7e, 0x31, 0x74, 0x98, 0x9d, 0x9a, 0xd2, -+ 0x27, 0xf3, 0x46, 0x43, 0x42, 0x51, 0xd0, 0x5f, 0xe9, 0x1c, 0x5c, 0x69, -+ 0xbf, 0xf6, 0xbe, 0x3c, 0x40, }, -+ { 0x31, 0x99, 0x31, 0x9f, 0xaa, 0x43, 0x2e, 0x77, 0x3e, 0x74, 0x26, 0x31, -+ 0x5e, 0x61, 0xf1, 0x87, 0xe2, 0xeb, 0x9b, 0xcd, 0xd0, 0x3a, 0xee, 0x20, -+ 0x7e, 0x10, 0x0a, 0x0b, 0x7e, 0xfa, }, -+ { 0xa4, 0x27, 0x80, 0x67, 0x81, 0x2a, 0xa7, 0x62, 0xf7, 0x6e, 0xda, 0xd4, -+ 0x5c, 0x39, 0x74, 0xad, 0x7e, 0xbe, 0xad, 0xa5, 0x84, 0x7f, 0xa9, 0x30, -+ 0x5d, 0xdb, 0xe2, 0x05, 0x43, 0xf7, 0x1b, }, -+ { 0x0b, 0x37, 0xd8, 0x02, 0xe1, 0x83, 0xd6, 0x80, 0xf2, 0x35, 0xc2, 0xb0, -+ 0x37, 0xef, 0xef, 0x5e, 0x43, 0x93, 0xf0, 0x49, 0x45, 0x0a, 0xef, 0xb5, -+ 0x76, 0x70, 0x12, 0x44, 0xc4, 0xdb, 0xf5, 0x7a, }, -+ { 0x1f, }, -+ { 0x82, 0x60, }, -+ { 0xcc, 0xe3, 0x08, }, -+ { 0x56, 0x17, 0xe4, 0x59, }, -+ { 0xe2, 0xd7, 0x9e, 0xc4, 0x4c, }, -+ { 0xb2, 0xad, 0xd3, 0x78, 0x58, 0x5a, }, -+ { 0xce, 0x43, 0xb4, 0x02, 0x96, 0xab, 0x3c, }, -+ { 0xe6, 0x05, 0x1a, 0x73, 0x22, 0x32, 0xbb, 0x77, }, -+ { 0x23, 0xe7, 0xda, 0xfe, 0x2c, 0xef, 0x8c, 0x22, 0xec, }, -+ { 0xe9, 0x8e, 0x55, 0x38, 0xd1, 0xd7, 0x35, 0x23, 0x98, 0xc7, }, -+ { 0xb5, 0x81, 0x1a, 0xe5, 0xb5, 0xa5, 0xd9, 0x4d, 0xca, 0x41, 0xe7, }, -+ { 0x41, 0x16, 0x16, 0x95, 0x8d, 0x9e, 0x0c, 0xea, 0x8c, 0x71, 0x9a, 0xc1, }, -+ { 0x7c, 0x33, 0xc0, 
0xa4, 0x00, 0x62, 0xea, 0x60, 0x67, 0xe4, 0x20, 0xbc, -+ 0x5b, }, -+ { 0xdb, 0xb1, 0xdc, 0xfd, 0x08, 0xc0, 0xde, 0x82, 0xd1, 0xde, 0x38, 0xc0, -+ 0x90, 0x48, }, -+ { 0x37, 0x18, 0x2e, 0x0d, 0x61, 0xaa, 0x61, 0xd7, 0x86, 0x20, 0x16, 0x60, -+ 0x04, 0xd9, 0xd5, }, -+ { 0xb0, 0xcf, 0x2c, 0x4c, 0x5e, 0x5b, 0x4f, 0x2a, 0x23, 0x25, 0x58, 0x47, -+ 0xe5, 0x31, 0x06, 0x70, }, -+ { 0x91, 0xa0, 0xa3, 0x86, 0x4e, 0xe0, 0x72, 0x38, 0x06, 0x67, 0x59, 0x5c, -+ 0x70, 0x25, 0xdb, 0x33, 0x27, }, -+ { 0x44, 0x58, 0x66, 0xb8, 0x58, 0xc7, 0x13, 0xed, 0x4c, 0xc0, 0xf4, 0x9a, -+ 0x1e, 0x67, 0x75, 0x33, 0xb6, 0xb8, }, -+ { 0x7f, 0x98, 0x4a, 0x8e, 0x50, 0xa2, 0x5c, 0xcd, 0x59, 0xde, 0x72, 0xb3, -+ 0x9d, 0xc3, 0x09, 0x8a, 0xab, 0x56, 0xf1, }, -+ { 0x80, 0x96, 0x49, 0x1a, 0x59, 0xa2, 0xc5, 0xd5, 0xa7, 0x20, 0x8a, 0xb7, -+ 0x27, 0x62, 0x84, 0x43, 0xc6, 0xe1, 0x1b, 0x5d, }, -+ { 0x6b, 0xb7, 0x2b, 0x26, 0x62, 0x14, 0x70, 0x19, 0x3d, 0x4d, 0xac, 0xac, -+ 0x63, 0x58, 0x5e, 0x94, 0xb5, 0xb7, 0xe8, 0xe8, 0xa2, }, -+ { 0x20, 0xa8, 0xc0, 0xfd, 0x63, 0x3d, 0x6e, 0x98, 0xcf, 0x0c, 0x49, 0x98, -+ 0xe4, 0x5a, 0xfe, 0x8c, 0xaa, 0x70, 0x82, 0x1c, 0x7b, 0x74, }, -+ { 0xc8, 0xe8, 0xdd, 0xdf, 0x69, 0x30, 0x01, 0xc2, 0x0f, 0x7e, 0x2f, 0x11, -+ 0xcc, 0x3e, 0x17, 0xa5, 0x69, 0x40, 0x3f, 0x0e, 0x79, 0x7f, 0xcf, }, -+ { 0xdb, 0x61, 0xc0, 0xe2, 0x2e, 0x49, 0x07, 0x31, 0x1d, 0x91, 0x42, 0x8a, -+ 0xfc, 0x5e, 0xd3, 0xf8, 0x56, 0x1f, 0x2b, 0x73, 0xfd, 0x9f, 0xb2, 0x8e, }, -+ { 0x0c, 0x89, 0x55, 0x0c, 0x1f, 0x59, 0x2c, 0x9d, 0x1b, 0x29, 0x1d, 0x41, -+ 0x1d, 0xe6, 0x47, 0x8f, 0x8c, 0x2b, 0xea, 0x8f, 0xf0, 0xff, 0x21, 0x70, -+ 0x88, }, -+ { 0x12, 0x18, 0x95, 0xa6, 0x59, 0xb1, 0x31, 0x24, 0x45, 0x67, 0x55, 0xa4, -+ 0x1a, 0x2d, 0x48, 0x67, 0x1b, 0x43, 0x88, 0x2d, 0x8e, 0xa0, 0x70, 0xb3, -+ 0xc6, 0xbb, }, -+ { 0xe7, 0xb1, 0x1d, 0xb2, 0x76, 0x4d, 0x68, 0x68, 0x68, 0x23, 0x02, 0x55, -+ 0x3a, 0xe2, 0xe5, 0xd5, 0x4b, 0x43, 0xf9, 0x34, 0x77, 0x5c, 0xa1, 0xf5, -+ 0x55, 0xfd, 0x4f, }, -+ { 0x8c, 0x87, 0x5a, 0x08, 0x3a, 0x73, 0xad, 
0x61, 0xe1, 0xe7, 0x99, 0x7e, -+ 0xf0, 0x5d, 0xe9, 0x5d, 0x16, 0x43, 0x80, 0x2f, 0xd0, 0x66, 0x34, 0xe2, -+ 0x42, 0x64, 0x3b, 0x1a, }, -+ { 0x39, 0xc1, 0x99, 0xcf, 0x22, 0xbf, 0x16, 0x8f, 0x9f, 0x80, 0x7f, 0x95, -+ 0x0a, 0x05, 0x67, 0x27, 0xe7, 0x15, 0xdf, 0x9d, 0xb2, 0xfe, 0x1c, 0xb5, -+ 0x1d, 0x60, 0x8f, 0x8a, 0x1d, }, -+ { 0x9b, 0x6e, 0x08, 0x09, 0x06, 0x73, 0xab, 0x68, 0x02, 0x62, 0x1a, 0xe4, -+ 0xd4, 0xdf, 0xc7, 0x02, 0x4c, 0x6a, 0x5f, 0xfd, 0x23, 0xac, 0xae, 0x6d, -+ 0x43, 0xa4, 0x7a, 0x50, 0x60, 0x3c, }, -+ { 0x1d, 0xb4, 0xc6, 0xe1, 0xb1, 0x4b, 0xe3, 0xf2, 0xe2, 0x1a, 0x73, 0x1b, -+ 0xa0, 0x92, 0xa7, 0xf5, 0xff, 0x8f, 0x8b, 0x5d, 0xdf, 0xa8, 0x04, 0xb3, -+ 0xb0, 0xf7, 0xcc, 0x12, 0xfa, 0x35, 0x46, }, -+ { 0x49, 0x45, 0x97, 0x11, 0x0f, 0x1c, 0x60, 0x8e, 0xe8, 0x47, 0x30, 0xcf, -+ 0x60, 0xa8, 0x71, 0xc5, 0x1b, 0xe9, 0x39, 0x4d, 0x49, 0xb6, 0x12, 0x1f, -+ 0x24, 0xab, 0x37, 0xff, 0x83, 0xc2, 0xe1, 0x3a, }, -+ { 0x60, }, -+ { 0x24, 0x26, }, -+ { 0x47, 0xeb, 0xc9, }, -+ { 0x4a, 0xd0, 0xbc, 0xf0, }, -+ { 0x8e, 0x2b, 0xc9, 0x85, 0x3c, }, -+ { 0xa2, 0x07, 0x15, 0xb8, 0x12, 0x74, }, -+ { 0x0f, 0xdb, 0x5b, 0x33, 0x69, 0xfe, 0x4b, }, -+ { 0xa2, 0x86, 0x54, 0xf4, 0xfd, 0xb2, 0xd4, 0xe6, }, -+ { 0xbb, 0x84, 0x78, 0x49, 0x27, 0x8e, 0x61, 0xda, 0x60, }, -+ { 0x04, 0xc3, 0xcd, 0xaa, 0x8f, 0xa7, 0x03, 0xc9, 0xf9, 0xb6, }, -+ { 0xf8, 0x27, 0x1d, 0x61, 0xdc, 0x21, 0x42, 0xdd, 0xad, 0x92, 0x40, }, -+ { 0x12, 0x87, 0xdf, 0xc2, 0x41, 0x45, 0x5a, 0x36, 0x48, 0x5b, 0x51, 0x2b, }, -+ { 0xbb, 0x37, 0x5d, 0x1f, 0xf1, 0x68, 0x7a, 0xc4, 0xa5, 0xd2, 0xa4, 0x91, -+ 0x8d, }, -+ { 0x5b, 0x27, 0xd1, 0x04, 0x54, 0x52, 0x9f, 0xa3, 0x47, 0x86, 0x33, 0x33, -+ 0xbf, 0xa0, }, -+ { 0xcf, 0x04, 0xea, 0xf8, 0x03, 0x2a, 0x43, 0xff, 0xa6, 0x68, 0x21, 0x4c, -+ 0xd5, 0x4b, 0xed, }, -+ { 0xaf, 0xb8, 0xbc, 0x63, 0x0f, 0x18, 0x4d, 0xe2, 0x7a, 0xdd, 0x46, 0x44, -+ 0xc8, 0x24, 0x0a, 0xb7, }, -+ { 0x3e, 0xdc, 0x36, 0xe4, 0x89, 0xb1, 0xfa, 0xc6, 0x40, 0x93, 0x2e, 0x75, -+ 0xb2, 0x15, 0xd1, 0xb1, 0x10, }, -+ { 
0x6c, 0xd8, 0x20, 0x3b, 0x82, 0x79, 0xf9, 0xc8, 0xbc, 0x9d, 0xe0, 0x35, -+ 0xbe, 0x1b, 0x49, 0x1a, 0xbc, 0x3a, }, -+ { 0x78, 0x65, 0x2c, 0xbe, 0x35, 0x67, 0xdc, 0x78, 0xd4, 0x41, 0xf6, 0xc9, -+ 0xde, 0xde, 0x1f, 0x18, 0x13, 0x31, 0x11, }, -+ { 0x8a, 0x7f, 0xb1, 0x33, 0x8f, 0x0c, 0x3c, 0x0a, 0x06, 0x61, 0xf0, 0x47, -+ 0x29, 0x1b, 0x29, 0xbc, 0x1c, 0x47, 0xef, 0x7a, }, -+ { 0x65, 0x91, 0xf1, 0xe6, 0xb3, 0x96, 0xd3, 0x8c, 0xc2, 0x4a, 0x59, 0x35, -+ 0x72, 0x8e, 0x0b, 0x9a, 0x87, 0xca, 0x34, 0x7b, 0x63, }, -+ { 0x5f, 0x08, 0x87, 0x80, 0x56, 0x25, 0x89, 0x77, 0x61, 0x8c, 0x64, 0xa1, -+ 0x59, 0x6d, 0x59, 0x62, 0xe8, 0x4a, 0xc8, 0x58, 0x99, 0xd1, }, -+ { 0x23, 0x87, 0x1d, 0xed, 0x6f, 0xf2, 0x91, 0x90, 0xe2, 0xfe, 0x43, 0x21, -+ 0xaf, 0x97, 0xc6, 0xbc, 0xd7, 0x15, 0xc7, 0x2d, 0x08, 0x77, 0x91, }, -+ { 0x90, 0x47, 0x9a, 0x9e, 0x3a, 0xdf, 0xf3, 0xc9, 0x4c, 0x1e, 0xa7, 0xd4, -+ 0x6a, 0x32, 0x90, 0xfe, 0xb7, 0xb6, 0x7b, 0xfa, 0x96, 0x61, 0xfb, 0xa4, }, -+ { 0xb1, 0x67, 0x60, 0x45, 0xb0, 0x96, 0xc5, 0x15, 0x9f, 0x4d, 0x26, 0xd7, -+ 0x9d, 0xf1, 0xf5, 0x6d, 0x21, 0x00, 0x94, 0x31, 0x64, 0x94, 0xd3, 0xa7, -+ 0xd3, }, -+ { 0x02, 0x3e, 0xaf, 0xf3, 0x79, 0x73, 0xa5, 0xf5, 0xcc, 0x7a, 0x7f, 0xfb, -+ 0x79, 0x2b, 0x85, 0x8c, 0x88, 0x72, 0x06, 0xbe, 0xfe, 0xaf, 0xc1, 0x16, -+ 0xa6, 0xd6, }, -+ { 0x2a, 0xb0, 0x1a, 0xe5, 0xaa, 0x6e, 0xb3, 0xae, 0x53, 0x85, 0x33, 0x80, -+ 0x75, 0xae, 0x30, 0xe6, 0xb8, 0x72, 0x42, 0xf6, 0x25, 0x4f, 0x38, 0x88, -+ 0x55, 0xd1, 0xa9, }, -+ { 0x90, 0xd8, 0x0c, 0xc0, 0x93, 0x4b, 0x4f, 0x9e, 0x65, 0x6c, 0xa1, 0x54, -+ 0xa6, 0xf6, 0x6e, 0xca, 0xd2, 0xbb, 0x7e, 0x6a, 0x1c, 0xd3, 0xce, 0x46, -+ 0xef, 0xb0, 0x00, 0x8d, }, -+ { 0xed, 0x9c, 0x49, 0xcd, 0xc2, 0xde, 0x38, 0x0e, 0xe9, 0x98, 0x6c, 0xc8, -+ 0x90, 0x9e, 0x3c, 0xd4, 0xd3, 0xeb, 0x88, 0x32, 0xc7, 0x28, 0xe3, 0x94, -+ 0x1c, 0x9f, 0x8b, 0xf3, 0xcb, }, -+ { 0xac, 0xe7, 0x92, 0x16, 0xb4, 0x14, 0xa0, 0xe4, 0x04, 0x79, 0xa2, 0xf4, -+ 0x31, 0xe6, 0x0c, 0x26, 0xdc, 0xbf, 0x2f, 0x69, 0x1b, 0x55, 0x94, 0x67, -+ 0xda, 
0x0c, 0xd7, 0x32, 0x1f, 0xef, }, -+ { 0x68, 0x63, 0x85, 0x57, 0x95, 0x9e, 0x42, 0x27, 0x41, 0x43, 0x42, 0x02, -+ 0xa5, 0x78, 0xa7, 0xc6, 0x43, 0xc1, 0x6a, 0xba, 0x70, 0x80, 0xcd, 0x04, -+ 0xb6, 0x78, 0x76, 0x29, 0xf3, 0xe8, 0xa0, }, -+ { 0xe6, 0xac, 0x8d, 0x9d, 0xf0, 0xc0, 0xf7, 0xf7, 0xe3, 0x3e, 0x4e, 0x28, -+ 0x0f, 0x59, 0xb2, 0x67, 0x9e, 0x84, 0x34, 0x42, 0x96, 0x30, 0x2b, 0xca, -+ 0x49, 0xb6, 0xc5, 0x9a, 0x84, 0x59, 0xa7, 0x81, }, -+ { 0x7e, }, -+ { 0x1e, 0x21, }, -+ { 0x26, 0xd3, 0xdd, }, -+ { 0x2c, 0xd4, 0xb3, 0x3d, }, -+ { 0x86, 0x7b, 0x76, 0x3c, 0xf0, }, -+ { 0x12, 0xc3, 0x70, 0x1d, 0x55, 0x18, }, -+ { 0x96, 0xc2, 0xbd, 0x61, 0x55, 0xf4, 0x24, }, -+ { 0x20, 0x51, 0xf7, 0x86, 0x58, 0x8f, 0x07, 0x2a, }, -+ { 0x93, 0x15, 0xa8, 0x1d, 0xda, 0x97, 0xee, 0x0e, 0x6c, }, -+ { 0x39, 0x93, 0xdf, 0xd5, 0x0e, 0xca, 0xdc, 0x7a, 0x92, 0xce, }, -+ { 0x60, 0xd5, 0xfd, 0xf5, 0x1b, 0x26, 0x82, 0x26, 0x73, 0x02, 0xbc, }, -+ { 0x98, 0xf2, 0x34, 0xe1, 0xf5, 0xfb, 0x00, 0xac, 0x10, 0x4a, 0x38, 0x9f, }, -+ { 0xda, 0x3a, 0x92, 0x8a, 0xd0, 0xcd, 0x12, 0xcd, 0x15, 0xbb, 0xab, 0x77, -+ 0x66, }, -+ { 0xa2, 0x92, 0x1a, 0xe5, 0xca, 0x0c, 0x30, 0x75, 0xeb, 0xaf, 0x00, 0x31, -+ 0x55, 0x66, }, -+ { 0x06, 0xea, 0xfd, 0x3e, 0x86, 0x38, 0x62, 0x4e, 0xa9, 0x12, 0xa4, 0x12, -+ 0x43, 0xbf, 0xa1, }, -+ { 0xe4, 0x71, 0x7b, 0x94, 0xdb, 0xa0, 0xd2, 0xff, 0x9b, 0xeb, 0xad, 0x8e, -+ 0x95, 0x8a, 0xc5, 0xed, }, -+ { 0x25, 0x5a, 0x77, 0x71, 0x41, 0x0e, 0x7a, 0xe9, 0xed, 0x0c, 0x10, 0xef, -+ 0xf6, 0x2b, 0x3a, 0xba, 0x60, }, -+ { 0xee, 0xe2, 0xa3, 0x67, 0x64, 0x1d, 0xc6, 0x04, 0xc4, 0xe1, 0x68, 0xd2, -+ 0x6e, 0xd2, 0x91, 0x75, 0x53, 0x07, }, -+ { 0xe0, 0xf6, 0x4d, 0x8f, 0x68, 0xfc, 0x06, 0x7e, 0x18, 0x79, 0x7f, 0x2b, -+ 0x6d, 0xef, 0x46, 0x7f, 0xab, 0xb2, 0xad, }, -+ { 0x3d, 0x35, 0x88, 0x9f, 0x2e, 0xcf, 0x96, 0x45, 0x07, 0x60, 0x71, 0x94, -+ 0x00, 0x8d, 0xbf, 0xf4, 0xef, 0x46, 0x2e, 0x3c, }, -+ { 0x43, 0xcf, 0x98, 0xf7, 0x2d, 0xf4, 0x17, 0xe7, 0x8c, 0x05, 0x2d, 0x9b, -+ 0x24, 0xfb, 0x4d, 0xea, 0x4a, 0xec, 
0x01, 0x25, 0x29, }, -+ { 0x8e, 0x73, 0x9a, 0x78, 0x11, 0xfe, 0x48, 0xa0, 0x3b, 0x1a, 0x26, 0xdf, -+ 0x25, 0xe9, 0x59, 0x1c, 0x70, 0x07, 0x9f, 0xdc, 0xa0, 0xa6, }, -+ { 0xe8, 0x47, 0x71, 0xc7, 0x3e, 0xdf, 0xb5, 0x13, 0xb9, 0x85, 0x13, 0xa8, -+ 0x54, 0x47, 0x6e, 0x59, 0x96, 0x09, 0x13, 0x5f, 0x82, 0x16, 0x0b, }, -+ { 0xfb, 0xc0, 0x8c, 0x03, 0x21, 0xb3, 0xc4, 0xb5, 0x43, 0x32, 0x6c, 0xea, -+ 0x7f, 0xa8, 0x43, 0x91, 0xe8, 0x4e, 0x3f, 0xbf, 0x45, 0x58, 0x6a, 0xa3, }, -+ { 0x55, 0xf8, 0xf3, 0x00, 0x76, 0x09, 0xef, 0x69, 0x5d, 0xd2, 0x8a, 0xf2, -+ 0x65, 0xc3, 0xcb, 0x9b, 0x43, 0xfd, 0xb1, 0x7e, 0x7f, 0xa1, 0x94, 0xb0, -+ 0xd7, }, -+ { 0xaa, 0x13, 0xc1, 0x51, 0x40, 0x6d, 0x8d, 0x4c, 0x0a, 0x95, 0x64, 0x7b, -+ 0xd1, 0x96, 0xb6, 0x56, 0xb4, 0x5b, 0xcf, 0xd6, 0xd9, 0x15, 0x97, 0xdd, -+ 0xb6, 0xef, }, -+ { 0xaf, 0xb7, 0x36, 0xb0, 0x04, 0xdb, 0xd7, 0x9c, 0x9a, 0x44, 0xc4, 0xf6, -+ 0x1f, 0x12, 0x21, 0x2d, 0x59, 0x30, 0x54, 0xab, 0x27, 0x61, 0xa3, 0x57, -+ 0xef, 0xf8, 0x53, }, -+ { 0x97, 0x34, 0x45, 0x3e, 0xce, 0x7c, 0x35, 0xa2, 0xda, 0x9f, 0x4b, 0x46, -+ 0x6c, 0x11, 0x67, 0xff, 0x2f, 0x76, 0x58, 0x15, 0x71, 0xfa, 0x44, 0x89, -+ 0x89, 0xfd, 0xf7, 0x99, }, -+ { 0x1f, 0xb1, 0x62, 0xeb, 0x83, 0xc5, 0x9c, 0x89, 0xf9, 0x2c, 0xd2, 0x03, -+ 0x61, 0xbc, 0xbb, 0xa5, 0x74, 0x0e, 0x9b, 0x7e, 0x82, 0x3e, 0x70, 0x0a, -+ 0xa9, 0x8f, 0x2b, 0x59, 0xfb, }, -+ { 0xf8, 0xca, 0x5e, 0x3a, 0x4f, 0x9e, 0x10, 0x69, 0x10, 0xd5, 0x4c, 0xeb, -+ 0x1a, 0x0f, 0x3c, 0x6a, 0x98, 0xf5, 0xb0, 0x97, 0x5b, 0x37, 0x2f, 0x0d, -+ 0xbd, 0x42, 0x4b, 0x69, 0xa1, 0x82, }, -+ { 0x12, 0x8c, 0x6d, 0x52, 0x08, 0xef, 0x74, 0xb2, 0xe6, 0xaa, 0xd3, 0xb0, -+ 0x26, 0xb0, 0xd9, 0x94, 0xb6, 0x11, 0x45, 0x0e, 0x36, 0x71, 0x14, 0x2d, -+ 0x41, 0x8c, 0x21, 0x53, 0x31, 0xe9, 0x68, }, -+ { 0xee, 0xea, 0x0d, 0x89, 0x47, 0x7e, 0x72, 0xd1, 0xd8, 0xce, 0x58, 0x4c, -+ 0x94, 0x1f, 0x0d, 0x51, 0x08, 0xa3, 0xb6, 0x3d, 0xe7, 0x82, 0x46, 0x92, -+ 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, -+}; -+ -+static const u8 
blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { -+ { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70, -+ 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79, -+ 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, }, -+ { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9, -+ 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f, -+ 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, }, -+}; -+ -+bool __init blake2s_selftest(void) -+{ -+ u8 key[BLAKE2S_KEY_SIZE]; -+ u8 buf[ARRAY_SIZE(blake2s_testvecs)]; -+ u8 hash[BLAKE2S_HASH_SIZE]; -+ struct blake2s_state state; -+ bool success = true; -+ int i, l; -+ -+ key[0] = key[1] = 1; -+ for (i = 2; i < sizeof(key); ++i) -+ key[i] = key[i - 2] + key[i - 1]; -+ -+ for (i = 0; i < sizeof(buf); ++i) -+ buf[i] = (u8)i; -+ -+ for (i = l = 0; i < ARRAY_SIZE(blake2s_testvecs); l = (l + 37) % ++i) { -+ int outlen = 1 + i % BLAKE2S_HASH_SIZE; -+ int keylen = (13 * i) % (BLAKE2S_KEY_SIZE + 1); -+ -+ blake2s(hash, buf, key + BLAKE2S_KEY_SIZE - keylen, outlen, i, -+ keylen); -+ if (memcmp(hash, blake2s_testvecs[i], outlen)) { -+ pr_err("blake2s self-test %d: FAIL\n", i + 1); -+ success = false; -+ } -+ -+ if (!keylen) -+ blake2s_init(&state, outlen); -+ else -+ blake2s_init_key(&state, outlen, -+ key + BLAKE2S_KEY_SIZE - keylen, -+ keylen); -+ -+ blake2s_update(&state, buf, l); -+ blake2s_update(&state, buf + l, i - l); -+ blake2s_final(&state, hash); -+ if (memcmp(hash, blake2s_testvecs[i], outlen)) { -+ pr_err("blake2s init/update/final self-test %d: FAIL\n", -+ i + 1); -+ success = false; -+ } -+ } -+ -+ if (success) { -+ blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key)); -+ success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE); -+ -+ blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf)); -+ success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE); -+ -+ if (!success) -+ pr_err("blake2s256_hmac self-test: 
FAIL\n"); -+ } -+ -+ return success; -+} ---- /dev/null -+++ b/lib/crypto/blake2s.c -@@ -0,0 +1,126 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is an implementation of the BLAKE2s hash and PRF functions. -+ * -+ * Information: https://blake2.net/ -+ * -+ */ -+ -+#include <crypto/internal/blake2s.h> -+#include <linux/types.h> -+#include <linux/string.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/bug.h> -+#include <asm/unaligned.h> -+ -+bool blake2s_selftest(void); -+ -+void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) -+{ -+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; -+ -+ if (unlikely(!inlen)) -+ return; -+ if (inlen > fill) { -+ memcpy(state->buf + state->buflen, in, fill); -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) -+ blake2s_compress_arch(state, state->buf, 1, -+ BLAKE2S_BLOCK_SIZE); -+ else -+ blake2s_compress_generic(state, state->buf, 1, -+ BLAKE2S_BLOCK_SIZE); -+ state->buflen = 0; -+ in += fill; -+ inlen -= fill; -+ } -+ if (inlen > BLAKE2S_BLOCK_SIZE) { -+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); -+ /* Hash one less (full) block than strictly possible */ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) -+ blake2s_compress_arch(state, in, nblocks - 1, -+ BLAKE2S_BLOCK_SIZE); -+ else -+ blake2s_compress_generic(state, in, nblocks - 1, -+ BLAKE2S_BLOCK_SIZE); -+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ } -+ memcpy(state->buf + state->buflen, in, inlen); -+ state->buflen += inlen; -+} -+EXPORT_SYMBOL(blake2s_update); -+ -+void blake2s_final(struct blake2s_state *state, u8 *out) -+{ -+ WARN_ON(IS_ENABLED(DEBUG) && !out); -+ blake2s_set_lastblock(state); -+ memset(state->buf + state->buflen, 0, -+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ -+ if 
(IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)) -+ blake2s_compress_arch(state, state->buf, 1, state->buflen); -+ else -+ blake2s_compress_generic(state, state->buf, 1, state->buflen); -+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); -+ memcpy(out, state->h, state->outlen); -+ memzero_explicit(state, sizeof(*state)); -+} -+EXPORT_SYMBOL(blake2s_final); -+ -+void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, -+ const size_t keylen) -+{ -+ struct blake2s_state state; -+ u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 }; -+ u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32)); -+ int i; -+ -+ if (keylen > BLAKE2S_BLOCK_SIZE) { -+ blake2s_init(&state, BLAKE2S_HASH_SIZE); -+ blake2s_update(&state, key, keylen); -+ blake2s_final(&state, x_key); -+ } else -+ memcpy(x_key, key, keylen); -+ -+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) -+ x_key[i] ^= 0x36; -+ -+ blake2s_init(&state, BLAKE2S_HASH_SIZE); -+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); -+ blake2s_update(&state, in, inlen); -+ blake2s_final(&state, i_hash); -+ -+ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) -+ x_key[i] ^= 0x5c ^ 0x36; -+ -+ blake2s_init(&state, BLAKE2S_HASH_SIZE); -+ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); -+ blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); -+ blake2s_final(&state, i_hash); -+ -+ memcpy(out, i_hash, BLAKE2S_HASH_SIZE); -+ memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE); -+ memzero_explicit(i_hash, BLAKE2S_HASH_SIZE); -+} -+EXPORT_SYMBOL(blake2s256_hmac); -+ -+static int __init mod_init(void) -+{ -+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && -+ WARN_ON(!blake2s_selftest())) -+ return -ENODEV; -+ return 0; -+} -+ -+static void __exit mod_exit(void) -+{ -+} -+ -+module_init(mod_init); -+module_exit(mod_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_DESCRIPTION("BLAKE2s hash function"); -+MODULE_AUTHOR("Jason A. 
Donenfeld <Jason@zx2c4.com>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0022-crypto-testmgr-add-test-cases-for-Blake2s.patch b/target/linux/generic/backport-5.4/080-wireguard-0022-crypto-testmgr-add-test-cases-for-Blake2s.patch deleted file mode 100644 index 9adc75eb98..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0022-crypto-testmgr-add-test-cases-for-Blake2s.patch +++ /dev/null @@ -1,322 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:29 +0100 -Subject: [PATCH] crypto: testmgr - add test cases for Blake2s - -commit 17e1df67023a5c9ccaeb5de8bf5b88f63127ecf7 upstream. - -As suggested by Eric for the Blake2b implementation contributed by -David, introduce a set of test vectors for Blake2s covering different -digest and key sizes. - - blake2s-128 blake2s-160 blake2s-224 blake2s-256 - --------------------------------------------------- -len=0 | klen=0 klen=1 klen=16 klen=32 -len=1 | klen=16 klen=32 klen=0 klen=1 -len=7 | klen=32 klen=0 klen=1 klen=16 -len=15 | klen=1 klen=16 klen=32 klen=0 -len=64 | klen=0 klen=1 klen=16 klen=32 -len=247 | klen=16 klen=32 klen=0 klen=1 -len=256 | klen=32 klen=0 klen=1 klen=16 - -Cc: David Sterba <dsterba@suse.com> -Cc: Eric Biggers <ebiggers@google.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - crypto/testmgr.c | 24 +++++ - crypto/testmgr.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 275 insertions(+) - ---- a/crypto/testmgr.c -+++ b/crypto/testmgr.c -@@ -4035,6 +4035,30 @@ static const struct alg_test_desc alg_te - .test = alg_test_null, - .fips_allowed = 1, - }, { -+ .alg = "blake2s-128", -+ .test = alg_test_hash, -+ .suite = { -+ .hash = __VECS(blakes2s_128_tv_template) -+ } -+ }, { -+ .alg = "blake2s-160", -+ .test = alg_test_hash, -+ .suite = { -+ .hash = __VECS(blakes2s_160_tv_template) -+ } -+ }, { -+ .alg = "blake2s-224", -+ .test = alg_test_hash, -+ .suite = { -+ .hash = __VECS(blakes2s_224_tv_template) -+ } -+ }, { -+ .alg = "blake2s-256", -+ .test = alg_test_hash, -+ .suite = { -+ .hash = __VECS(blakes2s_256_tv_template) -+ } -+ }, { - .alg = "cbc(aes)", - .test = alg_test_skcipher, - .fips_allowed = 1, ---- a/crypto/testmgr.h -+++ b/crypto/testmgr.h -@@ -31567,4 +31567,255 @@ static const struct aead_testvec essiv_h - }, - }; - -+static const char blake2_ordered_sequence[] = -+ "\x00\x01\x02\x03\x04\x05\x06\x07" -+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" -+ "\x10\x11\x12\x13\x14\x15\x16\x17" -+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" -+ "\x20\x21\x22\x23\x24\x25\x26\x27" -+ "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" -+ "\x30\x31\x32\x33\x34\x35\x36\x37" -+ "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" -+ "\x40\x41\x42\x43\x44\x45\x46\x47" -+ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" -+ "\x50\x51\x52\x53\x54\x55\x56\x57" -+ "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" -+ "\x60\x61\x62\x63\x64\x65\x66\x67" -+ "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" -+ "\x70\x71\x72\x73\x74\x75\x76\x77" -+ "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" -+ "\x80\x81\x82\x83\x84\x85\x86\x87" -+ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" -+ "\x90\x91\x92\x93\x94\x95\x96\x97" -+ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" -+ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" -+ "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" -+ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" -+ "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" -+ 
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" -+ "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" -+ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" -+ "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" -+ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" -+ "\xe8\xe9\xea\xeb\xec\xed\xee\xef" -+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" -+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"; -+ -+static const struct hash_testvec blakes2s_128_tv_template[] = {{ -+ .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01, -+ 0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, }, -+}, { -+ .plaintext = blake2_ordered_sequence, -+ .psize = 64, -+ .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01, -+ 0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, }, -+}, { -+ .ksize = 16, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 1, -+ .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82, -+ 0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, }, -+}, { -+ .ksize = 32, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 7, -+ .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd, -+ 0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, }, -+}, { -+ .ksize = 1, -+ .key = "B", -+ .plaintext = blake2_ordered_sequence, -+ .psize = 15, -+ .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2, -+ 0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, }, -+}, { -+ .ksize = 16, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 247, -+ .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe, -+ 0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, }, -+}, { -+ .ksize = 32, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 256, -+ .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6, -+ 0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, }, -+}}; -+ -+static const struct hash_testvec blakes2s_160_tv_template[] = {{ -+ .plaintext = blake2_ordered_sequence, -+ .psize = 7, -+ .digest = 
(u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e, -+ 0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62, -+ 0xe3, 0xf2, 0x84, 0xff, }, -+}, { -+ .plaintext = blake2_ordered_sequence, -+ .psize = 256, -+ .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2, -+ 0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1, -+ 0x9b, 0x2d, 0x35, 0x05, }, -+}, { -+ .ksize = 1, -+ .key = "B", -+ .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3, -+ 0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1, -+ 0x79, 0x65, 0x32, 0x93, }, -+}, { -+ .ksize = 32, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 1, -+ .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71, -+ 0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef, -+ 0xa2, 0x3a, 0x56, 0x9c, }, -+}, { -+ .ksize = 16, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 15, -+ .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19, -+ 0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34, -+ 0x83, 0x39, 0x0f, 0x30, }, -+}, { -+ .ksize = 1, -+ .key = "B", -+ .plaintext = blake2_ordered_sequence, -+ .psize = 64, -+ .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5, -+ 0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d, -+ 0xac, 0xa6, 0x81, 0x63, }, -+}, { -+ .ksize = 32, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 247, -+ .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01, -+ 0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10, -+ 0x0a, 0xf6, 0x73, 0xe8, }, -+}}; -+ -+static const struct hash_testvec blakes2s_224_tv_template[] = {{ -+ .plaintext = blake2_ordered_sequence, -+ .psize = 1, -+ .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91, -+ 0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12, -+ 0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc, -+ 0x48, 0x21, 0x97, 0xbb, }, -+}, { -+ .plaintext = blake2_ordered_sequence, -+ .psize = 247, -+ .digest = (u8[]){ 
0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e, -+ 0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4, -+ 0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc, -+ 0x2b, 0xa4, 0xd5, 0xf6, }, -+}, { -+ .ksize = 16, -+ .key = blake2_ordered_sequence, -+ .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f, -+ 0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3, -+ 0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32, -+ 0xa7, 0x19, 0xfc, 0xb8, }, -+}, { -+ .ksize = 1, -+ .key = "B", -+ .plaintext = blake2_ordered_sequence, -+ .psize = 7, -+ .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76, -+ 0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6, -+ 0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8, -+ 0x7b, 0x45, 0xfe, 0x05, }, -+}, { -+ .ksize = 32, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 15, -+ .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36, -+ 0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43, -+ 0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e, -+ 0x25, 0xab, 0xc5, 0x02, }, -+}, { -+ .ksize = 16, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 64, -+ .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7, -+ 0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c, -+ 0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93, -+ 0x6a, 0x31, 0x83, 0xb5, }, -+}, { -+ .ksize = 1, -+ .key = "B", -+ .plaintext = blake2_ordered_sequence, -+ .psize = 256, -+ .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86, -+ 0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2, -+ 0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45, -+ 0xb3, 0xd7, 0xec, 0xcc, }, -+}}; -+ -+static const struct hash_testvec blakes2s_256_tv_template[] = {{ -+ .plaintext = blake2_ordered_sequence, -+ .psize = 15, -+ .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21, -+ 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67, -+ 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04, -+ 0xc0, 0x23, 0x23, 0xfd, 0x45, 
0xc0, 0xa5, 0x2d, }, -+}, { -+ .ksize = 32, -+ .key = blake2_ordered_sequence, -+ .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b, -+ 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b, -+ 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a, -+ 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, }, -+}, { -+ .ksize = 1, -+ .key = "B", -+ .plaintext = blake2_ordered_sequence, -+ .psize = 1, -+ .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03, -+ 0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18, -+ 0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b, -+ 0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, }, -+}, { -+ .ksize = 16, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 7, -+ .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03, -+ 0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15, -+ 0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34, -+ 0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, }, -+}, { -+ .ksize = 32, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 64, -+ .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66, -+ 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26, -+ 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab, -+ 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, }, -+}, { -+ .ksize = 1, -+ .key = "B", -+ .plaintext = blake2_ordered_sequence, -+ .psize = 247, -+ .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84, -+ 0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66, -+ 0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0, -+ 0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, }, -+}, { -+ .ksize = 16, -+ .key = blake2_ordered_sequence, -+ .plaintext = blake2_ordered_sequence, -+ .psize = 256, -+ .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b, -+ 0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1, -+ 0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed, -+ 0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, }, -+}}; -+ - #endif /* 
_CRYPTO_TESTMGR_H */ diff --git a/target/linux/generic/backport-5.4/080-wireguard-0023-crypto-blake2s-implement-generic-shash-driver.patch b/target/linux/generic/backport-5.4/080-wireguard-0023-crypto-blake2s-implement-generic-shash-driver.patch deleted file mode 100644 index e25edf5dda..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0023-crypto-blake2s-implement-generic-shash-driver.patch +++ /dev/null @@ -1,245 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:30 +0100 -Subject: [PATCH] crypto: blake2s - implement generic shash driver - -commit 7f9b0880925f1f9d7d59504ea0892d2ae9cfc233 upstream. - -Wire up our newly added Blake2s implementation via the shash API. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - crypto/Kconfig | 18 ++++ - crypto/Makefile | 1 + - crypto/blake2s_generic.c | 171 ++++++++++++++++++++++++++++++ - include/crypto/internal/blake2s.h | 5 + - 4 files changed, 195 insertions(+) - create mode 100644 crypto/blake2s_generic.c - ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -639,6 +639,24 @@ config CRYPTO_XXHASH - xxHash non-cryptographic hash algorithm. Extremely fast, working at - speeds close to RAM limits. - -+config CRYPTO_BLAKE2S -+ tristate "BLAKE2s digest algorithm" -+ select CRYPTO_LIB_BLAKE2S_GENERIC -+ select CRYPTO_HASH -+ help -+ Implementation of cryptographic hash function BLAKE2s -+ optimized for 8-32bit platforms and can produce digests of any size -+ between 1 to 32. The keyed hash is also implemented. -+ -+ This module provides the following algorithms: -+ -+ - blake2s-128 -+ - blake2s-160 -+ - blake2s-224 -+ - blake2s-256 -+ -+ See https://blake2.net for further information. 
-+ - config CRYPTO_CRCT10DIF - tristate "CRCT10DIF algorithm" - select CRYPTO_HASH ---- a/crypto/Makefile -+++ b/crypto/Makefile -@@ -74,6 +74,7 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebo - obj-$(CONFIG_CRYPTO_WP512) += wp512.o - CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 - obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o -+obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o - obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o - obj-$(CONFIG_CRYPTO_ECB) += ecb.o - obj-$(CONFIG_CRYPTO_CBC) += cbc.o ---- /dev/null -+++ b/crypto/blake2s_generic.c -@@ -0,0 +1,171 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include <crypto/internal/blake2s.h> -+#include <crypto/internal/simd.h> -+#include <crypto/internal/hash.h> -+ -+#include <linux/types.h> -+#include <linux/jump_label.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, -+ unsigned int keylen) -+{ -+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); -+ -+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { -+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); -+ return -EINVAL; -+ } -+ -+ memcpy(tctx->key, key, keylen); -+ tctx->keylen = keylen; -+ -+ return 0; -+} -+ -+static int crypto_blake2s_init(struct shash_desc *desc) -+{ -+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ const int outlen = crypto_shash_digestsize(desc->tfm); -+ -+ if (tctx->keylen) -+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen); -+ else -+ blake2s_init(state, outlen); -+ -+ return 0; -+} -+ -+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, -+ unsigned int inlen) -+{ -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; -+ -+ if 
(unlikely(!inlen)) -+ return 0; -+ if (inlen > fill) { -+ memcpy(state->buf + state->buflen, in, fill); -+ blake2s_compress_generic(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); -+ state->buflen = 0; -+ in += fill; -+ inlen -= fill; -+ } -+ if (inlen > BLAKE2S_BLOCK_SIZE) { -+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); -+ /* Hash one less (full) block than strictly possible */ -+ blake2s_compress_generic(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); -+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ } -+ memcpy(state->buf + state->buflen, in, inlen); -+ state->buflen += inlen; -+ -+ return 0; -+} -+ -+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) -+{ -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ -+ blake2s_set_lastblock(state); -+ memset(state->buf + state->buflen, 0, -+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ -+ blake2s_compress_generic(state, state->buf, 1, state->buflen); -+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); -+ memcpy(out, state->h, state->outlen); -+ memzero_explicit(state, sizeof(*state)); -+ -+ return 0; -+} -+ -+static struct shash_alg blake2s_algs[] = {{ -+ .base.cra_name = "blake2s-128", -+ .base.cra_driver_name = "blake2s-128-generic", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_128_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-160", -+ .base.cra_driver_name = "blake2s-160-generic", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ 
.base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_160_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-224", -+ .base.cra_driver_name = "blake2s-224-generic", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_224_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-256", -+ .base.cra_driver_name = "blake2s-256-generic", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_256_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}}; -+ -+static int __init blake2s_mod_init(void) -+{ -+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -+} -+ -+static void __exit blake2s_mod_exit(void) -+{ -+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -+} -+ -+subsys_initcall(blake2s_mod_init); -+module_exit(blake2s_mod_exit); -+ -+MODULE_ALIAS_CRYPTO("blake2s-128"); -+MODULE_ALIAS_CRYPTO("blake2s-128-generic"); -+MODULE_ALIAS_CRYPTO("blake2s-160"); -+MODULE_ALIAS_CRYPTO("blake2s-160-generic"); -+MODULE_ALIAS_CRYPTO("blake2s-224"); -+MODULE_ALIAS_CRYPTO("blake2s-224-generic"); -+MODULE_ALIAS_CRYPTO("blake2s-256"); -+MODULE_ALIAS_CRYPTO("blake2s-256-generic"); 
-+MODULE_LICENSE("GPL v2"); ---- a/include/crypto/internal/blake2s.h -+++ b/include/crypto/internal/blake2s.h -@@ -5,6 +5,11 @@ - - #include <crypto/blake2s.h> - -+struct blake2s_tfm_ctx { -+ u8 key[BLAKE2S_KEY_SIZE]; -+ unsigned int keylen; -+}; -+ - void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, - size_t nblocks, const u32 inc); - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch b/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch deleted file mode 100644 index 04405581d2..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch +++ /dev/null @@ -1,557 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:31 +0100 -Subject: [PATCH] crypto: blake2s - x86_64 SIMD implementation - -commit ed0356eda153f6a95649e11feb7b07083caf9e20 upstream. - -These implementations from Samuel Neves support AVX and AVX-512VL. -Originally this used AVX-512F, but Skylake thermal throttling made -AVX-512VL more attractive and possible to do with negligable difference. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Samuel Neves <sneves@dei.uc.pt> -Co-developed-by: Samuel Neves <sneves@dei.uc.pt> -[ardb: move to arch/x86/crypto, wire into lib/crypto framework] -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/Makefile | 2 + - arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++ - arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++ - crypto/Kconfig | 6 + - 4 files changed, 499 insertions(+) - create mode 100644 arch/x86/crypto/blake2s-core.S - create mode 100644 arch/x86/crypto/blake2s-glue.c - ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes) - obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o - obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o - obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o -+ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o - endif - - # These modules require assembler to support AVX2. -@@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x8 - aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o - - nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o -+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o - - ifeq ($(avx_supported),yes) - camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ ---- /dev/null -+++ b/arch/x86/crypto/blake2s-core.S -@@ -0,0 +1,258 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. 
-+ */ -+ -+#include <linux/linkage.h> -+ -+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32 -+.align 32 -+IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667 -+ .octa 0x5BE0CD191F83D9AB9B05688C510E527F -+.section .rodata.cst16.ROT16, "aM", @progbits, 16 -+.align 16 -+ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 -+.section .rodata.cst16.ROR328, "aM", @progbits, 16 -+.align 16 -+ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 -+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 -+.align 64 -+SIGMA: -+.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 -+.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 -+.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 -+.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 -+.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 -+.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 -+.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 -+.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 -+.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 -+.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 -+#ifdef CONFIG_AS_AVX512 -+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 -+.align 64 -+SIGMA2: -+.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 -+.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 -+.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 -+.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 -+.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 -+.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 -+.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 -+.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 -+.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 -+.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 -+#endif /* CONFIG_AS_AVX512 */ -+ -+.text -+#ifdef CONFIG_AS_SSSE3 -+ENTRY(blake2s_compress_ssse3) -+ testq %rdx,%rdx -+ je .Lendofloop -+ movdqu 
(%rdi),%xmm0 -+ movdqu 0x10(%rdi),%xmm1 -+ movdqa ROT16(%rip),%xmm12 -+ movdqa ROR328(%rip),%xmm13 -+ movdqu 0x20(%rdi),%xmm14 -+ movq %rcx,%xmm15 -+ leaq SIGMA+0xa0(%rip),%r8 -+ jmp .Lbeginofloop -+ .align 32 -+.Lbeginofloop: -+ movdqa %xmm0,%xmm10 -+ movdqa %xmm1,%xmm11 -+ paddq %xmm15,%xmm14 -+ movdqa IV(%rip),%xmm2 -+ movdqa %xmm14,%xmm3 -+ pxor IV+0x10(%rip),%xmm3 -+ leaq SIGMA(%rip),%rcx -+.Lroundloop: -+ movzbl (%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ movzbl 0x1(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ movzbl 0x2(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ movzbl 0x3(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ punpckldq %xmm5,%xmm4 -+ punpckldq %xmm7,%xmm6 -+ punpcklqdq %xmm6,%xmm4 -+ paddd %xmm4,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm12,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0xc,%xmm1 -+ pslld $0x14,%xmm8 -+ por %xmm8,%xmm1 -+ movzbl 0x4(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ movzbl 0x5(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ movzbl 0x6(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ movzbl 0x7(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ punpckldq %xmm6,%xmm5 -+ punpckldq %xmm4,%xmm7 -+ punpcklqdq %xmm7,%xmm5 -+ paddd %xmm5,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm13,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0x7,%xmm1 -+ pslld $0x19,%xmm8 -+ por %xmm8,%xmm1 -+ pshufd $0x93,%xmm0,%xmm0 -+ pshufd $0x4e,%xmm3,%xmm3 -+ pshufd $0x39,%xmm2,%xmm2 -+ movzbl 0x8(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ movzbl 0x9(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ movzbl 0xa(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ movzbl 0xb(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ punpckldq %xmm7,%xmm6 -+ punpckldq %xmm5,%xmm4 -+ punpcklqdq %xmm4,%xmm6 -+ paddd %xmm6,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm12,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0xc,%xmm1 -+ pslld $0x14,%xmm8 -+ por %xmm8,%xmm1 -+ movzbl 
0xc(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ movzbl 0xd(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ movzbl 0xe(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ movzbl 0xf(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ punpckldq %xmm4,%xmm7 -+ punpckldq %xmm6,%xmm5 -+ punpcklqdq %xmm5,%xmm7 -+ paddd %xmm7,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm13,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0x7,%xmm1 -+ pslld $0x19,%xmm8 -+ por %xmm8,%xmm1 -+ pshufd $0x39,%xmm0,%xmm0 -+ pshufd $0x4e,%xmm3,%xmm3 -+ pshufd $0x93,%xmm2,%xmm2 -+ addq $0x10,%rcx -+ cmpq %r8,%rcx -+ jnz .Lroundloop -+ pxor %xmm2,%xmm0 -+ pxor %xmm3,%xmm1 -+ pxor %xmm10,%xmm0 -+ pxor %xmm11,%xmm1 -+ addq $0x40,%rsi -+ decq %rdx -+ jnz .Lbeginofloop -+ movdqu %xmm0,(%rdi) -+ movdqu %xmm1,0x10(%rdi) -+ movdqu %xmm14,0x20(%rdi) -+.Lendofloop: -+ ret -+ENDPROC(blake2s_compress_ssse3) -+#endif /* CONFIG_AS_SSSE3 */ -+ -+#ifdef CONFIG_AS_AVX512 -+ENTRY(blake2s_compress_avx512) -+ vmovdqu (%rdi),%xmm0 -+ vmovdqu 0x10(%rdi),%xmm1 -+ vmovdqu 0x20(%rdi),%xmm4 -+ vmovq %rcx,%xmm5 -+ vmovdqa IV(%rip),%xmm14 -+ vmovdqa IV+16(%rip),%xmm15 -+ jmp .Lblake2s_compress_avx512_mainloop -+.align 32 -+.Lblake2s_compress_avx512_mainloop: -+ vmovdqa %xmm0,%xmm10 -+ vmovdqa %xmm1,%xmm11 -+ vpaddq %xmm5,%xmm4,%xmm4 -+ vmovdqa %xmm14,%xmm2 -+ vpxor %xmm15,%xmm4,%xmm3 -+ vmovdqu (%rsi),%ymm6 -+ vmovdqu 0x20(%rsi),%ymm7 -+ addq $0x40,%rsi -+ leaq SIGMA2(%rip),%rax -+ movb $0xa,%cl -+.Lblake2s_compress_avx512_roundloop: -+ addq $0x40,%rax -+ vmovdqa -0x40(%rax),%ymm8 -+ vmovdqa -0x20(%rax),%ymm9 -+ vpermi2d %ymm7,%ymm6,%ymm8 -+ vpermi2d %ymm7,%ymm6,%ymm9 -+ vmovdqa %ymm8,%ymm6 -+ vmovdqa %ymm9,%ymm7 -+ vpaddd %xmm8,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vprord $0x10,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0xc,%xmm1,%xmm1 -+ vextracti128 $0x1,%ymm8,%xmm8 -+ vpaddd %xmm8,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor 
%xmm0,%xmm3,%xmm3 -+ vprord $0x8,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0x7,%xmm1,%xmm1 -+ vpshufd $0x93,%xmm0,%xmm0 -+ vpshufd $0x4e,%xmm3,%xmm3 -+ vpshufd $0x39,%xmm2,%xmm2 -+ vpaddd %xmm9,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vprord $0x10,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0xc,%xmm1,%xmm1 -+ vextracti128 $0x1,%ymm9,%xmm9 -+ vpaddd %xmm9,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vprord $0x8,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0x7,%xmm1,%xmm1 -+ vpshufd $0x39,%xmm0,%xmm0 -+ vpshufd $0x4e,%xmm3,%xmm3 -+ vpshufd $0x93,%xmm2,%xmm2 -+ decb %cl -+ jne .Lblake2s_compress_avx512_roundloop -+ vpxor %xmm10,%xmm0,%xmm0 -+ vpxor %xmm11,%xmm1,%xmm1 -+ vpxor %xmm2,%xmm0,%xmm0 -+ vpxor %xmm3,%xmm1,%xmm1 -+ decq %rdx -+ jne .Lblake2s_compress_avx512_mainloop -+ vmovdqu %xmm0,(%rdi) -+ vmovdqu %xmm1,0x10(%rdi) -+ vmovdqu %xmm4,0x20(%rdi) -+ vzeroupper -+ retq -+ENDPROC(blake2s_compress_avx512) -+#endif /* CONFIG_AS_AVX512 */ ---- /dev/null -+++ b/arch/x86/crypto/blake2s-glue.c -@@ -0,0 +1,233 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include <crypto/internal/blake2s.h> -+#include <crypto/internal/simd.h> -+#include <crypto/internal/hash.h> -+ -+#include <linux/types.h> -+#include <linux/jump_label.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+#include <asm/cpufeature.h> -+#include <asm/fpu/api.h> -+#include <asm/processor.h> -+#include <asm/simd.h> -+ -+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, -+ const u8 *block, const size_t nblocks, -+ const u32 inc); -+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, -+ const u8 *block, const size_t nblocks, -+ const u32 inc); -+ -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); -+ -+void blake2s_compress_arch(struct blake2s_state *state, -+ const u8 *block, size_t nblocks, -+ const u32 inc) -+{ -+ /* SIMD disables preemption, so relax after processing each page. */ -+ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); -+ -+ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { -+ blake2s_compress_generic(state, block, nblocks, inc); -+ return; -+ } -+ -+ for (;;) { -+ const size_t blocks = min_t(size_t, nblocks, -+ PAGE_SIZE / BLAKE2S_BLOCK_SIZE); -+ -+ kernel_fpu_begin(); -+ if (IS_ENABLED(CONFIG_AS_AVX512) && -+ static_branch_likely(&blake2s_use_avx512)) -+ blake2s_compress_avx512(state, block, blocks, inc); -+ else -+ blake2s_compress_ssse3(state, block, blocks, inc); -+ kernel_fpu_end(); -+ -+ nblocks -= blocks; -+ if (!nblocks) -+ break; -+ block += blocks * BLAKE2S_BLOCK_SIZE; -+ } -+} -+EXPORT_SYMBOL(blake2s_compress_arch); -+ -+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, -+ unsigned int keylen) -+{ -+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); -+ -+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { -+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); -+ return -EINVAL; -+ } -+ -+ memcpy(tctx->key, key, keylen); -+ 
tctx->keylen = keylen; -+ -+ return 0; -+} -+ -+static int crypto_blake2s_init(struct shash_desc *desc) -+{ -+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ const int outlen = crypto_shash_digestsize(desc->tfm); -+ -+ if (tctx->keylen) -+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen); -+ else -+ blake2s_init(state, outlen); -+ -+ return 0; -+} -+ -+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, -+ unsigned int inlen) -+{ -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; -+ -+ if (unlikely(!inlen)) -+ return 0; -+ if (inlen > fill) { -+ memcpy(state->buf + state->buflen, in, fill); -+ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); -+ state->buflen = 0; -+ in += fill; -+ inlen -= fill; -+ } -+ if (inlen > BLAKE2S_BLOCK_SIZE) { -+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); -+ /* Hash one less (full) block than strictly possible */ -+ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); -+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ } -+ memcpy(state->buf + state->buflen, in, inlen); -+ state->buflen += inlen; -+ -+ return 0; -+} -+ -+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) -+{ -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ -+ blake2s_set_lastblock(state); -+ memset(state->buf + state->buflen, 0, -+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ -+ blake2s_compress_arch(state, state->buf, 1, state->buflen); -+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); -+ memcpy(out, state->h, state->outlen); -+ memzero_explicit(state, sizeof(*state)); -+ -+ return 0; -+} -+ -+static struct shash_alg blake2s_algs[] = {{ -+ .base.cra_name = "blake2s-128", -+ .base.cra_driver_name = "blake2s-128-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = 
sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_128_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-160", -+ .base.cra_driver_name = "blake2s-160-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_160_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-224", -+ .base.cra_driver_name = "blake2s-224-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_224_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-256", -+ .base.cra_driver_name = "blake2s-256-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_256_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}}; -+ -+static int __init blake2s_mod_init(void) -+{ -+ 
if (!boot_cpu_has(X86_FEATURE_SSSE3)) -+ return 0; -+ -+ static_branch_enable(&blake2s_use_ssse3); -+ -+ if (IS_ENABLED(CONFIG_AS_AVX512) && -+ boot_cpu_has(X86_FEATURE_AVX) && -+ boot_cpu_has(X86_FEATURE_AVX2) && -+ boot_cpu_has(X86_FEATURE_AVX512F) && -+ boot_cpu_has(X86_FEATURE_AVX512VL) && -+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | -+ XFEATURE_MASK_AVX512, NULL)) -+ static_branch_enable(&blake2s_use_avx512); -+ -+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -+} -+ -+static void __exit blake2s_mod_exit(void) -+{ -+ if (boot_cpu_has(X86_FEATURE_SSSE3)) -+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -+} -+ -+module_init(blake2s_mod_init); -+module_exit(blake2s_mod_exit); -+ -+MODULE_ALIAS_CRYPTO("blake2s-128"); -+MODULE_ALIAS_CRYPTO("blake2s-128-x86"); -+MODULE_ALIAS_CRYPTO("blake2s-160"); -+MODULE_ALIAS_CRYPTO("blake2s-160-x86"); -+MODULE_ALIAS_CRYPTO("blake2s-224"); -+MODULE_ALIAS_CRYPTO("blake2s-224-x86"); -+MODULE_ALIAS_CRYPTO("blake2s-256"); -+MODULE_ALIAS_CRYPTO("blake2s-256-x86"); -+MODULE_LICENSE("GPL v2"); ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -657,6 +657,12 @@ config CRYPTO_BLAKE2S - - See https://blake2.net for further information. 
- -+config CRYPTO_BLAKE2S_X86 -+ tristate "BLAKE2s digest algorithm (x86 accelerated version)" -+ depends on X86 && 64BIT -+ select CRYPTO_LIB_BLAKE2S_GENERIC -+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S -+ - config CRYPTO_CRCT10DIF - tristate "CRCT10DIF algorithm" - select CRYPTO_HASH diff --git a/target/linux/generic/backport-5.4/080-wireguard-0025-crypto-curve25519-generic-C-library-implementations.patch b/target/linux/generic/backport-5.4/080-wireguard-0025-crypto-curve25519-generic-C-library-implementations.patch deleted file mode 100644 index e58dda9213..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0025-crypto-curve25519-generic-C-library-implementations.patch +++ /dev/null @@ -1,1849 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:32 +0100 -Subject: [PATCH] crypto: curve25519 - generic C library implementations - -commit 0ed42a6f431e930b2e8fae21955406e09fe75d70 upstream. - -This contains two formally verified C implementations of the Curve25519 -scalar multiplication function, one for 32-bit systems, and one for -64-bit systems whose compiler supports efficient 128-bit integer types. -Not only are these implementations formally verified, but they are also -the fastest available C implementations. They have been modified to be -friendly to kernel space and to be generally less horrendous looking, -but still an effort has been made to retain their formally verified -characteristic, and so the C might look slightly unidiomatic. - -The 64-bit version comes from HACL*: https://github.com/project-everest/hacl-star -The 32-bit version comes from Fiat: https://github.com/mit-plv/fiat-crypto - -Information: https://cr.yp.to/ecdh.html - -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -[ardb: - move from lib/zinc to lib/crypto - - replace .c #includes with Kconfig based object selection - - drop simd handling and simplify support for per-arch versions ] -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - include/crypto/curve25519.h | 71 +++ - lib/crypto/Kconfig | 25 + - lib/crypto/Makefile | 5 + - lib/crypto/curve25519-fiat32.c | 864 +++++++++++++++++++++++++++++++++ - lib/crypto/curve25519-hacl64.c | 788 ++++++++++++++++++++++++++++++ - lib/crypto/curve25519.c | 25 + - 6 files changed, 1778 insertions(+) - create mode 100644 include/crypto/curve25519.h - create mode 100644 lib/crypto/curve25519-fiat32.c - create mode 100644 lib/crypto/curve25519-hacl64.c - create mode 100644 lib/crypto/curve25519.c - ---- /dev/null -+++ b/include/crypto/curve25519.h -@@ -0,0 +1,71 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#ifndef CURVE25519_H -+#define CURVE25519_H -+ -+#include <crypto/algapi.h> // For crypto_memneq. 
-+#include <linux/types.h> -+#include <linux/random.h> -+ -+enum curve25519_lengths { -+ CURVE25519_KEY_SIZE = 32 -+}; -+ -+extern const u8 curve25519_null_point[]; -+extern const u8 curve25519_base_point[]; -+ -+void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], -+ const u8 scalar[CURVE25519_KEY_SIZE], -+ const u8 point[CURVE25519_KEY_SIZE]); -+ -+void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], -+ const u8 scalar[CURVE25519_KEY_SIZE], -+ const u8 point[CURVE25519_KEY_SIZE]); -+ -+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE]); -+ -+static inline -+bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE], -+ const u8 basepoint[CURVE25519_KEY_SIZE]) -+{ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) -+ curve25519_arch(mypublic, secret, basepoint); -+ else -+ curve25519_generic(mypublic, secret, basepoint); -+ return crypto_memneq(mypublic, curve25519_null_point, -+ CURVE25519_KEY_SIZE); -+} -+ -+static inline bool -+__must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE]) -+{ -+ if (unlikely(!crypto_memneq(secret, curve25519_null_point, -+ CURVE25519_KEY_SIZE))) -+ return false; -+ -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) -+ curve25519_base_arch(pub, secret); -+ else -+ curve25519_generic(pub, secret, curve25519_base_point); -+ return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); -+} -+ -+static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE]) -+{ -+ secret[0] &= 248; -+ secret[31] = (secret[31] & 127) | 64; -+} -+ -+static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]) -+{ -+ get_random_bytes_wait(secret, CURVE25519_KEY_SIZE); -+ curve25519_clamp_secret(secret); -+} -+ -+#endif /* CURVE25519_H */ ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -59,6 +59,31 @@ config CRYPTO_LIB_CHACHA - by either the generic 
implementation or an arch-specific one, if one - is available and enabled. - -+config CRYPTO_ARCH_HAVE_LIB_CURVE25519 -+ tristate -+ help -+ Declares whether the architecture provides an arch-specific -+ accelerated implementation of the Curve25519 library interface, -+ either builtin or as a module. -+ -+config CRYPTO_LIB_CURVE25519_GENERIC -+ tristate -+ help -+ This symbol can be depended upon by arch implementations of the -+ Curve25519 library interface that require the generic code as a -+ fallback, e.g., for SIMD implementations. If no arch specific -+ implementation is enabled, this implementation serves the users -+ of CRYPTO_LIB_CURVE25519. -+ -+config CRYPTO_LIB_CURVE25519 -+ tristate "Curve25519 scalar multiplication library" -+ depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519 -+ select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n -+ help -+ Enable the Curve25519 library interface. This interface may be -+ fulfilled by either the generic implementation or an arch-specific -+ one, if one is available and enabled. -+ - config CRYPTO_LIB_DES - tristate - ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -16,6 +16,11 @@ libblake2s-generic-y += blake2s-gener - obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o - libblake2s-y += blake2s.o - -+obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o -+libcurve25519-y := curve25519-fiat32.o -+libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o -+libcurve25519-y += curve25519.o -+ - obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o - libdes-y := des.o - ---- /dev/null -+++ b/lib/crypto/curve25519-fiat32.c -@@ -0,0 +1,864 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2016 The fiat-crypto Authors. -+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ * -+ * This is a machine-generated formally verified implementation of Curve25519 -+ * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally -+ * machine generated, it has been tweaked to be suitable for use in the kernel. -+ * It is optimized for 32-bit machines and machines that cannot work efficiently -+ * with 128-bit integer types. -+ */ -+ -+#include <asm/unaligned.h> -+#include <crypto/curve25519.h> -+#include <linux/string.h> -+ -+/* fe means field element. Here the field is \Z/(2^255-19). An element t, -+ * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 -+ * t[3]+2^102 t[4]+...+2^230 t[9]. -+ * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc. -+ * Multiplication and carrying produce fe from fe_loose. -+ */ -+typedef struct fe { u32 v[10]; } fe; -+ -+/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc -+ * Addition and subtraction produce fe_loose from (fe, fe). -+ */ -+typedef struct fe_loose { u32 v[10]; } fe_loose; -+ -+static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s) -+{ -+ /* Ignores top bit of s. */ -+ u32 a0 = get_unaligned_le32(s); -+ u32 a1 = get_unaligned_le32(s+4); -+ u32 a2 = get_unaligned_le32(s+8); -+ u32 a3 = get_unaligned_le32(s+12); -+ u32 a4 = get_unaligned_le32(s+16); -+ u32 a5 = get_unaligned_le32(s+20); -+ u32 a6 = get_unaligned_le32(s+24); -+ u32 a7 = get_unaligned_le32(s+28); -+ h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 
26 */ -+ h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */ -+ h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */ -+ h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */ -+ h[4] = (a3>> 6); /* (32- 6) = 26 */ -+ h[5] = a4&((1<<25)-1); /* 25 */ -+ h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */ -+ h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */ -+ h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */ -+ h[9] = (a7>> 6)&((1<<25)-1); /* 25 */ -+} -+ -+static __always_inline void fe_frombytes(fe *h, const u8 *s) -+{ -+ fe_frombytes_impl(h->v, s); -+} -+ -+static __always_inline u8 /*bool*/ -+addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low) -+{ -+ /* This function extracts 25 bits of result and 1 bit of carry -+ * (26 total), so a 32-bit intermediate is sufficient. -+ */ -+ u32 x = a + b + c; -+ *low = x & ((1 << 25) - 1); -+ return (x >> 25) & 1; -+} -+ -+static __always_inline u8 /*bool*/ -+addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low) -+{ -+ /* This function extracts 26 bits of result and 1 bit of carry -+ * (27 total), so a 32-bit intermediate is sufficient. -+ */ -+ u32 x = a + b + c; -+ *low = x & ((1 << 26) - 1); -+ return (x >> 26) & 1; -+} -+ -+static __always_inline u8 /*bool*/ -+subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low) -+{ -+ /* This function extracts 25 bits of result and 1 bit of borrow -+ * (26 total), so a 32-bit intermediate is sufficient. -+ */ -+ u32 x = a - b - c; -+ *low = x & ((1 << 25) - 1); -+ return x >> 31; -+} -+ -+static __always_inline u8 /*bool*/ -+subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low) -+{ -+ /* This function extracts 26 bits of result and 1 bit of borrow -+ *(27 total), so a 32-bit intermediate is sufficient. 
-+ */ -+ u32 x = a - b - c; -+ *low = x & ((1 << 26) - 1); -+ return x >> 31; -+} -+ -+static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz) -+{ -+ t = -!!t; /* all set if nonzero, 0 if 0 */ -+ return (t&nz) | ((~t)&z); -+} -+ -+static __always_inline void fe_freeze(u32 out[10], const u32 in1[10]) -+{ -+ { const u32 x17 = in1[9]; -+ { const u32 x18 = in1[8]; -+ { const u32 x16 = in1[7]; -+ { const u32 x14 = in1[6]; -+ { const u32 x12 = in1[5]; -+ { const u32 x10 = in1[4]; -+ { const u32 x8 = in1[3]; -+ { const u32 x6 = in1[2]; -+ { const u32 x4 = in1[1]; -+ { const u32 x2 = in1[0]; -+ { u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20); -+ { u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23); -+ { u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26); -+ { u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29); -+ { u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32); -+ { u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35); -+ { u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38); -+ { u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41); -+ { u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44); -+ { u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47); -+ { u32 x49 = cmovznz32(x48, 0x0, 0xffffffff); -+ { u32 x50 = (x49 & 0x3ffffed); -+ { u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52); -+ { u32 x54 = (x49 & 0x1ffffff); -+ { u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56); -+ { u32 x58 = (x49 & 0x3ffffff); -+ { u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60); -+ { u32 x62 = (x49 & 0x1ffffff); -+ { u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64); -+ { u32 x66 = (x49 & 0x3ffffff); -+ { u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68); -+ { u32 x70 = (x49 & 0x1ffffff); -+ { u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72); -+ { u32 x74 = (x49 & 
0x3ffffff); -+ { u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76); -+ { u32 x78 = (x49 & 0x1ffffff); -+ { u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80); -+ { u32 x82 = (x49 & 0x3ffffff); -+ { u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84); -+ { u32 x86 = (x49 & 0x1ffffff); -+ { u32 x88; addcarryx_u25(x85, x47, x86, &x88); -+ out[0] = x52; -+ out[1] = x56; -+ out[2] = x60; -+ out[3] = x64; -+ out[4] = x68; -+ out[5] = x72; -+ out[6] = x76; -+ out[7] = x80; -+ out[8] = x84; -+ out[9] = x88; -+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} -+} -+ -+static __always_inline void fe_tobytes(u8 s[32], const fe *f) -+{ -+ u32 h[10]; -+ fe_freeze(h, f->v); -+ s[0] = h[0] >> 0; -+ s[1] = h[0] >> 8; -+ s[2] = h[0] >> 16; -+ s[3] = (h[0] >> 24) | (h[1] << 2); -+ s[4] = h[1] >> 6; -+ s[5] = h[1] >> 14; -+ s[6] = (h[1] >> 22) | (h[2] << 3); -+ s[7] = h[2] >> 5; -+ s[8] = h[2] >> 13; -+ s[9] = (h[2] >> 21) | (h[3] << 5); -+ s[10] = h[3] >> 3; -+ s[11] = h[3] >> 11; -+ s[12] = (h[3] >> 19) | (h[4] << 6); -+ s[13] = h[4] >> 2; -+ s[14] = h[4] >> 10; -+ s[15] = h[4] >> 18; -+ s[16] = h[5] >> 0; -+ s[17] = h[5] >> 8; -+ s[18] = h[5] >> 16; -+ s[19] = (h[5] >> 24) | (h[6] << 1); -+ s[20] = h[6] >> 7; -+ s[21] = h[6] >> 15; -+ s[22] = (h[6] >> 23) | (h[7] << 3); -+ s[23] = h[7] >> 5; -+ s[24] = h[7] >> 13; -+ s[25] = (h[7] >> 21) | (h[8] << 4); -+ s[26] = h[8] >> 4; -+ s[27] = h[8] >> 12; -+ s[28] = (h[8] >> 20) | (h[9] << 6); -+ s[29] = h[9] >> 2; -+ s[30] = h[9] >> 10; -+ s[31] = h[9] >> 18; -+} -+ -+/* h = f */ -+static __always_inline void fe_copy(fe *h, const fe *f) -+{ -+ memmove(h, f, sizeof(u32) * 10); -+} -+ -+static __always_inline void fe_copy_lt(fe_loose *h, const fe *f) -+{ -+ memmove(h, f, sizeof(u32) * 10); -+} -+ -+/* h = 0 */ -+static __always_inline void fe_0(fe *h) -+{ -+ memset(h, 0, sizeof(u32) * 10); -+} -+ -+/* h = 1 */ -+static __always_inline void fe_1(fe *h) -+{ -+ memset(h, 0, sizeof(u32) * 10); -+ h->v[0] = 1; -+} -+ 
-+static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) -+{ -+ { const u32 x20 = in1[9]; -+ { const u32 x21 = in1[8]; -+ { const u32 x19 = in1[7]; -+ { const u32 x17 = in1[6]; -+ { const u32 x15 = in1[5]; -+ { const u32 x13 = in1[4]; -+ { const u32 x11 = in1[3]; -+ { const u32 x9 = in1[2]; -+ { const u32 x7 = in1[1]; -+ { const u32 x5 = in1[0]; -+ { const u32 x38 = in2[9]; -+ { const u32 x39 = in2[8]; -+ { const u32 x37 = in2[7]; -+ { const u32 x35 = in2[6]; -+ { const u32 x33 = in2[5]; -+ { const u32 x31 = in2[4]; -+ { const u32 x29 = in2[3]; -+ { const u32 x27 = in2[2]; -+ { const u32 x25 = in2[1]; -+ { const u32 x23 = in2[0]; -+ out[0] = (x5 + x23); -+ out[1] = (x7 + x25); -+ out[2] = (x9 + x27); -+ out[3] = (x11 + x29); -+ out[4] = (x13 + x31); -+ out[5] = (x15 + x33); -+ out[6] = (x17 + x35); -+ out[7] = (x19 + x37); -+ out[8] = (x21 + x39); -+ out[9] = (x20 + x38); -+ }}}}}}}}}}}}}}}}}}}} -+} -+ -+/* h = f + g -+ * Can overlap h with f or g. -+ */ -+static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g) -+{ -+ fe_add_impl(h->v, f->v, g->v); -+} -+ -+static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) -+{ -+ { const u32 x20 = in1[9]; -+ { const u32 x21 = in1[8]; -+ { const u32 x19 = in1[7]; -+ { const u32 x17 = in1[6]; -+ { const u32 x15 = in1[5]; -+ { const u32 x13 = in1[4]; -+ { const u32 x11 = in1[3]; -+ { const u32 x9 = in1[2]; -+ { const u32 x7 = in1[1]; -+ { const u32 x5 = in1[0]; -+ { const u32 x38 = in2[9]; -+ { const u32 x39 = in2[8]; -+ { const u32 x37 = in2[7]; -+ { const u32 x35 = in2[6]; -+ { const u32 x33 = in2[5]; -+ { const u32 x31 = in2[4]; -+ { const u32 x29 = in2[3]; -+ { const u32 x27 = in2[2]; -+ { const u32 x25 = in2[1]; -+ { const u32 x23 = in2[0]; -+ out[0] = ((0x7ffffda + x5) - x23); -+ out[1] = ((0x3fffffe + x7) - x25); -+ out[2] = ((0x7fffffe + x9) - x27); -+ out[3] = ((0x3fffffe + x11) - x29); -+ out[4] = ((0x7fffffe + x13) - x31); -+ out[5] = ((0x3fffffe + x15) - x33); 
-+ out[6] = ((0x7fffffe + x17) - x35); -+ out[7] = ((0x3fffffe + x19) - x37); -+ out[8] = ((0x7fffffe + x21) - x39); -+ out[9] = ((0x3fffffe + x20) - x38); -+ }}}}}}}}}}}}}}}}}}}} -+} -+ -+/* h = f - g -+ * Can overlap h with f or g. -+ */ -+static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g) -+{ -+ fe_sub_impl(h->v, f->v, g->v); -+} -+ -+static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) -+{ -+ { const u32 x20 = in1[9]; -+ { const u32 x21 = in1[8]; -+ { const u32 x19 = in1[7]; -+ { const u32 x17 = in1[6]; -+ { const u32 x15 = in1[5]; -+ { const u32 x13 = in1[4]; -+ { const u32 x11 = in1[3]; -+ { const u32 x9 = in1[2]; -+ { const u32 x7 = in1[1]; -+ { const u32 x5 = in1[0]; -+ { const u32 x38 = in2[9]; -+ { const u32 x39 = in2[8]; -+ { const u32 x37 = in2[7]; -+ { const u32 x35 = in2[6]; -+ { const u32 x33 = in2[5]; -+ { const u32 x31 = in2[4]; -+ { const u32 x29 = in2[3]; -+ { const u32 x27 = in2[2]; -+ { const u32 x25 = in2[1]; -+ { const u32 x23 = in2[0]; -+ { u64 x40 = ((u64)x23 * x5); -+ { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5)); -+ { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5)); -+ { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5)); -+ { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5)); -+ { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5)); -+ { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5)); -+ { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5)); -+ { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * 
x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5)); -+ { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5)); -+ { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9)); -+ { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9)); -+ { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13)); -+ { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13)); -+ { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17)); -+ { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17)); -+ { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19)))); -+ { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21)); -+ { u64 x58 = ((u64)(0x2 * x38) * x20); -+ { u64 x59 = (x48 + (x58 << 0x4)); -+ { u64 x60 = (x59 + (x58 << 0x1)); -+ { u64 x61 = (x60 + x58); -+ { u64 x62 = (x47 + (x57 << 0x4)); -+ { u64 x63 = (x62 + (x57 << 0x1)); -+ { u64 x64 = (x63 + x57); -+ { u64 x65 = (x46 + (x56 << 0x4)); -+ { u64 x66 = (x65 + (x56 << 0x1)); -+ { u64 x67 = (x66 + x56); -+ { u64 x68 = (x45 + (x55 << 0x4)); -+ { u64 x69 = (x68 + (x55 << 0x1)); -+ { u64 x70 = (x69 + x55); -+ { u64 x71 = (x44 + (x54 << 0x4)); -+ { u64 x72 = (x71 + (x54 << 0x1)); -+ { u64 x73 = (x72 + x54); -+ { u64 x74 = (x43 + (x53 << 0x4)); -+ { u64 x75 = (x74 + (x53 << 0x1)); -+ { u64 x76 = (x75 
+ x53); -+ { u64 x77 = (x42 + (x52 << 0x4)); -+ { u64 x78 = (x77 + (x52 << 0x1)); -+ { u64 x79 = (x78 + x52); -+ { u64 x80 = (x41 + (x51 << 0x4)); -+ { u64 x81 = (x80 + (x51 << 0x1)); -+ { u64 x82 = (x81 + x51); -+ { u64 x83 = (x40 + (x50 << 0x4)); -+ { u64 x84 = (x83 + (x50 << 0x1)); -+ { u64 x85 = (x84 + x50); -+ { u64 x86 = (x85 >> 0x1a); -+ { u32 x87 = ((u32)x85 & 0x3ffffff); -+ { u64 x88 = (x86 + x82); -+ { u64 x89 = (x88 >> 0x19); -+ { u32 x90 = ((u32)x88 & 0x1ffffff); -+ { u64 x91 = (x89 + x79); -+ { u64 x92 = (x91 >> 0x1a); -+ { u32 x93 = ((u32)x91 & 0x3ffffff); -+ { u64 x94 = (x92 + x76); -+ { u64 x95 = (x94 >> 0x19); -+ { u32 x96 = ((u32)x94 & 0x1ffffff); -+ { u64 x97 = (x95 + x73); -+ { u64 x98 = (x97 >> 0x1a); -+ { u32 x99 = ((u32)x97 & 0x3ffffff); -+ { u64 x100 = (x98 + x70); -+ { u64 x101 = (x100 >> 0x19); -+ { u32 x102 = ((u32)x100 & 0x1ffffff); -+ { u64 x103 = (x101 + x67); -+ { u64 x104 = (x103 >> 0x1a); -+ { u32 x105 = ((u32)x103 & 0x3ffffff); -+ { u64 x106 = (x104 + x64); -+ { u64 x107 = (x106 >> 0x19); -+ { u32 x108 = ((u32)x106 & 0x1ffffff); -+ { u64 x109 = (x107 + x61); -+ { u64 x110 = (x109 >> 0x1a); -+ { u32 x111 = ((u32)x109 & 0x3ffffff); -+ { u64 x112 = (x110 + x49); -+ { u64 x113 = (x112 >> 0x19); -+ { u32 x114 = ((u32)x112 & 0x1ffffff); -+ { u64 x115 = (x87 + (0x13 * x113)); -+ { u32 x116 = (u32) (x115 >> 0x1a); -+ { u32 x117 = ((u32)x115 & 0x3ffffff); -+ { u32 x118 = (x116 + x90); -+ { u32 x119 = (x118 >> 0x19); -+ { u32 x120 = (x118 & 0x1ffffff); -+ out[0] = x117; -+ out[1] = x120; -+ out[2] = (x119 + x93); -+ out[3] = x96; -+ out[4] = x99; -+ out[5] = x102; -+ out[6] = x105; -+ out[7] = x108; -+ out[8] = x111; -+ out[9] = x114; -+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} -+} -+ -+static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g) -+{ -+ fe_mul_impl(h->v, f->v, g->v); -+} -+ -+static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) 
-+{ -+ fe_mul_impl(h->v, f->v, g->v); -+} -+ -+static __always_inline void -+fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) -+{ -+ fe_mul_impl(h->v, f->v, g->v); -+} -+ -+static void fe_sqr_impl(u32 out[10], const u32 in1[10]) -+{ -+ { const u32 x17 = in1[9]; -+ { const u32 x18 = in1[8]; -+ { const u32 x16 = in1[7]; -+ { const u32 x14 = in1[6]; -+ { const u32 x12 = in1[5]; -+ { const u32 x10 = in1[4]; -+ { const u32 x8 = in1[3]; -+ { const u32 x6 = in1[2]; -+ { const u32 x4 = in1[1]; -+ { const u32 x2 = in1[0]; -+ { u64 x19 = ((u64)x2 * x2); -+ { u64 x20 = ((u64)(0x2 * x2) * x4); -+ { u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6))); -+ { u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8))); -+ { u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10)); -+ { u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12))); -+ { u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12))); -+ { u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16))); -+ { u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12)))))); -+ { u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17))); -+ { u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17))))); -+ { u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17))); -+ { u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17)))))); -+ { u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17))); -+ { u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17))); -+ { u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17))); -+ { u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17)); -+ { u64 x36 = 
((u64)(0x2 * x18) * x17); -+ { u64 x37 = ((u64)(0x2 * x17) * x17); -+ { u64 x38 = (x27 + (x37 << 0x4)); -+ { u64 x39 = (x38 + (x37 << 0x1)); -+ { u64 x40 = (x39 + x37); -+ { u64 x41 = (x26 + (x36 << 0x4)); -+ { u64 x42 = (x41 + (x36 << 0x1)); -+ { u64 x43 = (x42 + x36); -+ { u64 x44 = (x25 + (x35 << 0x4)); -+ { u64 x45 = (x44 + (x35 << 0x1)); -+ { u64 x46 = (x45 + x35); -+ { u64 x47 = (x24 + (x34 << 0x4)); -+ { u64 x48 = (x47 + (x34 << 0x1)); -+ { u64 x49 = (x48 + x34); -+ { u64 x50 = (x23 + (x33 << 0x4)); -+ { u64 x51 = (x50 + (x33 << 0x1)); -+ { u64 x52 = (x51 + x33); -+ { u64 x53 = (x22 + (x32 << 0x4)); -+ { u64 x54 = (x53 + (x32 << 0x1)); -+ { u64 x55 = (x54 + x32); -+ { u64 x56 = (x21 + (x31 << 0x4)); -+ { u64 x57 = (x56 + (x31 << 0x1)); -+ { u64 x58 = (x57 + x31); -+ { u64 x59 = (x20 + (x30 << 0x4)); -+ { u64 x60 = (x59 + (x30 << 0x1)); -+ { u64 x61 = (x60 + x30); -+ { u64 x62 = (x19 + (x29 << 0x4)); -+ { u64 x63 = (x62 + (x29 << 0x1)); -+ { u64 x64 = (x63 + x29); -+ { u64 x65 = (x64 >> 0x1a); -+ { u32 x66 = ((u32)x64 & 0x3ffffff); -+ { u64 x67 = (x65 + x61); -+ { u64 x68 = (x67 >> 0x19); -+ { u32 x69 = ((u32)x67 & 0x1ffffff); -+ { u64 x70 = (x68 + x58); -+ { u64 x71 = (x70 >> 0x1a); -+ { u32 x72 = ((u32)x70 & 0x3ffffff); -+ { u64 x73 = (x71 + x55); -+ { u64 x74 = (x73 >> 0x19); -+ { u32 x75 = ((u32)x73 & 0x1ffffff); -+ { u64 x76 = (x74 + x52); -+ { u64 x77 = (x76 >> 0x1a); -+ { u32 x78 = ((u32)x76 & 0x3ffffff); -+ { u64 x79 = (x77 + x49); -+ { u64 x80 = (x79 >> 0x19); -+ { u32 x81 = ((u32)x79 & 0x1ffffff); -+ { u64 x82 = (x80 + x46); -+ { u64 x83 = (x82 >> 0x1a); -+ { u32 x84 = ((u32)x82 & 0x3ffffff); -+ { u64 x85 = (x83 + x43); -+ { u64 x86 = (x85 >> 0x19); -+ { u32 x87 = ((u32)x85 & 0x1ffffff); -+ { u64 x88 = (x86 + x40); -+ { u64 x89 = (x88 >> 0x1a); -+ { u32 x90 = ((u32)x88 & 0x3ffffff); -+ { u64 x91 = (x89 + x28); -+ { u64 x92 = (x91 >> 0x19); -+ { u32 x93 = ((u32)x91 & 0x1ffffff); -+ { u64 x94 = (x66 + (0x13 * x92)); -+ { u32 x95 = (u32) (x94 >> 0x1a); 
-+ { u32 x96 = ((u32)x94 & 0x3ffffff); -+ { u32 x97 = (x95 + x69); -+ { u32 x98 = (x97 >> 0x19); -+ { u32 x99 = (x97 & 0x1ffffff); -+ out[0] = x96; -+ out[1] = x99; -+ out[2] = (x98 + x72); -+ out[3] = x75; -+ out[4] = x78; -+ out[5] = x81; -+ out[6] = x84; -+ out[7] = x87; -+ out[8] = x90; -+ out[9] = x93; -+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} -+} -+ -+static __always_inline void fe_sq_tl(fe *h, const fe_loose *f) -+{ -+ fe_sqr_impl(h->v, f->v); -+} -+ -+static __always_inline void fe_sq_tt(fe *h, const fe *f) -+{ -+ fe_sqr_impl(h->v, f->v); -+} -+ -+static __always_inline void fe_loose_invert(fe *out, const fe_loose *z) -+{ -+ fe t0; -+ fe t1; -+ fe t2; -+ fe t3; -+ int i; -+ -+ fe_sq_tl(&t0, z); -+ fe_sq_tt(&t1, &t0); -+ for (i = 1; i < 2; ++i) -+ fe_sq_tt(&t1, &t1); -+ fe_mul_tlt(&t1, z, &t1); -+ fe_mul_ttt(&t0, &t0, &t1); -+ fe_sq_tt(&t2, &t0); -+ fe_mul_ttt(&t1, &t1, &t2); -+ fe_sq_tt(&t2, &t1); -+ for (i = 1; i < 5; ++i) -+ fe_sq_tt(&t2, &t2); -+ fe_mul_ttt(&t1, &t2, &t1); -+ fe_sq_tt(&t2, &t1); -+ for (i = 1; i < 10; ++i) -+ fe_sq_tt(&t2, &t2); -+ fe_mul_ttt(&t2, &t2, &t1); -+ fe_sq_tt(&t3, &t2); -+ for (i = 1; i < 20; ++i) -+ fe_sq_tt(&t3, &t3); -+ fe_mul_ttt(&t2, &t3, &t2); -+ fe_sq_tt(&t2, &t2); -+ for (i = 1; i < 10; ++i) -+ fe_sq_tt(&t2, &t2); -+ fe_mul_ttt(&t1, &t2, &t1); -+ fe_sq_tt(&t2, &t1); -+ for (i = 1; i < 50; ++i) -+ fe_sq_tt(&t2, &t2); -+ fe_mul_ttt(&t2, &t2, &t1); -+ fe_sq_tt(&t3, &t2); -+ for (i = 1; i < 100; ++i) -+ fe_sq_tt(&t3, &t3); -+ fe_mul_ttt(&t2, &t3, &t2); -+ fe_sq_tt(&t2, &t2); -+ for (i = 1; i < 50; ++i) -+ fe_sq_tt(&t2, &t2); -+ fe_mul_ttt(&t1, &t2, &t1); -+ fe_sq_tt(&t1, &t1); -+ for (i = 1; i < 5; ++i) -+ fe_sq_tt(&t1, &t1); -+ fe_mul_ttt(out, &t1, &t0); -+} -+ -+static __always_inline void fe_invert(fe *out, const fe *z) -+{ -+ fe_loose l; -+ fe_copy_lt(&l, z); -+ fe_loose_invert(out, &l); -+} -+ -+/* Replace (f,g) with (g,f) if b == 1; -+ * replace (f,g) with (f,g) if 
b == 0. -+ * -+ * Preconditions: b in {0,1} -+ */ -+static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b) -+{ -+ unsigned i; -+ b = 0 - b; -+ for (i = 0; i < 10; i++) { -+ u32 x = f->v[i] ^ g->v[i]; -+ x &= b; -+ f->v[i] ^= x; -+ g->v[i] ^= x; -+ } -+} -+ -+/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/ -+static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10]) -+{ -+ { const u32 x20 = in1[9]; -+ { const u32 x21 = in1[8]; -+ { const u32 x19 = in1[7]; -+ { const u32 x17 = in1[6]; -+ { const u32 x15 = in1[5]; -+ { const u32 x13 = in1[4]; -+ { const u32 x11 = in1[3]; -+ { const u32 x9 = in1[2]; -+ { const u32 x7 = in1[1]; -+ { const u32 x5 = in1[0]; -+ { const u32 x38 = 0; -+ { const u32 x39 = 0; -+ { const u32 x37 = 0; -+ { const u32 x35 = 0; -+ { const u32 x33 = 0; -+ { const u32 x31 = 0; -+ { const u32 x29 = 0; -+ { const u32 x27 = 0; -+ { const u32 x25 = 0; -+ { const u32 x23 = 121666; -+ { u64 x40 = ((u64)x23 * x5); -+ { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5)); -+ { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5)); -+ { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5)); -+ { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5)); -+ { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5)); -+ { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5)); -+ { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5)); -+ { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + 
((u64)x39 * x5)); -+ { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5)); -+ { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9)); -+ { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9)); -+ { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13)); -+ { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13)); -+ { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17)); -+ { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17)); -+ { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19)))); -+ { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21)); -+ { u64 x58 = ((u64)(0x2 * x38) * x20); -+ { u64 x59 = (x48 + (x58 << 0x4)); -+ { u64 x60 = (x59 + (x58 << 0x1)); -+ { u64 x61 = (x60 + x58); -+ { u64 x62 = (x47 + (x57 << 0x4)); -+ { u64 x63 = (x62 + (x57 << 0x1)); -+ { u64 x64 = (x63 + x57); -+ { u64 x65 = (x46 + (x56 << 0x4)); -+ { u64 x66 = (x65 + (x56 << 0x1)); -+ { u64 x67 = (x66 + x56); -+ { u64 x68 = (x45 + (x55 << 0x4)); -+ { u64 x69 = (x68 + (x55 << 0x1)); -+ { u64 x70 = (x69 + x55); -+ { u64 x71 = (x44 + (x54 << 0x4)); -+ { u64 x72 = (x71 + (x54 << 0x1)); -+ { u64 x73 = (x72 + x54); -+ { u64 x74 = (x43 + (x53 << 0x4)); -+ { u64 x75 = (x74 + (x53 << 0x1)); -+ { u64 x76 = (x75 + x53); -+ { u64 x77 = (x42 + (x52 << 0x4)); -+ { u64 x78 = (x77 + 
(x52 << 0x1)); -+ { u64 x79 = (x78 + x52); -+ { u64 x80 = (x41 + (x51 << 0x4)); -+ { u64 x81 = (x80 + (x51 << 0x1)); -+ { u64 x82 = (x81 + x51); -+ { u64 x83 = (x40 + (x50 << 0x4)); -+ { u64 x84 = (x83 + (x50 << 0x1)); -+ { u64 x85 = (x84 + x50); -+ { u64 x86 = (x85 >> 0x1a); -+ { u32 x87 = ((u32)x85 & 0x3ffffff); -+ { u64 x88 = (x86 + x82); -+ { u64 x89 = (x88 >> 0x19); -+ { u32 x90 = ((u32)x88 & 0x1ffffff); -+ { u64 x91 = (x89 + x79); -+ { u64 x92 = (x91 >> 0x1a); -+ { u32 x93 = ((u32)x91 & 0x3ffffff); -+ { u64 x94 = (x92 + x76); -+ { u64 x95 = (x94 >> 0x19); -+ { u32 x96 = ((u32)x94 & 0x1ffffff); -+ { u64 x97 = (x95 + x73); -+ { u64 x98 = (x97 >> 0x1a); -+ { u32 x99 = ((u32)x97 & 0x3ffffff); -+ { u64 x100 = (x98 + x70); -+ { u64 x101 = (x100 >> 0x19); -+ { u32 x102 = ((u32)x100 & 0x1ffffff); -+ { u64 x103 = (x101 + x67); -+ { u64 x104 = (x103 >> 0x1a); -+ { u32 x105 = ((u32)x103 & 0x3ffffff); -+ { u64 x106 = (x104 + x64); -+ { u64 x107 = (x106 >> 0x19); -+ { u32 x108 = ((u32)x106 & 0x1ffffff); -+ { u64 x109 = (x107 + x61); -+ { u64 x110 = (x109 >> 0x1a); -+ { u32 x111 = ((u32)x109 & 0x3ffffff); -+ { u64 x112 = (x110 + x49); -+ { u64 x113 = (x112 >> 0x19); -+ { u32 x114 = ((u32)x112 & 0x1ffffff); -+ { u64 x115 = (x87 + (0x13 * x113)); -+ { u32 x116 = (u32) (x115 >> 0x1a); -+ { u32 x117 = ((u32)x115 & 0x3ffffff); -+ { u32 x118 = (x116 + x90); -+ { u32 x119 = (x118 >> 0x19); -+ { u32 x120 = (x118 & 0x1ffffff); -+ out[0] = x117; -+ out[1] = x120; -+ out[2] = (x119 + x93); -+ out[3] = x96; -+ out[4] = x99; -+ out[5] = x102; -+ out[6] = x105; -+ out[7] = x108; -+ out[8] = x111; -+ out[9] = x114; -+ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} -+} -+ -+static __always_inline void fe_mul121666(fe *h, const fe_loose *f) -+{ -+ fe_mul_121666_impl(h->v, f->v); -+} -+ -+void curve25519_generic(u8 out[CURVE25519_KEY_SIZE], -+ const u8 scalar[CURVE25519_KEY_SIZE], -+ const u8 point[CURVE25519_KEY_SIZE]) -+{ -+ fe x1, 
x2, z2, x3, z3; -+ fe_loose x2l, z2l, x3l; -+ unsigned swap = 0; -+ int pos; -+ u8 e[32]; -+ -+ memcpy(e, scalar, 32); -+ curve25519_clamp_secret(e); -+ -+ /* The following implementation was transcribed to Coq and proven to -+ * correspond to unary scalar multiplication in affine coordinates given -+ * that x1 != 0 is the x coordinate of some point on the curve. It was -+ * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives -+ * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was -+ * quantified over the underlying field, so it applies to Curve25519 -+ * itself and the quadratic twist of Curve25519. It was not proven in -+ * Coq that prime-field arithmetic correctly simulates extension-field -+ * arithmetic on prime-field values. The decoding of the byte array -+ * representation of e was not considered. -+ * -+ * Specification of Montgomery curves in affine coordinates: -+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27> -+ * -+ * Proof that these form a group that is isomorphic to a Weierstrass -+ * curve: -+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35> -+ * -+ * Coq transcription and correctness proof of the loop -+ * (where scalarbits=255): -+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118> -+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278> -+ * preconditions: 0 <= e < 2^255 (not necessarily e < order), -+ * fe_invert(0) = 0 -+ */ -+ fe_frombytes(&x1, point); -+ fe_1(&x2); -+ fe_0(&z2); -+ fe_copy(&x3, &x1); -+ fe_1(&z3); -+ -+ for (pos = 254; pos >= 0; --pos) { -+ fe tmp0, tmp1; -+ fe_loose tmp0l, tmp1l; -+ /* loop invariant as of right before the test, for the case -+ * where x1 != 0: -+ * pos >= -1; if z2 = 0 then x2 is 
nonzero; if z3 = 0 then x3 -+ * is nonzero -+ * let r := e >> (pos+1) in the following equalities of -+ * projective points: -+ * to_xz (r*P) === if swap then (x3, z3) else (x2, z2) -+ * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3) -+ * x1 is the nonzero x coordinate of the nonzero -+ * point (r*P-(r+1)*P) -+ */ -+ unsigned b = 1 & (e[pos / 8] >> (pos & 7)); -+ swap ^= b; -+ fe_cswap(&x2, &x3, swap); -+ fe_cswap(&z2, &z3, swap); -+ swap = b; -+ /* Coq transcription of ladderstep formula (called from -+ * transcribed loop): -+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89> -+ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131> -+ * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217> -+ * x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147> -+ */ -+ fe_sub(&tmp0l, &x3, &z3); -+ fe_sub(&tmp1l, &x2, &z2); -+ fe_add(&x2l, &x2, &z2); -+ fe_add(&z2l, &x3, &z3); -+ fe_mul_tll(&z3, &tmp0l, &x2l); -+ fe_mul_tll(&z2, &z2l, &tmp1l); -+ fe_sq_tl(&tmp0, &tmp1l); -+ fe_sq_tl(&tmp1, &x2l); -+ fe_add(&x3l, &z3, &z2); -+ fe_sub(&z2l, &z3, &z2); -+ fe_mul_ttt(&x2, &tmp1, &tmp0); -+ fe_sub(&tmp1l, &tmp1, &tmp0); -+ fe_sq_tl(&z2, &z2l); -+ fe_mul121666(&z3, &tmp1l); -+ fe_sq_tl(&x3, &x3l); -+ fe_add(&tmp0l, &tmp0, &z3); -+ fe_mul_ttt(&z3, &x1, &z2); -+ fe_mul_tll(&z2, &tmp1l, &tmp0l); -+ } -+ /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) -+ * else (x2, z2) -+ */ -+ fe_cswap(&x2, &x3, swap); -+ fe_cswap(&z2, &z3, swap); -+ -+ fe_invert(&z2, &z2); -+ fe_mul_ttt(&x2, &x2, &z2); -+ fe_tobytes(out, &x2); -+ -+ memzero_explicit(&x1, sizeof(x1)); -+ memzero_explicit(&x2, sizeof(x2)); -+ memzero_explicit(&z2, sizeof(z2)); -+ memzero_explicit(&x3, sizeof(x3)); -+ 
memzero_explicit(&z3, sizeof(z3)); -+ memzero_explicit(&x2l, sizeof(x2l)); -+ memzero_explicit(&z2l, sizeof(z2l)); -+ memzero_explicit(&x3l, sizeof(x3l)); -+ memzero_explicit(&e, sizeof(e)); -+} ---- /dev/null -+++ b/lib/crypto/curve25519-hacl64.c -@@ -0,0 +1,788 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2016-2017 INRIA and Microsoft Corporation. -+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is a machine-generated formally verified implementation of Curve25519 -+ * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine -+ * generated, it has been tweaked to be suitable for use in the kernel. It is -+ * optimized for 64-bit machines that can efficiently work with 128-bit -+ * integer types. -+ */ -+ -+#include <asm/unaligned.h> -+#include <crypto/curve25519.h> -+#include <linux/string.h> -+ -+typedef __uint128_t u128; -+ -+static __always_inline u64 u64_eq_mask(u64 a, u64 b) -+{ -+ u64 x = a ^ b; -+ u64 minus_x = ~x + (u64)1U; -+ u64 x_or_minus_x = x | minus_x; -+ u64 xnx = x_or_minus_x >> (u32)63U; -+ u64 c = xnx - (u64)1U; -+ return c; -+} -+ -+static __always_inline u64 u64_gte_mask(u64 a, u64 b) -+{ -+ u64 x = a; -+ u64 y = b; -+ u64 x_xor_y = x ^ y; -+ u64 x_sub_y = x - y; -+ u64 x_sub_y_xor_y = x_sub_y ^ y; -+ u64 q = x_xor_y | x_sub_y_xor_y; -+ u64 x_xor_q = x ^ q; -+ u64 x_xor_q_ = x_xor_q >> (u32)63U; -+ u64 c = x_xor_q_ - (u64)1U; -+ return c; -+} -+ -+static __always_inline void modulo_carry_top(u64 *b) -+{ -+ u64 b4 = b[4]; -+ u64 b0 = b[0]; -+ u64 b4_ = b4 & 0x7ffffffffffffLLU; -+ u64 b0_ = b0 + 19 * (b4 >> 51); -+ b[4] = b4_; -+ b[0] = b0_; -+} -+ -+static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input) -+{ -+ { -+ u128 xi = input[0]; -+ output[0] = ((u64)(xi)); -+ } -+ { -+ u128 xi = input[1]; -+ output[1] = ((u64)(xi)); -+ } -+ { -+ u128 xi = input[2]; -+ output[2] = ((u64)(xi)); -+ } -+ { -+ u128 xi = input[3]; -+ 
output[3] = ((u64)(xi)); -+ } -+ { -+ u128 xi = input[4]; -+ output[4] = ((u64)(xi)); -+ } -+} -+ -+static __always_inline void -+fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s) -+{ -+ output[0] += (u128)input[0] * s; -+ output[1] += (u128)input[1] * s; -+ output[2] += (u128)input[2] * s; -+ output[3] += (u128)input[3] * s; -+ output[4] += (u128)input[4] * s; -+} -+ -+static __always_inline void fproduct_carry_wide_(u128 *tmp) -+{ -+ { -+ u32 ctr = 0; -+ u128 tctr = tmp[ctr]; -+ u128 tctrp1 = tmp[ctr + 1]; -+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; -+ u128 c = ((tctr) >> (51)); -+ tmp[ctr] = ((u128)(r0)); -+ tmp[ctr + 1] = ((tctrp1) + (c)); -+ } -+ { -+ u32 ctr = 1; -+ u128 tctr = tmp[ctr]; -+ u128 tctrp1 = tmp[ctr + 1]; -+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; -+ u128 c = ((tctr) >> (51)); -+ tmp[ctr] = ((u128)(r0)); -+ tmp[ctr + 1] = ((tctrp1) + (c)); -+ } -+ -+ { -+ u32 ctr = 2; -+ u128 tctr = tmp[ctr]; -+ u128 tctrp1 = tmp[ctr + 1]; -+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; -+ u128 c = ((tctr) >> (51)); -+ tmp[ctr] = ((u128)(r0)); -+ tmp[ctr + 1] = ((tctrp1) + (c)); -+ } -+ { -+ u32 ctr = 3; -+ u128 tctr = tmp[ctr]; -+ u128 tctrp1 = tmp[ctr + 1]; -+ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; -+ u128 c = ((tctr) >> (51)); -+ tmp[ctr] = ((u128)(r0)); -+ tmp[ctr + 1] = ((tctrp1) + (c)); -+ } -+} -+ -+static __always_inline void fmul_shift_reduce(u64 *output) -+{ -+ u64 tmp = output[4]; -+ u64 b0; -+ { -+ u32 ctr = 5 - 0 - 1; -+ u64 z = output[ctr - 1]; -+ output[ctr] = z; -+ } -+ { -+ u32 ctr = 5 - 1 - 1; -+ u64 z = output[ctr - 1]; -+ output[ctr] = z; -+ } -+ { -+ u32 ctr = 5 - 2 - 1; -+ u64 z = output[ctr - 1]; -+ output[ctr] = z; -+ } -+ { -+ u32 ctr = 5 - 3 - 1; -+ u64 z = output[ctr - 1]; -+ output[ctr] = z; -+ } -+ output[0] = tmp; -+ b0 = output[0]; -+ output[0] = 19 * b0; -+} -+ -+static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input, -+ u64 *input21) -+{ -+ u32 i; -+ u64 input2i; -+ { -+ u64 
input2i = input21[0]; -+ fproduct_sum_scalar_multiplication_(output, input, input2i); -+ fmul_shift_reduce(input); -+ } -+ { -+ u64 input2i = input21[1]; -+ fproduct_sum_scalar_multiplication_(output, input, input2i); -+ fmul_shift_reduce(input); -+ } -+ { -+ u64 input2i = input21[2]; -+ fproduct_sum_scalar_multiplication_(output, input, input2i); -+ fmul_shift_reduce(input); -+ } -+ { -+ u64 input2i = input21[3]; -+ fproduct_sum_scalar_multiplication_(output, input, input2i); -+ fmul_shift_reduce(input); -+ } -+ i = 4; -+ input2i = input21[i]; -+ fproduct_sum_scalar_multiplication_(output, input, input2i); -+} -+ -+static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21) -+{ -+ u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] }; -+ { -+ u128 b4; -+ u128 b0; -+ u128 b4_; -+ u128 b0_; -+ u64 i0; -+ u64 i1; -+ u64 i0_; -+ u64 i1_; -+ u128 t[5] = { 0 }; -+ fmul_mul_shift_reduce_(t, tmp, input21); -+ fproduct_carry_wide_(t); -+ b4 = t[4]; -+ b0 = t[0]; -+ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); -+ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); -+ t[4] = b4_; -+ t[0] = b0_; -+ fproduct_copy_from_wide_(output, t); -+ i0 = output[0]; -+ i1 = output[1]; -+ i0_ = i0 & 0x7ffffffffffffLLU; -+ i1_ = i1 + (i0 >> 51); -+ output[0] = i0_; -+ output[1] = i1_; -+ } -+} -+ -+static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output) -+{ -+ u64 r0 = output[0]; -+ u64 r1 = output[1]; -+ u64 r2 = output[2]; -+ u64 r3 = output[3]; -+ u64 r4 = output[4]; -+ u64 d0 = r0 * 2; -+ u64 d1 = r1 * 2; -+ u64 d2 = r2 * 2 * 19; -+ u64 d419 = r4 * 19; -+ u64 d4 = d419 * 2; -+ u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) + -+ (((u128)(d2) * (r3)))); -+ u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) + -+ (((u128)(r3 * 19) * (r3)))); -+ u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) + -+ (((u128)(d4) * (r3)))); -+ u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) + -+ (((u128)(r4) * 
(d419)))); -+ u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) + -+ (((u128)(r2) * (r2)))); -+ tmp[0] = s0; -+ tmp[1] = s1; -+ tmp[2] = s2; -+ tmp[3] = s3; -+ tmp[4] = s4; -+} -+ -+static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output) -+{ -+ u128 b4; -+ u128 b0; -+ u128 b4_; -+ u128 b0_; -+ u64 i0; -+ u64 i1; -+ u64 i0_; -+ u64 i1_; -+ fsquare_fsquare__(tmp, output); -+ fproduct_carry_wide_(tmp); -+ b4 = tmp[4]; -+ b0 = tmp[0]; -+ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); -+ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); -+ tmp[4] = b4_; -+ tmp[0] = b0_; -+ fproduct_copy_from_wide_(output, tmp); -+ i0 = output[0]; -+ i1 = output[1]; -+ i0_ = i0 & 0x7ffffffffffffLLU; -+ i1_ = i1 + (i0 >> 51); -+ output[0] = i0_; -+ output[1] = i1_; -+} -+ -+static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp, -+ u32 count1) -+{ -+ u32 i; -+ fsquare_fsquare_(tmp, output); -+ for (i = 1; i < count1; ++i) -+ fsquare_fsquare_(tmp, output); -+} -+ -+static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input, -+ u32 count1) -+{ -+ u128 t[5]; -+ memcpy(output, input, 5 * sizeof(*input)); -+ fsquare_fsquare_times_(output, t, count1); -+} -+ -+static __always_inline void fsquare_fsquare_times_inplace(u64 *output, -+ u32 count1) -+{ -+ u128 t[5]; -+ fsquare_fsquare_times_(output, t, count1); -+} -+ -+static __always_inline void crecip_crecip(u64 *out, u64 *z) -+{ -+ u64 buf[20] = { 0 }; -+ u64 *a0 = buf; -+ u64 *t00 = buf + 5; -+ u64 *b0 = buf + 10; -+ u64 *t01; -+ u64 *b1; -+ u64 *c0; -+ u64 *a; -+ u64 *t0; -+ u64 *b; -+ u64 *c; -+ fsquare_fsquare_times(a0, z, 1); -+ fsquare_fsquare_times(t00, a0, 2); -+ fmul_fmul(b0, t00, z); -+ fmul_fmul(a0, b0, a0); -+ fsquare_fsquare_times(t00, a0, 1); -+ fmul_fmul(b0, t00, b0); -+ fsquare_fsquare_times(t00, b0, 5); -+ t01 = buf + 5; -+ b1 = buf + 10; -+ c0 = buf + 15; -+ fmul_fmul(b1, t01, b1); -+ fsquare_fsquare_times(t01, b1, 10); -+ fmul_fmul(c0, t01, b1); -+ 
fsquare_fsquare_times(t01, c0, 20); -+ fmul_fmul(t01, t01, c0); -+ fsquare_fsquare_times_inplace(t01, 10); -+ fmul_fmul(b1, t01, b1); -+ fsquare_fsquare_times(t01, b1, 50); -+ a = buf; -+ t0 = buf + 5; -+ b = buf + 10; -+ c = buf + 15; -+ fmul_fmul(c, t0, b); -+ fsquare_fsquare_times(t0, c, 100); -+ fmul_fmul(t0, t0, c); -+ fsquare_fsquare_times_inplace(t0, 50); -+ fmul_fmul(t0, t0, b); -+ fsquare_fsquare_times_inplace(t0, 5); -+ fmul_fmul(out, t0, a); -+} -+ -+static __always_inline void fsum(u64 *a, u64 *b) -+{ -+ a[0] += b[0]; -+ a[1] += b[1]; -+ a[2] += b[2]; -+ a[3] += b[3]; -+ a[4] += b[4]; -+} -+ -+static __always_inline void fdifference(u64 *a, u64 *b) -+{ -+ u64 tmp[5] = { 0 }; -+ u64 b0; -+ u64 b1; -+ u64 b2; -+ u64 b3; -+ u64 b4; -+ memcpy(tmp, b, 5 * sizeof(*b)); -+ b0 = tmp[0]; -+ b1 = tmp[1]; -+ b2 = tmp[2]; -+ b3 = tmp[3]; -+ b4 = tmp[4]; -+ tmp[0] = b0 + 0x3fffffffffff68LLU; -+ tmp[1] = b1 + 0x3ffffffffffff8LLU; -+ tmp[2] = b2 + 0x3ffffffffffff8LLU; -+ tmp[3] = b3 + 0x3ffffffffffff8LLU; -+ tmp[4] = b4 + 0x3ffffffffffff8LLU; -+ { -+ u64 xi = a[0]; -+ u64 yi = tmp[0]; -+ a[0] = yi - xi; -+ } -+ { -+ u64 xi = a[1]; -+ u64 yi = tmp[1]; -+ a[1] = yi - xi; -+ } -+ { -+ u64 xi = a[2]; -+ u64 yi = tmp[2]; -+ a[2] = yi - xi; -+ } -+ { -+ u64 xi = a[3]; -+ u64 yi = tmp[3]; -+ a[3] = yi - xi; -+ } -+ { -+ u64 xi = a[4]; -+ u64 yi = tmp[4]; -+ a[4] = yi - xi; -+ } -+} -+ -+static __always_inline void fscalar(u64 *output, u64 *b, u64 s) -+{ -+ u128 tmp[5]; -+ u128 b4; -+ u128 b0; -+ u128 b4_; -+ u128 b0_; -+ { -+ u64 xi = b[0]; -+ tmp[0] = ((u128)(xi) * (s)); -+ } -+ { -+ u64 xi = b[1]; -+ tmp[1] = ((u128)(xi) * (s)); -+ } -+ { -+ u64 xi = b[2]; -+ tmp[2] = ((u128)(xi) * (s)); -+ } -+ { -+ u64 xi = b[3]; -+ tmp[3] = ((u128)(xi) * (s)); -+ } -+ { -+ u64 xi = b[4]; -+ tmp[4] = ((u128)(xi) * (s)); -+ } -+ fproduct_carry_wide_(tmp); -+ b4 = tmp[4]; -+ b0 = tmp[0]; -+ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); -+ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> 
(51)))))))); -+ tmp[4] = b4_; -+ tmp[0] = b0_; -+ fproduct_copy_from_wide_(output, tmp); -+} -+ -+static __always_inline void fmul(u64 *output, u64 *a, u64 *b) -+{ -+ fmul_fmul(output, a, b); -+} -+ -+static __always_inline void crecip(u64 *output, u64 *input) -+{ -+ crecip_crecip(output, input); -+} -+ -+static __always_inline void point_swap_conditional_step(u64 *a, u64 *b, -+ u64 swap1, u32 ctr) -+{ -+ u32 i = ctr - 1; -+ u64 ai = a[i]; -+ u64 bi = b[i]; -+ u64 x = swap1 & (ai ^ bi); -+ u64 ai1 = ai ^ x; -+ u64 bi1 = bi ^ x; -+ a[i] = ai1; -+ b[i] = bi1; -+} -+ -+static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1) -+{ -+ point_swap_conditional_step(a, b, swap1, 5); -+ point_swap_conditional_step(a, b, swap1, 4); -+ point_swap_conditional_step(a, b, swap1, 3); -+ point_swap_conditional_step(a, b, swap1, 2); -+ point_swap_conditional_step(a, b, swap1, 1); -+} -+ -+static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap) -+{ -+ u64 swap1 = 0 - iswap; -+ point_swap_conditional5(a, b, swap1); -+ point_swap_conditional5(a + 5, b + 5, swap1); -+} -+ -+static __always_inline void point_copy(u64 *output, u64 *input) -+{ -+ memcpy(output, input, 5 * sizeof(*input)); -+ memcpy(output + 5, input + 5, 5 * sizeof(*input)); -+} -+ -+static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p, -+ u64 *pq, u64 *qmqp) -+{ -+ u64 *qx = qmqp; -+ u64 *x2 = pp; -+ u64 *z2 = pp + 5; -+ u64 *x3 = ppq; -+ u64 *z3 = ppq + 5; -+ u64 *x = p; -+ u64 *z = p + 5; -+ u64 *xprime = pq; -+ u64 *zprime = pq + 5; -+ u64 buf[40] = { 0 }; -+ u64 *origx = buf; -+ u64 *origxprime0 = buf + 5; -+ u64 *xxprime0; -+ u64 *zzprime0; -+ u64 *origxprime; -+ xxprime0 = buf + 25; -+ zzprime0 = buf + 30; -+ memcpy(origx, x, 5 * sizeof(*x)); -+ fsum(x, z); -+ fdifference(z, origx); -+ memcpy(origxprime0, xprime, 5 * sizeof(*xprime)); -+ fsum(xprime, zprime); -+ fdifference(zprime, origxprime0); -+ fmul(xxprime0, xprime, z); -+ fmul(zzprime0, x, 
zprime); -+ origxprime = buf + 5; -+ { -+ u64 *xx0; -+ u64 *zz0; -+ u64 *xxprime; -+ u64 *zzprime; -+ u64 *zzzprime; -+ xx0 = buf + 15; -+ zz0 = buf + 20; -+ xxprime = buf + 25; -+ zzprime = buf + 30; -+ zzzprime = buf + 35; -+ memcpy(origxprime, xxprime, 5 * sizeof(*xxprime)); -+ fsum(xxprime, zzprime); -+ fdifference(zzprime, origxprime); -+ fsquare_fsquare_times(x3, xxprime, 1); -+ fsquare_fsquare_times(zzzprime, zzprime, 1); -+ fmul(z3, zzzprime, qx); -+ fsquare_fsquare_times(xx0, x, 1); -+ fsquare_fsquare_times(zz0, z, 1); -+ { -+ u64 *zzz; -+ u64 *xx; -+ u64 *zz; -+ u64 scalar; -+ zzz = buf + 10; -+ xx = buf + 15; -+ zz = buf + 20; -+ fmul(x2, xx, zz); -+ fdifference(zz, xx); -+ scalar = 121665; -+ fscalar(zzz, zz, scalar); -+ fsum(zzz, xx); -+ fmul(z2, zzz, zz); -+ } -+ } -+} -+ -+static __always_inline void -+ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, -+ u64 *q, u8 byt) -+{ -+ u64 bit0 = (u64)(byt >> 7); -+ u64 bit; -+ point_swap_conditional(nq, nqpq, bit0); -+ addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q); -+ bit = (u64)(byt >> 7); -+ point_swap_conditional(nq2, nqpq2, bit); -+} -+ -+static __always_inline void -+ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2, -+ u64 *nqpq2, u64 *q, u8 byt) -+{ -+ u8 byt1; -+ ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); -+ byt1 = byt << 1; -+ ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); -+} -+ -+static __always_inline void -+ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, -+ u64 *q, u8 byt, u32 i) -+{ -+ while (i--) { -+ ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2, -+ nqpq2, q, byt); -+ byt <<= 2; -+ } -+} -+ -+static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq, -+ u64 *nqpq, u64 *nq2, -+ u64 *nqpq2, u64 *q, -+ u32 i) -+{ -+ while (i--) { -+ u8 byte = n1[i]; -+ ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, -+ byte, 4); -+ } -+} -+ 
-+static void ladder_cmult(u64 *result, u8 *n1, u64 *q) -+{ -+ u64 point_buf[40] = { 0 }; -+ u64 *nq = point_buf; -+ u64 *nqpq = point_buf + 10; -+ u64 *nq2 = point_buf + 20; -+ u64 *nqpq2 = point_buf + 30; -+ point_copy(nqpq, q); -+ nq[0] = 1; -+ ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); -+ point_copy(result, nq); -+} -+ -+static __always_inline void format_fexpand(u64 *output, const u8 *input) -+{ -+ const u8 *x00 = input + 6; -+ const u8 *x01 = input + 12; -+ const u8 *x02 = input + 19; -+ const u8 *x0 = input + 24; -+ u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4; -+ i0 = get_unaligned_le64(input); -+ i1 = get_unaligned_le64(x00); -+ i2 = get_unaligned_le64(x01); -+ i3 = get_unaligned_le64(x02); -+ i4 = get_unaligned_le64(x0); -+ output0 = i0 & 0x7ffffffffffffLLU; -+ output1 = i1 >> 3 & 0x7ffffffffffffLLU; -+ output2 = i2 >> 6 & 0x7ffffffffffffLLU; -+ output3 = i3 >> 1 & 0x7ffffffffffffLLU; -+ output4 = i4 >> 12 & 0x7ffffffffffffLLU; -+ output[0] = output0; -+ output[1] = output1; -+ output[2] = output2; -+ output[3] = output3; -+ output[4] = output4; -+} -+ -+static __always_inline void format_fcontract_first_carry_pass(u64 *input) -+{ -+ u64 t0 = input[0]; -+ u64 t1 = input[1]; -+ u64 t2 = input[2]; -+ u64 t3 = input[3]; -+ u64 t4 = input[4]; -+ u64 t1_ = t1 + (t0 >> 51); -+ u64 t0_ = t0 & 0x7ffffffffffffLLU; -+ u64 t2_ = t2 + (t1_ >> 51); -+ u64 t1__ = t1_ & 0x7ffffffffffffLLU; -+ u64 t3_ = t3 + (t2_ >> 51); -+ u64 t2__ = t2_ & 0x7ffffffffffffLLU; -+ u64 t4_ = t4 + (t3_ >> 51); -+ u64 t3__ = t3_ & 0x7ffffffffffffLLU; -+ input[0] = t0_; -+ input[1] = t1__; -+ input[2] = t2__; -+ input[3] = t3__; -+ input[4] = t4_; -+} -+ -+static __always_inline void format_fcontract_first_carry_full(u64 *input) -+{ -+ format_fcontract_first_carry_pass(input); -+ modulo_carry_top(input); -+} -+ -+static __always_inline void format_fcontract_second_carry_pass(u64 *input) -+{ -+ u64 t0 = input[0]; -+ u64 t1 = input[1]; -+ u64 t2 = 
input[2]; -+ u64 t3 = input[3]; -+ u64 t4 = input[4]; -+ u64 t1_ = t1 + (t0 >> 51); -+ u64 t0_ = t0 & 0x7ffffffffffffLLU; -+ u64 t2_ = t2 + (t1_ >> 51); -+ u64 t1__ = t1_ & 0x7ffffffffffffLLU; -+ u64 t3_ = t3 + (t2_ >> 51); -+ u64 t2__ = t2_ & 0x7ffffffffffffLLU; -+ u64 t4_ = t4 + (t3_ >> 51); -+ u64 t3__ = t3_ & 0x7ffffffffffffLLU; -+ input[0] = t0_; -+ input[1] = t1__; -+ input[2] = t2__; -+ input[3] = t3__; -+ input[4] = t4_; -+} -+ -+static __always_inline void format_fcontract_second_carry_full(u64 *input) -+{ -+ u64 i0; -+ u64 i1; -+ u64 i0_; -+ u64 i1_; -+ format_fcontract_second_carry_pass(input); -+ modulo_carry_top(input); -+ i0 = input[0]; -+ i1 = input[1]; -+ i0_ = i0 & 0x7ffffffffffffLLU; -+ i1_ = i1 + (i0 >> 51); -+ input[0] = i0_; -+ input[1] = i1_; -+} -+ -+static __always_inline void format_fcontract_trim(u64 *input) -+{ -+ u64 a0 = input[0]; -+ u64 a1 = input[1]; -+ u64 a2 = input[2]; -+ u64 a3 = input[3]; -+ u64 a4 = input[4]; -+ u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU); -+ u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU); -+ u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU); -+ u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU); -+ u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU); -+ u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4; -+ u64 a0_ = a0 - (0x7ffffffffffedLLU & mask); -+ u64 a1_ = a1 - (0x7ffffffffffffLLU & mask); -+ u64 a2_ = a2 - (0x7ffffffffffffLLU & mask); -+ u64 a3_ = a3 - (0x7ffffffffffffLLU & mask); -+ u64 a4_ = a4 - (0x7ffffffffffffLLU & mask); -+ input[0] = a0_; -+ input[1] = a1_; -+ input[2] = a2_; -+ input[3] = a3_; -+ input[4] = a4_; -+} -+ -+static __always_inline void format_fcontract_store(u8 *output, u64 *input) -+{ -+ u64 t0 = input[0]; -+ u64 t1 = input[1]; -+ u64 t2 = input[2]; -+ u64 t3 = input[3]; -+ u64 t4 = input[4]; -+ u64 o0 = t1 << 51 | t0; -+ u64 o1 = t2 << 38 | t1 >> 13; -+ u64 o2 = t3 << 25 | t2 >> 26; -+ u64 o3 = t4 << 12 | t3 >> 39; -+ u8 *b0 = output; -+ u8 *b1 = output + 8; -+ u8 *b2 = output + 
16; -+ u8 *b3 = output + 24; -+ put_unaligned_le64(o0, b0); -+ put_unaligned_le64(o1, b1); -+ put_unaligned_le64(o2, b2); -+ put_unaligned_le64(o3, b3); -+} -+ -+static __always_inline void format_fcontract(u8 *output, u64 *input) -+{ -+ format_fcontract_first_carry_full(input); -+ format_fcontract_second_carry_full(input); -+ format_fcontract_trim(input); -+ format_fcontract_store(output, input); -+} -+ -+static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point) -+{ -+ u64 *x = point; -+ u64 *z = point + 5; -+ u64 buf[10] __aligned(32) = { 0 }; -+ u64 *zmone = buf; -+ u64 *sc = buf + 5; -+ crecip(zmone, z); -+ fmul(sc, x, zmone); -+ format_fcontract(scalar, sc); -+} -+ -+void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE], -+ const u8 basepoint[CURVE25519_KEY_SIZE]) -+{ -+ u64 buf0[10] __aligned(32) = { 0 }; -+ u64 *x0 = buf0; -+ u64 *z = buf0 + 5; -+ u64 *q; -+ format_fexpand(x0, basepoint); -+ z[0] = 1; -+ q = buf0; -+ { -+ u8 e[32] __aligned(32) = { 0 }; -+ u8 *scalar; -+ memcpy(e, secret, 32); -+ curve25519_clamp_secret(e); -+ scalar = e; -+ { -+ u64 buf[15] = { 0 }; -+ u64 *nq = buf; -+ u64 *x = nq; -+ x[0] = 1; -+ ladder_cmult(nq, scalar, q); -+ format_scalar_of_point(mypublic, nq); -+ memzero_explicit(buf, sizeof(buf)); -+ } -+ memzero_explicit(e, sizeof(e)); -+ } -+ memzero_explicit(buf0, sizeof(buf0)); -+} ---- /dev/null -+++ b/lib/crypto/curve25519.c -@@ -0,0 +1,25 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is an implementation of the Curve25519 ECDH algorithm, using either -+ * a 32-bit implementation or a 64-bit implementation with 128-bit integers, -+ * depending on what is supported by the target compiler. 
-+ * -+ * Information: https://cr.yp.to/ecdh.html -+ */ -+ -+#include <crypto/curve25519.h> -+#include <linux/module.h> -+#include <linux/init.h> -+ -+const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; -+const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; -+ -+EXPORT_SYMBOL(curve25519_null_point); -+EXPORT_SYMBOL(curve25519_base_point); -+EXPORT_SYMBOL(curve25519_generic); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_DESCRIPTION("Curve25519 scalar multiplication"); -+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0026-crypto-curve25519-add-kpp-selftest.patch b/target/linux/generic/backport-5.4/080-wireguard-0026-crypto-curve25519-add-kpp-selftest.patch deleted file mode 100644 index b2813aeb6a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0026-crypto-curve25519-add-kpp-selftest.patch +++ /dev/null @@ -1,1268 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:33 +0100 -Subject: [PATCH] crypto: curve25519 - add kpp selftest - -commit f613457a7af085728297bef71233c37faf3c01b1 upstream. - -In preparation of introducing KPP implementations of Curve25519, import -the set of test cases proposed by the Zinc patch set, but converted to -the KPP format. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - crypto/testmgr.c | 6 + - crypto/testmgr.h | 1225 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 1231 insertions(+) - ---- a/crypto/testmgr.c -+++ b/crypto/testmgr.c -@@ -4296,6 +4296,12 @@ static const struct alg_test_desc alg_te - .test = alg_test_null, - .fips_allowed = 1, - }, { -+ .alg = "curve25519", -+ .test = alg_test_kpp, -+ .suite = { -+ .kpp = __VECS(curve25519_tv_template) -+ } -+ }, { - .alg = "deflate", - .test = alg_test_comp, - .fips_allowed = 1, ---- a/crypto/testmgr.h -+++ b/crypto/testmgr.h -@@ -1030,6 +1030,1231 @@ static const struct kpp_testvec dh_tv_te - } - }; - -+static const struct kpp_testvec curve25519_tv_template[] = { -+{ -+ .secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, -+ 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, -+ 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, -+ 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, -+ .b_public = (u8[32]){ 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, -+ 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, -+ 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, -+ 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, -+ .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, -+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, -+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, -+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+{ -+ .secret = (u8[32]){ 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, -+ 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, -+ 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, -+ 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, -+ .b_public = (u8[32]){ 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, -+ 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, -+ 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, -+ 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, -+ .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 
0xce, 0x2d, 0xe1, -+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, -+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, -+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+{ -+ .secret = (u8[32]){ 1 }, -+ .b_public = (u8[32]){ 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .expected_ss = (u8[32]){ 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, -+ 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, -+ 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, -+ 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+{ -+ .secret = (u8[32]){ 1 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, -+ 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, -+ 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, -+ 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+{ -+ .secret = (u8[32]){ 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, -+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, -+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, -+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, -+ .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, -+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, -+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, -+ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, -+ .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, -+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, -+ 0x32, 0xec, 
0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, -+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+{ -+ .secret = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, -+ .expected_ss = (u8[32]){ 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, -+ 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, -+ 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, -+ 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+{ -+ .secret = (u8[32]){ 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, -+ .expected_ss = (u8[32]){ 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, -+ 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, -+ 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, -+ 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - normal case */ -+{ -+ .secret = (u8[32]){ 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, -+ 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, -+ 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, -+ 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, -+ .b_public = (u8[32]){ 
0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, -+ 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, -+ 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, -+ 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, -+ .expected_ss = (u8[32]){ 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, -+ 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, -+ 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, -+ 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key on twist */ -+{ -+ .secret = (u8[32]){ 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, -+ 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, -+ 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, -+ 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, -+ .b_public = (u8[32]){ 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, -+ 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, -+ 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, -+ 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, -+ .expected_ss = (u8[32]){ 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, -+ 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, -+ 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, -+ 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key on twist */ -+{ -+ .secret = (u8[32]){ 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, -+ 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, -+ 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, -+ 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, -+ .b_public = (u8[32]){ 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, -+ 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, -+ 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, -+ 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, -+ .expected_ss = (u8[32]){ 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, -+ 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, -+ 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 
0xc0, -+ 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key on twist */ -+{ -+ .secret = (u8[32]){ 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, -+ 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, -+ 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, -+ 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, -+ .b_public = (u8[32]){ 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, -+ 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, -+ 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, -+ 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, -+ .expected_ss = (u8[32]){ 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, -+ 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, -+ 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, -+ 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key on twist */ -+{ -+ .secret = (u8[32]){ 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, -+ 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, -+ 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, -+ 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, -+ .b_public = (u8[32]){ 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, -+ 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, -+ 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, -+ 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, -+ .expected_ss = (u8[32]){ 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, -+ 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, -+ 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, -+ 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key on twist */ -+{ -+ .secret = (u8[32]){ 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, -+ 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, -+ 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, -+ 0xe3, 0x24, 
0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, -+ .b_public = (u8[32]){ 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, -+ 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, -+ 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, -+ 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, -+ .expected_ss = (u8[32]){ 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, -+ 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, -+ 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, -+ 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case on twist */ -+{ -+ .secret = (u8[32]){ 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, -+ 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, -+ 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, -+ 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, -+ .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .expected_ss = (u8[32]){ 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, -+ 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, -+ 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, -+ 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case on twist */ -+{ -+ .secret = (u8[32]){ 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, -+ 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, -+ 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, -+ 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, -+ .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .expected_ss = (u8[32]){ 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, -+ 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 
0x9a, 0xe5, 0x0b, -+ 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, -+ 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case on twist */ -+{ -+ .secret = (u8[32]){ 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, -+ 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, -+ 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, -+ 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, -+ 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, -+ 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, -+ .expected_ss = (u8[32]){ 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, -+ 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, -+ 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, -+ 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case on twist */ -+{ -+ .secret = (u8[32]){ 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, -+ 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, -+ 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, -+ 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, -+ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, -+ 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, -+ 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, -+ 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, -+ 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, -+ 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case on twist */ -+{ -+ .secret = (u8[32]){ 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, -+ 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, -+ 0xde, 
0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, -+ 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, -+ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, -+ 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, -+ 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, -+ 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case on twist */ -+{ -+ .secret = (u8[32]){ 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, -+ 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, -+ 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, -+ 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, -+ .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, -+ 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, -+ 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, -+ 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for public key */ -+{ -+ .secret = (u8[32]){ 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, -+ 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, -+ 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, -+ 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, -+ .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .expected_ss = (u8[32]){ 0x0f, 0xca, 0xb5, 
0xd8, 0x42, 0xa0, 0x78, 0xd7, -+ 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, -+ 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, -+ 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for public key */ -+{ -+ .secret = (u8[32]){ 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, -+ 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, -+ 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, -+ 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, -+ .expected_ss = (u8[32]){ 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, -+ 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, -+ 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, -+ 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for public key */ -+{ -+ .secret = (u8[32]){ 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, -+ 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, -+ 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, -+ 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, -+ .expected_ss = (u8[32]){ 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, -+ 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, -+ 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, -+ 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for public key */ -+{ -+ .secret = (u8[32]){ 0x58, 0xeb, 0xcb, 0x35, 
0xb0, 0xf8, 0x84, 0x5c, -+ 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, -+ 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, -+ 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, -+ 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, -+ 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, -+ 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, -+ .expected_ss = (u8[32]){ 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, -+ 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, -+ 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, -+ 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for public key */ -+{ -+ .secret = (u8[32]){ 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, -+ 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, -+ 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, -+ 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .expected_ss = (u8[32]){ 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, -+ 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, -+ 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, -+ 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for public key */ -+{ -+ .secret = (u8[32]){ 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, -+ 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, -+ 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, -+ 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, -+ 0xff, 
0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, -+ 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, -+ 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, -+ 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for public key */ -+{ -+ .secret = (u8[32]){ 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, -+ 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, -+ 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, -+ 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, -+ .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, -+ 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, -+ 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, -+ 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, -+ 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, -+ 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, -+ 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, -+ .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, -+ 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, -+ 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, -+ 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - 
public key >= p */ -+{ -+ .secret = (u8[32]){ 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, -+ 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, -+ 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, -+ 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, -+ .b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, -+ 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, -+ 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, -+ 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, -+ 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, -+ 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, -+ 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, -+ .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, -+ 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, -+ 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, -+ 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, -+ 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, -+ 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, -+ 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .expected_ss = (u8[32]){ 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, -+ 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, -+ 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, -+ 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, -+ 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, -+ 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, -+ 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, -+ .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .expected_ss = (u8[32]){ 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, -+ 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, -+ 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, -+ 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, -+ 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, -+ 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, -+ 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, -+ .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .expected_ss = (u8[32]){ 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, -+ 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, -+ 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, -+ 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ 
.expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, -+ 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, -+ 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, -+ 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, -+ .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .expected_ss = (u8[32]){ 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, -+ 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, -+ 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, -+ 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, -+ 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, -+ 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, -+ 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, -+ .b_public = (u8[32]){ 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, -+ 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, -+ 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, -+ 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, -+ 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, -+ 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, -+ 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, -+ .b_public = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, -+ 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, -+ 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, -+ 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, -+ 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, -+ 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, -+ 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, -+ .b_public = (u8[32]){ 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, -+ 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, -+ 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, -+ 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, -+ 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, -+ 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, -+ 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, -+ .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, -+ 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, -+ 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, -+ 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, -+ 
.secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, -+ 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, -+ 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, -+ 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, -+ .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, -+ 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, -+ 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, -+ 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, -+ 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, -+ 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, -+ 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, -+ .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, -+ 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, -+ 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, -+ 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, -+ 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, -+ 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, -+ 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, -+ .b_public = (u8[32]){ 0xf0, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, -+ 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, -+ 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, -+ 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, -+ 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, -+ 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, -+ 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, -+ .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, -+ 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, -+ 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, -+ 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - public key >= p */ -+{ -+ .secret = (u8[32]){ 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, -+ 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, -+ 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, -+ 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, -+ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .expected_ss = (u8[32]){ 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, -+ 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, -+ 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, -+ 0xef, 0xba, 
0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - RFC 7748 */ -+{ -+ .secret = (u8[32]){ 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, -+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, -+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, -+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, -+ .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, -+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, -+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, -+ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, -+ .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, -+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, -+ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, -+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - RFC 7748 */ -+{ -+ .secret = (u8[32]){ 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, -+ 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, -+ 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, -+ 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, -+ .b_public = (u8[32]){ 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, -+ 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, -+ 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, -+ 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, -+ .expected_ss = (u8[32]){ 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, -+ 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, -+ 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, -+ 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, 
-+ .b_public = (u8[32]){ 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, -+ 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, -+ 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, -+ 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, -+ .expected_ss = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, -+ 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, -+ 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, -+ 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, -+ .expected_ss = (u8[32]){ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, -+ 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, -+ 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, -+ 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, -+ .expected_ss = (u8[32]){ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, -+ 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, -+ 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, -+ 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, -+ .expected_ss = (u8[32]){ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, -+ 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, -+ 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, -+ 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, -+ .expected_ss = (u8[32]){ 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 
0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, -+ 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, -+ 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, -+ 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, -+ .expected_ss = (u8[32]){ 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, -+ 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, -+ 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, -+ 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, -+ .expected_ss = (u8[32]){ 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, -+ 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, -+ 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, -+ 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, -+ .expected_ss = (u8[32]){ 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, -+ 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, -+ 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, -+ 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, -+ .expected_ss = (u8[32]){ 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, -+ 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, -+ 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, -+ 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, -+ .expected_ss = (u8[32]){ 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 
0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, -+ 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, -+ 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, -+ 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, -+ .expected_ss = (u8[32]){ 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, -+ 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, -+ 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, -+ 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, -+ .expected_ss = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, -+ 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, -+ 0x44, 0x72, 0x05, 0xaa, 0xe9, 
0xda, 0x92, 0x8a, -+ 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, -+ .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - edge case for shared secret */ -+{ -+ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .b_public = (u8[32]){ 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, -+ 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, -+ 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, -+ 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, -+ .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - checking for overflow */ -+{ -+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .b_public = (u8[32]){ 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, -+ 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, -+ 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, -+ 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, -+ .expected_ss = (u8[32]){ 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, -+ 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, -+ 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, -+ 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ 
.expected_ss_size = 32, -+ -+}, -+/* wycheproof - checking for overflow */ -+{ -+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .b_public = (u8[32]){ 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, -+ 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, -+ 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, -+ 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, -+ .expected_ss = (u8[32]){ 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, -+ 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, -+ 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, -+ 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - checking for overflow */ -+{ -+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .b_public = (u8[32]){ 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, -+ 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, -+ 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, -+ 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, -+ .expected_ss = (u8[32]){ 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, -+ 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, -+ 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, -+ 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - checking for overflow */ -+{ -+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .b_public = (u8[32]){ 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 
0xee, -+ 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, -+ 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, -+ 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, -+ .expected_ss = (u8[32]){ 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, -+ 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, -+ 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, -+ 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - checking for overflow */ -+{ -+ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .b_public = (u8[32]){ 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, -+ 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, -+ 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, -+ 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, -+ .expected_ss = (u8[32]){ 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, -+ 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, -+ 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, -+ 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - private key == -1 (mod order) */ -+{ -+ .secret = (u8[32]){ 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, -+ 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, -+ .b_public = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, -+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, -+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, -+ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, -+ .expected_ss = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, -+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, -+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, -+ 0xa0, 0xaf, 0x21, 0x68, 
0x6e, 0x2f, 0xaa, 0x75 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+}, -+/* wycheproof - private key == 1 (mod order) on twist */ -+{ -+ .secret = (u8[32]){ 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, -+ 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, -+ .b_public = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, -+ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, -+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, -+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, -+ .expected_ss = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, -+ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, -+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, -+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, -+ .secret_size = 32, -+ .b_public_size = 32, -+ .expected_ss_size = 32, -+ -+} -+}; -+ - static const struct kpp_testvec ecdh_tv_template[] = { - { - #ifndef CONFIG_CRYPTO_FIPS diff --git a/target/linux/generic/backport-5.4/080-wireguard-0027-crypto-curve25519-implement-generic-KPP-driver.patch b/target/linux/generic/backport-5.4/080-wireguard-0027-crypto-curve25519-implement-generic-KPP-driver.patch deleted file mode 100644 index d909561690..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0027-crypto-curve25519-implement-generic-KPP-driver.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:34 +0100 -Subject: [PATCH] crypto: curve25519 - implement generic KPP driver - -commit ee772cb641135739c1530647391d5a04c39db192 upstream. - -Expose the generic Curve25519 library via the crypto API KPP interface. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - crypto/Kconfig | 5 +++ - crypto/Makefile | 1 + - crypto/curve25519-generic.c | 90 +++++++++++++++++++++++++++++++++++++ - 3 files changed, 96 insertions(+) - create mode 100644 crypto/curve25519-generic.c - ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -264,6 +264,11 @@ config CRYPTO_ECRDSA - standard algorithms (called GOST algorithms). Only signature verification - is implemented. - -+config CRYPTO_CURVE25519 -+ tristate "Curve25519 algorithm" -+ select CRYPTO_KPP -+ select CRYPTO_LIB_CURVE25519_GENERIC -+ - comment "Authenticated Encryption with Associated Data" - - config CRYPTO_CCM ---- a/crypto/Makefile -+++ b/crypto/Makefile -@@ -167,6 +167,7 @@ obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o - obj-$(CONFIG_CRYPTO_OFB) += ofb.o - obj-$(CONFIG_CRYPTO_ECC) += ecc.o - obj-$(CONFIG_CRYPTO_ESSIV) += essiv.o -+obj-$(CONFIG_CRYPTO_CURVE25519) += curve25519-generic.o - - ecdh_generic-y += ecdh.o - ecdh_generic-y += ecdh_helper.o ---- /dev/null -+++ b/crypto/curve25519-generic.c -@@ -0,0 +1,90 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+ -+#include <crypto/curve25519.h> -+#include <crypto/internal/kpp.h> -+#include <crypto/kpp.h> -+#include <linux/module.h> -+#include <linux/scatterlist.h> -+ -+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, -+ unsigned int len) -+{ -+ u8 *secret = kpp_tfm_ctx(tfm); -+ -+ if (!len) -+ curve25519_generate_secret(secret); -+ else if (len == CURVE25519_KEY_SIZE && -+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) -+ memcpy(secret, buf, CURVE25519_KEY_SIZE); -+ else -+ return -EINVAL; -+ return 0; -+} -+ -+static int curve25519_compute_value(struct kpp_request *req) -+{ -+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); -+ const u8 *secret = kpp_tfm_ctx(tfm); -+ u8 public_key[CURVE25519_KEY_SIZE]; -+ u8 buf[CURVE25519_KEY_SIZE]; -+ int copied, nbytes; -+ u8 const *bp; -+ -+ if (req->src) { -+ copied = sg_copy_to_buffer(req->src, -+ sg_nents_for_len(req->src, -+ 
CURVE25519_KEY_SIZE), -+ public_key, CURVE25519_KEY_SIZE); -+ if (copied != CURVE25519_KEY_SIZE) -+ return -EINVAL; -+ bp = public_key; -+ } else { -+ bp = curve25519_base_point; -+ } -+ -+ curve25519_generic(buf, secret, bp); -+ -+ /* might want less than we've got */ -+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); -+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, -+ nbytes), -+ buf, nbytes); -+ if (copied != nbytes) -+ return -EINVAL; -+ return 0; -+} -+ -+static unsigned int curve25519_max_size(struct crypto_kpp *tfm) -+{ -+ return CURVE25519_KEY_SIZE; -+} -+ -+static struct kpp_alg curve25519_alg = { -+ .base.cra_name = "curve25519", -+ .base.cra_driver_name = "curve25519-generic", -+ .base.cra_priority = 100, -+ .base.cra_module = THIS_MODULE, -+ .base.cra_ctxsize = CURVE25519_KEY_SIZE, -+ -+ .set_secret = curve25519_set_secret, -+ .generate_public_key = curve25519_compute_value, -+ .compute_shared_secret = curve25519_compute_value, -+ .max_size = curve25519_max_size, -+}; -+ -+static int curve25519_init(void) -+{ -+ return crypto_register_kpp(&curve25519_alg); -+} -+ -+static void curve25519_exit(void) -+{ -+ crypto_unregister_kpp(&curve25519_alg); -+} -+ -+subsys_initcall(curve25519_init); -+module_exit(curve25519_exit); -+ -+MODULE_ALIAS_CRYPTO("curve25519"); -+MODULE_ALIAS_CRYPTO("curve25519-generic"); -+MODULE_LICENSE("GPL"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0028-crypto-lib-curve25519-work-around-Clang-stack-spilli.patch b/target/linux/generic/backport-5.4/080-wireguard-0028-crypto-lib-curve25519-work-around-Clang-stack-spilli.patch deleted file mode 100644 index 36b59c9aae..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0028-crypto-lib-curve25519-work-around-Clang-stack-spilli.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:35 +0100 -Subject: 
[PATCH] crypto: lib/curve25519 - work around Clang stack spilling - issue - -commit 660bb8e1f833ea63185fe80fde847e3e42f18e3b upstream. - -Arnd reports that the 32-bit generic library code for Curve25519 ends -up using an excessive amount of stack space when built with Clang: - - lib/crypto/curve25519-fiat32.c:756:6: error: stack frame size - of 1384 bytes in function 'curve25519_generic' - [-Werror,-Wframe-larger-than=] - -Let's give some hints to the compiler regarding which routines should -not be inlined, to prevent it from running out of registers and spilling -to the stack. The resulting code performs identically under both GCC -and Clang, and makes the warning go away. - -Suggested-by: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - lib/crypto/curve25519-fiat32.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - ---- a/lib/crypto/curve25519-fiat32.c -+++ b/lib/crypto/curve25519-fiat32.c -@@ -223,7 +223,7 @@ static __always_inline void fe_1(fe *h) - h->v[0] = 1; - } - --static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) -+static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) - { - { const u32 x20 = in1[9]; - { const u32 x21 = in1[8]; -@@ -266,7 +266,7 @@ static __always_inline void fe_add(fe_lo - fe_add_impl(h->v, f->v, g->v); - } - --static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) -+static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) - { - { const u32 x20 = in1[9]; - { const u32 x21 = in1[8]; -@@ -309,7 +309,7 @@ static __always_inline void fe_sub(fe_lo - fe_sub_impl(h->v, f->v, g->v); - } - --static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) -+static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) - { - { const u32 x20 = in1[9]; - 
{ const u32 x21 = in1[8]; -@@ -441,7 +441,7 @@ fe_mul_tll(fe *h, const fe_loose *f, con - fe_mul_impl(h->v, f->v, g->v); - } - --static void fe_sqr_impl(u32 out[10], const u32 in1[10]) -+static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10]) - { - { const u32 x17 = in1[9]; - { const u32 x18 = in1[8]; -@@ -619,7 +619,7 @@ static __always_inline void fe_invert(fe - * - * Preconditions: b in {0,1} - */ --static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b) -+static noinline void fe_cswap(fe *f, fe *g, unsigned int b) - { - unsigned i; - b = 0 - b; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0029-crypto-curve25519-x86_64-library-and-KPP-implementat.patch b/target/linux/generic/backport-5.4/080-wireguard-0029-crypto-curve25519-x86_64-library-and-KPP-implementat.patch deleted file mode 100644 index 49fd970767..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0029-crypto-curve25519-x86_64-library-and-KPP-implementat.patch +++ /dev/null @@ -1,2536 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:36 +0100 -Subject: [PATCH] crypto: curve25519 - x86_64 library and KPP implementations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit bb611bdfd6be34d9f822c73305fcc83720499d38 upstream. - -This implementation is the fastest available x86_64 implementation, and -unlike Sandy2x, it doesn't require use of the floating point registers at -all. Instead it makes use of BMI2 and ADX, available on recent -microarchitectures. The implementation was written by Armando -Faz-Hernández with contributions (upstream) from Samuel Neves and me, -in addition to further changes in the kernel implementation from us. - -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -Signed-off-by: Samuel Neves <sneves@dei.uc.pt> -Co-developed-by: Samuel Neves <sneves@dei.uc.pt> -[ardb: - move to arch/x86/crypto - - wire into lib/crypto framework - - implement crypto API KPP hooks ] -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/Makefile | 1 + - arch/x86/crypto/curve25519-x86_64.c | 2475 +++++++++++++++++++++++++++ - crypto/Kconfig | 6 + - 3 files changed, 2482 insertions(+) - create mode 100644 arch/x86/crypto/curve25519-x86_64.c - ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) - - obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o - obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o -+obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o - - # These modules require assembler to support AVX. - ifeq ($(avx_supported),yes) ---- /dev/null -+++ b/arch/x86/crypto/curve25519-x86_64.c -@@ -0,0 +1,2475 @@ -+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause -+/* -+ * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved. -+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. 
-+ */ -+ -+#include <crypto/curve25519.h> -+#include <crypto/internal/kpp.h> -+ -+#include <linux/types.h> -+#include <linux/jump_label.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+#include <asm/cpufeature.h> -+#include <asm/processor.h> -+ -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); -+ -+enum { NUM_WORDS_ELTFP25519 = 4 }; -+typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; -+typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; -+ -+#define mul_eltfp25519_1w_adx(c, a, b) do { \ -+ mul_256x256_integer_adx(m.buffer, a, b); \ -+ red_eltfp25519_1w_adx(c, m.buffer); \ -+} while (0) -+ -+#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ -+ mul_256x256_integer_bmi2(m.buffer, a, b); \ -+ red_eltfp25519_1w_bmi2(c, m.buffer); \ -+} while (0) -+ -+#define sqr_eltfp25519_1w_adx(a) do { \ -+ sqr_256x256_integer_adx(m.buffer, a); \ -+ red_eltfp25519_1w_adx(a, m.buffer); \ -+} while (0) -+ -+#define sqr_eltfp25519_1w_bmi2(a) do { \ -+ sqr_256x256_integer_bmi2(m.buffer, a); \ -+ red_eltfp25519_1w_bmi2(a, m.buffer); \ -+} while (0) -+ -+#define mul_eltfp25519_2w_adx(c, a, b) do { \ -+ mul2_256x256_integer_adx(m.buffer, a, b); \ -+ red_eltfp25519_2w_adx(c, m.buffer); \ -+} while (0) -+ -+#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ -+ mul2_256x256_integer_bmi2(m.buffer, a, b); \ -+ red_eltfp25519_2w_bmi2(c, m.buffer); \ -+} while (0) -+ -+#define sqr_eltfp25519_2w_adx(a) do { \ -+ sqr2_256x256_integer_adx(m.buffer, a); \ -+ red_eltfp25519_2w_adx(a, m.buffer); \ -+} while (0) -+ -+#define sqr_eltfp25519_2w_bmi2(a) do { \ -+ sqr2_256x256_integer_bmi2(m.buffer, a); \ -+ red_eltfp25519_2w_bmi2(a, m.buffer); \ -+} while (0) -+ -+#define sqrn_eltfp25519_1w_adx(a, times) do { \ -+ int ____counter = (times); \ -+ while (____counter-- > 0) \ -+ sqr_eltfp25519_1w_adx(a); \ -+} while (0) -+ -+#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ -+ int 
____counter = (times); \ -+ while (____counter-- > 0) \ -+ sqr_eltfp25519_1w_bmi2(a); \ -+} while (0) -+ -+#define copy_eltfp25519_1w(C, A) do { \ -+ (C)[0] = (A)[0]; \ -+ (C)[1] = (A)[1]; \ -+ (C)[2] = (A)[2]; \ -+ (C)[3] = (A)[3]; \ -+} while (0) -+ -+#define setzero_eltfp25519_1w(C) do { \ -+ (C)[0] = 0; \ -+ (C)[1] = 0; \ -+ (C)[2] = 0; \ -+ (C)[3] = 0; \ -+} while (0) -+ -+__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { -+ /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, -+ 0xffffffffffffffffUL, 0x5fffffffffffffffUL, -+ /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, -+ 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, -+ /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, -+ 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, -+ /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, -+ 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, -+ /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, -+ 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, -+ /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, -+ 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, -+ /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, -+ 0xc1c20d06231f7614UL, 0x2938218da274f972UL, -+ /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, -+ 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, -+ /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, -+ 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, -+ /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, -+ 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, -+ /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, -+ 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, -+ /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, -+ 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL, -+ /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL, -+ 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, -+ /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, -+ 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, -+ /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, -+ 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, -+ 
/* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, -+ 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, -+ /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, -+ 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, -+ /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, -+ 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, -+ /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, -+ 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, -+ /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, -+ 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, -+ /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, -+ 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, -+ /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, -+ 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, -+ /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, -+ 0x23758739f630a257UL, 0x295a407a01a78580UL, -+ /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, -+ 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, -+ /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, -+ 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, -+ /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, -+ 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, -+ /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, -+ 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, -+ /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, -+ 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, -+ /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, -+ 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, -+ /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, -+ 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, -+ /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, -+ 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, -+ /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, -+ 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, -+ /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, -+ 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, -+ /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, -+ 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, -+ /* 35 */ 0xec5c57efe843faddUL, 
0x2843ce40f0bb9918UL, -+ 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL, -+ /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, -+ 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, -+ /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, -+ 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, -+ /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, -+ 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, -+ /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, -+ 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, -+ /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, -+ 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, -+ /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, -+ 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, -+ /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, -+ 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, -+ /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, -+ 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, -+ /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, -+ 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, -+ /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, -+ 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, -+ /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, -+ 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, -+ /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, -+ 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL, -+ /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, -+ 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, -+ /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, -+ 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, -+ /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, -+ 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, -+ /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, -+ 0xc189218075e91436UL, 0x6d9284169b3b8484UL, -+ /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, -+ 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL, -+ /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, -+ 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, -+ /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, -+ 
0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, -+ /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, -+ 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, -+ /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, -+ 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, -+ /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, -+ 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, -+ /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, -+ 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, -+ /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, -+ 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, -+ /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, -+ 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, -+ /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, -+ 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, -+ /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, -+ 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, -+ /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, -+ 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, -+ /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, -+ 0x25232973322dbef4UL, 0x445dc4758c17f770UL, -+ /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, -+ 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, -+ /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, -+ 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, -+ /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, -+ 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, -+ /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, -+ 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, -+ /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, -+ 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, -+ /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, -+ 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, -+ /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, -+ 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, -+ /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, -+ 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, -+ /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, -+ 0x5253ec44e4323cd1UL, 
0x3e88363c14e9355bUL, -+ /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL, -+ 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, -+ /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, -+ 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, -+ /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, -+ 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, -+ /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, -+ 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, -+ /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, -+ 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, -+ /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, -+ 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, -+ /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, -+ 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, -+ /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, -+ 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, -+ /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, -+ 0x894d1d855ae52359UL, 0x68e122157b743d69UL, -+ /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, -+ 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, -+ /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, -+ 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, -+ /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, -+ 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, -+ /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, -+ 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, -+ /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, -+ 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, -+ /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, -+ 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, -+ /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, -+ 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, -+ /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, -+ 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, -+ /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, -+ 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, -+ /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL, -+ 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, -+ /* 93 */ 
0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, -+ 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, -+ /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, -+ 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, -+ /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, -+ 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, -+ /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, -+ 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, -+ /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, -+ 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, -+ /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, -+ 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, -+ /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, -+ 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, -+ /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, -+ 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, -+ /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, -+ 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, -+ /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, -+ 0x4a497962066e6043UL, 0x705b3aab41355b44UL, -+ /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, -+ 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, -+ /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, -+ 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, -+ /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, -+ 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, -+ /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, -+ 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, -+ /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, -+ 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, -+ /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, -+ 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, -+ /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, -+ 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, -+ /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, -+ 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, -+ /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, -+ 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, -+ /* 112 */ 0xb19cf10ca237c047UL, 
0xb65466711f6c81a2UL, -+ 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL, -+ /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, -+ 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, -+ /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, -+ 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, -+ /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, -+ 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, -+ /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, -+ 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, -+ /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, -+ 0x508e862f121692fcUL, 0x3a81907fa093c291UL, -+ /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, -+ 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, -+ /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, -+ 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, -+ /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, -+ 0xe488de11d761e352UL, 0x0e878a01a085545cUL, -+ /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, -+ 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, -+ /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, -+ 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, -+ /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, -+ 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, -+ /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, -+ 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, -+ /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, -+ 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, -+ /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, -+ 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, -+ /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL, -+ 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, -+ /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, -+ 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, -+ /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, -+ 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, -+ /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, -+ 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, -+ /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, 
-+ 0x904659bb686e3772UL, 0x7215c371746ba8c8UL, -+ /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, -+ 0x266fd5809208f294UL, 0x5c847085619a26b9UL, -+ /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, -+ 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, -+ /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, -+ 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, -+ /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, -+ 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, -+ /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, -+ 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, -+ /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, -+ 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, -+ /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, -+ 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, -+ /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, -+ 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, -+ /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, -+ 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, -+ /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, -+ 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, -+ /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, -+ 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, -+ /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, -+ 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, -+ /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, -+ 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, -+ /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, -+ 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, -+ /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, -+ 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, -+ /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, -+ 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, -+ /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, -+ 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, -+ /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, -+ 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, -+ /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, -+ 
0x52d17436309d4253UL, 0x356f97e13efae576UL, -+ /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, -+ 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, -+ /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, -+ 0x66124c6f97bda770UL, 0x0f81a0290654124aUL, -+ /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, -+ 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL, -+ /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, -+ 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, -+ /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, -+ 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, -+ /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, -+ 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, -+ /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, -+ 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, -+ /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, -+ 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, -+ /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, -+ 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, -+ /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, -+ 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, -+ /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, -+ 0x497d723f802e88e1UL, 0x30684dea602f408dUL, -+ /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, -+ 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, -+ /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, -+ 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, -+ /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, -+ 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, -+ /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, -+ 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, -+ /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, -+ 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, -+ /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, -+ 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, -+ /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, -+ 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, -+ /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, -+ 0x027145d14b8291bdUL, 
0x585a73ea2cbf1705UL, -+ /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL, -+ 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, -+ /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, -+ 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, -+ /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, -+ 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, -+ /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, -+ 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, -+ /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, -+ 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, -+ /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, -+ 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, -+ /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, -+ 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, -+ /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, -+ 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, -+ /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, -+ 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, -+ /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, -+ 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, -+ /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, -+ 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, -+ /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, -+ 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, -+ /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, -+ 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, -+ /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, -+ 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, -+ /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, -+ 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, -+ /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, -+ 0x81004b71e33cc191UL, 0x44e6be345122803cUL, -+ /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, -+ 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, -+ /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL, -+ 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, -+ /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, -+ 0x12bc8d6915783712UL, 0x498194c0fc620abbUL, 
-+ /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, -+ 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, -+ /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL, -+ 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, -+ /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, -+ 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, -+ /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, -+ 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, -+ /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, -+ 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, -+ /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, -+ 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, -+ /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, -+ 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, -+ /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, -+ 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL, -+ /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, -+ 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, -+ /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, -+ 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, -+ /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, -+ 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, -+ /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, -+ 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, -+ /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, -+ 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, -+ /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, -+ 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, -+ /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, -+ 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, -+ /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, -+ 0x33979624f0e917beUL, 0x2c018dc527356b30UL, -+ /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, -+ 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, -+ /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, -+ 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, -+ /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, -+ 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, -+ /* 208 */ 
0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, -+ 0x345ead5e972d091eUL, 0x18c8df11a83103baUL, -+ /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, -+ 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, -+ /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, -+ 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, -+ /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, -+ 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, -+ /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, -+ 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, -+ /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, -+ 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, -+ /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, -+ 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, -+ /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, -+ 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, -+ /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, -+ 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, -+ /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, -+ 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, -+ /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, -+ 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, -+ /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, -+ 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, -+ /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, -+ 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, -+ /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, -+ 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, -+ /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, -+ 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, -+ /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL, -+ 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, -+ /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, -+ 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, -+ /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, -+ 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, -+ /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, -+ 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, -+ /* 227 */ 0xe38b9b1626e0ccb1UL, 
0x79e053c18b09fb36UL, -+ 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL, -+ /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, -+ 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, -+ /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, -+ 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, -+ /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, -+ 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, -+ /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, -+ 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, -+ /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, -+ 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, -+ /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, -+ 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, -+ /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, -+ 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, -+ /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, -+ 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, -+ /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, -+ 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, -+ /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, -+ 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, -+ /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, -+ 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, -+ /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, -+ 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, -+ /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, -+ 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, -+ /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, -+ 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, -+ /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, -+ 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, -+ /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, -+ 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, -+ /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, -+ 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, -+ /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, -+ 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, -+ /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, 
-+ 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, -+ /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, -+ 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, -+ /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, -+ 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, -+ /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, -+ 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, -+ /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, -+ 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, -+ /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, -+ 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, -+ /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, -+ 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL -+}; -+ -+/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] -+ * a is two 256-bit integers: a0[0:3] and a1[4:7] -+ * b is two 256-bit integers: b0[0:3] and b1[4:7] -+ */ -+static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, -+ const u64 *const b) -+{ -+ asm volatile( -+ "xorl %%r14d, %%r14d ;" -+ "movq (%1), %%rdx; " /* A[0] */ -+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ -+ "xorl %%r10d, %%r10d ;" -+ "movq %%r8, (%0) ;" -+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ -+ "adox %%r10, %%r15 ;" -+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ -+ "adox %%r8, %%rax ;" -+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ -+ "adox %%r10, %%rbx ;" -+ /******************************************/ -+ "adox %%r14, %%rcx ;" -+ -+ "movq 8(%1), %%rdx; " /* A[1] */ -+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -+ "adox %%r15, %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -+ "adox %%r10, %%r9 ;" -+ "adcx %%r9, %%rax ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ -+ "adox %%r8, %%r11 ;" -+ "adcx %%r11, %%rbx ;" -+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ -+ "adox %%r10, %%r13 ;" -+ "adcx %%r13, %%rcx ;" -+ /******************************************/ -+ "adox %%r14, %%r15 ;" -+ "adcx %%r14, %%r15 ;" -+ -+ "movq 16(%1), %%rdx; " /* A[2] 
*/ -+ "xorl %%r10d, %%r10d ;" -+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ -+ "adox %%rax, %%r8 ;" -+ "movq %%r8, 16(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -+ "adox %%r10, %%r9 ;" -+ "adcx %%r9, %%rbx ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ -+ "adox %%r8, %%r11 ;" -+ "adcx %%r11, %%rcx ;" -+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ -+ "adox %%r10, %%r13 ;" -+ "adcx %%r13, %%r15 ;" -+ /******************************************/ -+ "adox %%r14, %%rax ;" -+ "adcx %%r14, %%rax ;" -+ -+ "movq 24(%1), %%rdx; " /* A[3] */ -+ "xorl %%r10d, %%r10d ;" -+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -+ "adox %%rbx, %%r8 ;" -+ "movq %%r8, 24(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -+ "adox %%r10, %%r9 ;" -+ "adcx %%r9, %%rcx ;" -+ "movq %%rcx, 32(%0) ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ -+ "adox %%r8, %%r11 ;" -+ "adcx %%r11, %%r15 ;" -+ "movq %%r15, 40(%0) ;" -+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ -+ "adox %%r10, %%r13 ;" -+ "adcx %%r13, %%rax ;" -+ "movq %%rax, 48(%0) ;" -+ /******************************************/ -+ "adox %%r14, %%rbx ;" -+ "adcx %%r14, %%rbx ;" -+ "movq %%rbx, 56(%0) ;" -+ -+ "movq 32(%1), %%rdx; " /* C[0] */ -+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ -+ "xorl %%r10d, %%r10d ;" -+ "movq %%r8, 64(%0);" -+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ -+ "adox %%r10, %%r15 ;" -+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ -+ "adox %%r8, %%rax ;" -+ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ -+ "adox %%r10, %%rbx ;" -+ /******************************************/ -+ "adox %%r14, %%rcx ;" -+ -+ "movq 40(%1), %%rdx; " /* C[1] */ -+ "xorl %%r10d, %%r10d ;" -+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ -+ "adox %%r15, %%r8 ;" -+ "movq %%r8, 72(%0);" -+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ -+ "adox %%r10, %%r9 ;" -+ "adcx %%r9, %%rax ;" -+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ -+ "adox %%r8, %%r11 ;" -+ "adcx %%r11, %%rbx ;" -+ "mulx 56(%2), %%r10, %%r15; " /* 
C[1]*D[3] */ -+ "adox %%r10, %%r13 ;" -+ "adcx %%r13, %%rcx ;" -+ /******************************************/ -+ "adox %%r14, %%r15 ;" -+ "adcx %%r14, %%r15 ;" -+ -+ "movq 48(%1), %%rdx; " /* C[2] */ -+ "xorl %%r10d, %%r10d ;" -+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ -+ "adox %%rax, %%r8 ;" -+ "movq %%r8, 80(%0);" -+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ -+ "adox %%r10, %%r9 ;" -+ "adcx %%r9, %%rbx ;" -+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ -+ "adox %%r8, %%r11 ;" -+ "adcx %%r11, %%rcx ;" -+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ -+ "adox %%r10, %%r13 ;" -+ "adcx %%r13, %%r15 ;" -+ /******************************************/ -+ "adox %%r14, %%rax ;" -+ "adcx %%r14, %%rax ;" -+ -+ "movq 56(%1), %%rdx; " /* C[3] */ -+ "xorl %%r10d, %%r10d ;" -+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ -+ "adox %%rbx, %%r8 ;" -+ "movq %%r8, 88(%0);" -+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ -+ "adox %%r10, %%r9 ;" -+ "adcx %%r9, %%rcx ;" -+ "movq %%rcx, 96(%0) ;" -+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ -+ "adox %%r8, %%r11 ;" -+ "adcx %%r11, %%r15 ;" -+ "movq %%r15, 104(%0) ;" -+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ -+ "adox %%r10, %%r13 ;" -+ "adcx %%r13, %%rax ;" -+ "movq %%rax, 112(%0) ;" -+ /******************************************/ -+ "adox %%r14, %%rbx ;" -+ "adcx %%r14, %%rbx ;" -+ "movq %%rbx, 120(%0) ;" -+ : -+ : "r"(c), "r"(a), "r"(b) -+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -+ "%r10", "%r11", "%r13", "%r14", "%r15"); -+} -+ -+static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, -+ const u64 *const b) -+{ -+ asm volatile( -+ "movq (%1), %%rdx; " /* A[0] */ -+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ -+ "movq %%r8, (%0) ;" -+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ -+ "addq %%r10, %%r15 ;" -+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ -+ "adcq %%r8, %%rax ;" -+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ -+ "adcq %%r10, %%rbx ;" -+ 
/******************************************/ -+ "adcq $0, %%rcx ;" -+ -+ "movq 8(%1), %%rdx; " /* A[1] */ -+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -+ "addq %%r15, %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%r15 ;" -+ -+ "addq %%r9, %%rax ;" -+ "adcq %%r11, %%rbx ;" -+ "adcq %%r13, %%rcx ;" -+ "adcq $0, %%r15 ;" -+ -+ "movq 16(%1), %%rdx; " /* A[2] */ -+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ -+ "addq %%rax, %%r8 ;" -+ "movq %%r8, 16(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%rax ;" -+ -+ "addq %%r9, %%rbx ;" -+ "adcq %%r11, %%rcx ;" -+ "adcq %%r13, %%r15 ;" -+ "adcq $0, %%rax ;" -+ -+ "movq 24(%1), %%rdx; " /* A[3] */ -+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -+ "addq %%rbx, %%r8 ;" -+ "movq %%r8, 24(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%rbx ;" -+ -+ "addq %%r9, %%rcx ;" -+ "movq %%rcx, 32(%0) ;" -+ "adcq %%r11, %%r15 ;" -+ "movq %%r15, 40(%0) ;" -+ "adcq %%r13, %%rax ;" -+ "movq %%rax, 48(%0) ;" -+ "adcq $0, %%rbx ;" -+ "movq %%rbx, 56(%0) ;" -+ -+ "movq 32(%1), %%rdx; " /* C[0] */ -+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ -+ "movq %%r8, 64(%0) ;" -+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ -+ "addq %%r10, %%r15 ;" -+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ -+ "adcq %%r8, %%rax ;" -+ "mulx 
56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ -+ "adcq %%r10, %%rbx ;" -+ /******************************************/ -+ "adcq $0, %%rcx ;" -+ -+ "movq 40(%1), %%rdx; " /* C[1] */ -+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ -+ "addq %%r15, %%r8 ;" -+ "movq %%r8, 72(%0) ;" -+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%r15 ;" -+ -+ "addq %%r9, %%rax ;" -+ "adcq %%r11, %%rbx ;" -+ "adcq %%r13, %%rcx ;" -+ "adcq $0, %%r15 ;" -+ -+ "movq 48(%1), %%rdx; " /* C[2] */ -+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ -+ "addq %%rax, %%r8 ;" -+ "movq %%r8, 80(%0) ;" -+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%rax ;" -+ -+ "addq %%r9, %%rbx ;" -+ "adcq %%r11, %%rcx ;" -+ "adcq %%r13, %%r15 ;" -+ "adcq $0, %%rax ;" -+ -+ "movq 56(%1), %%rdx; " /* C[3] */ -+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ -+ "addq %%rbx, %%r8 ;" -+ "movq %%r8, 88(%0) ;" -+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%rbx ;" -+ -+ "addq %%r9, %%rcx ;" -+ "movq %%rcx, 96(%0) ;" -+ "adcq %%r11, %%r15 ;" -+ "movq %%r15, 104(%0) ;" -+ "adcq %%r13, %%rax ;" -+ "movq %%rax, 112(%0) ;" -+ "adcq $0, %%rbx ;" -+ "movq %%rbx, 120(%0) ;" -+ : -+ : "r"(c), "r"(a), "r"(b) -+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -+ "%r10", "%r11", "%r13", "%r15"); -+} -+ -+static void sqr2_256x256_integer_adx(u64 
*const c, const u64 *const a) -+{ -+ asm volatile( -+ "movq (%1), %%rdx ;" /* A[0] */ -+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ -+ "xorl %%r15d, %%r15d;" -+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ -+ "adcx %%r14, %%r9 ;" -+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ -+ "adcx %%rax, %%r10 ;" -+ "movq 24(%1), %%rdx ;" /* A[3] */ -+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ -+ "adcx %%rcx, %%r11 ;" -+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ -+ "adcx %%rax, %%rbx ;" -+ "movq 8(%1), %%rdx ;" /* A[1] */ -+ "adcx %%r15, %%r13 ;" -+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ -+ "movq $0, %%r14 ;" -+ /******************************************/ -+ "adcx %%r15, %%r14 ;" -+ -+ "xorl %%r15d, %%r15d;" -+ "adox %%rax, %%r10 ;" -+ "adcx %%r8, %%r8 ;" -+ "adox %%rcx, %%r11 ;" -+ "adcx %%r9, %%r9 ;" -+ "adox %%r15, %%rbx ;" -+ "adcx %%r10, %%r10 ;" -+ "adox %%r15, %%r13 ;" -+ "adcx %%r11, %%r11 ;" -+ "adox %%r15, %%r14 ;" -+ "adcx %%rbx, %%rbx ;" -+ "adcx %%r13, %%r13 ;" -+ "adcx %%r14, %%r14 ;" -+ -+ "movq (%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ -+ /*******************/ -+ "movq %%rax, 0(%0) ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "movq 8(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ -+ "adcq %%rax, %%r9 ;" -+ "movq %%r9, 16(%0) ;" -+ "adcq %%rcx, %%r10 ;" -+ "movq %%r10, 24(%0) ;" -+ "movq 16(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ -+ "adcq %%rax, %%r11 ;" -+ "movq %%r11, 32(%0) ;" -+ "adcq %%rcx, %%rbx ;" -+ "movq %%rbx, 40(%0) ;" -+ "movq 24(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ -+ "adcq %%rax, %%r13 ;" -+ "movq %%r13, 48(%0) ;" -+ "adcq %%rcx, %%r14 ;" -+ "movq %%r14, 56(%0) ;" -+ -+ -+ "movq 32(%1), %%rdx ;" /* B[0] */ -+ "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ -+ "xorl %%r15d, %%r15d;" -+ "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ -+ "adcx %%r14, %%r9 ;" -+ "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ -+ "adcx %%rax, %%r10 ;" -+ "movq 56(%1), %%rdx ;" /* 
B[3] */ -+ "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ -+ "adcx %%rcx, %%r11 ;" -+ "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ -+ "adcx %%rax, %%rbx ;" -+ "movq 40(%1), %%rdx ;" /* B[1] */ -+ "adcx %%r15, %%r13 ;" -+ "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ -+ "movq $0, %%r14 ;" -+ /******************************************/ -+ "adcx %%r15, %%r14 ;" -+ -+ "xorl %%r15d, %%r15d;" -+ "adox %%rax, %%r10 ;" -+ "adcx %%r8, %%r8 ;" -+ "adox %%rcx, %%r11 ;" -+ "adcx %%r9, %%r9 ;" -+ "adox %%r15, %%rbx ;" -+ "adcx %%r10, %%r10 ;" -+ "adox %%r15, %%r13 ;" -+ "adcx %%r11, %%r11 ;" -+ "adox %%r15, %%r14 ;" -+ "adcx %%rbx, %%rbx ;" -+ "adcx %%r13, %%r13 ;" -+ "adcx %%r14, %%r14 ;" -+ -+ "movq 32(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ -+ /*******************/ -+ "movq %%rax, 64(%0) ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 72(%0) ;" -+ "movq 40(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ -+ "adcq %%rax, %%r9 ;" -+ "movq %%r9, 80(%0) ;" -+ "adcq %%rcx, %%r10 ;" -+ "movq %%r10, 88(%0) ;" -+ "movq 48(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ -+ "adcq %%rax, %%r11 ;" -+ "movq %%r11, 96(%0) ;" -+ "adcq %%rcx, %%rbx ;" -+ "movq %%rbx, 104(%0) ;" -+ "movq 56(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ -+ "adcq %%rax, %%r13 ;" -+ "movq %%r13, 112(%0) ;" -+ "adcq %%rcx, %%r14 ;" -+ "movq %%r14, 120(%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -+ "%r10", "%r11", "%r13", "%r14", "%r15"); -+} -+ -+static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) -+{ -+ asm volatile( -+ "movq 8(%1), %%rdx ;" /* A[1] */ -+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ -+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ -+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ -+ -+ "movq 16(%1), %%rdx ;" /* A[2] */ -+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ -+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ -+ -+ "addq %%rax, %%r9 ;" -+ "adcq %%rdx, %%r10 ;" -+ "adcq 
%%rcx, %%r11 ;" -+ "adcq %%r14, %%r15 ;" -+ "adcq $0, %%r13 ;" -+ "movq $0, %%r14 ;" -+ "adcq $0, %%r14 ;" -+ -+ "movq (%1), %%rdx ;" /* A[0] */ -+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ -+ -+ "addq %%rax, %%r10 ;" -+ "adcq %%rcx, %%r11 ;" -+ "adcq $0, %%r15 ;" -+ "adcq $0, %%r13 ;" -+ "adcq $0, %%r14 ;" -+ -+ "shldq $1, %%r13, %%r14 ;" -+ "shldq $1, %%r15, %%r13 ;" -+ "shldq $1, %%r11, %%r15 ;" -+ "shldq $1, %%r10, %%r11 ;" -+ "shldq $1, %%r9, %%r10 ;" -+ "shldq $1, %%r8, %%r9 ;" -+ "shlq $1, %%r8 ;" -+ -+ /*******************/ -+ "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ -+ /*******************/ -+ "movq %%rax, 0(%0) ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "movq 8(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ -+ "adcq %%rax, %%r9 ;" -+ "movq %%r9, 16(%0) ;" -+ "adcq %%rcx, %%r10 ;" -+ "movq %%r10, 24(%0) ;" -+ "movq 16(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ -+ "adcq %%rax, %%r11 ;" -+ "movq %%r11, 32(%0) ;" -+ "adcq %%rcx, %%r15 ;" -+ "movq %%r15, 40(%0) ;" -+ "movq 24(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ -+ "adcq %%rax, %%r13 ;" -+ "movq %%r13, 48(%0) ;" -+ "adcq %%rcx, %%r14 ;" -+ "movq %%r14, 56(%0) ;" -+ -+ "movq 40(%1), %%rdx ;" /* B[1] */ -+ "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ -+ "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ -+ "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ -+ -+ "movq 48(%1), %%rdx ;" /* B[2] */ -+ "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ -+ "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ -+ -+ "addq %%rax, %%r9 ;" -+ "adcq %%rdx, %%r10 ;" -+ "adcq %%rcx, %%r11 ;" -+ "adcq %%r14, %%r15 ;" -+ "adcq $0, %%r13 ;" -+ "movq $0, %%r14 ;" -+ "adcq $0, %%r14 ;" -+ -+ "movq 32(%1), %%rdx ;" /* B[0] */ -+ "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ -+ -+ "addq %%rax, %%r10 ;" -+ "adcq %%rcx, %%r11 ;" -+ "adcq $0, %%r15 ;" -+ "adcq $0, %%r13 ;" -+ "adcq $0, %%r14 ;" -+ -+ "shldq $1, %%r13, %%r14 ;" -+ "shldq $1, %%r15, %%r13 ;" -+ "shldq $1, %%r11, %%r15 ;" -+ 
"shldq $1, %%r10, %%r11 ;" -+ "shldq $1, %%r9, %%r10 ;" -+ "shldq $1, %%r8, %%r9 ;" -+ "shlq $1, %%r8 ;" -+ -+ /*******************/ -+ "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */ -+ /*******************/ -+ "movq %%rax, 64(%0) ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 72(%0) ;" -+ "movq 40(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ -+ "adcq %%rax, %%r9 ;" -+ "movq %%r9, 80(%0) ;" -+ "adcq %%rcx, %%r10 ;" -+ "movq %%r10, 88(%0) ;" -+ "movq 48(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ -+ "adcq %%rax, %%r11 ;" -+ "movq %%r11, 96(%0) ;" -+ "adcq %%rcx, %%r15 ;" -+ "movq %%r15, 104(%0) ;" -+ "movq 56(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ -+ "adcq %%rax, %%r13 ;" -+ "movq %%r13, 112(%0) ;" -+ "adcq %%rcx, %%r14 ;" -+ "movq %%r14, 120(%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -+ "%r11", "%r13", "%r14", "%r15"); -+} -+ -+static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) -+{ -+ asm volatile( -+ "movl $38, %%edx; " /* 2*c = 38 = 2^256 */ -+ "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ -+ "xorl %%ebx, %%ebx ;" -+ "adox (%1), %%r8 ;" -+ "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ -+ "adcx %%r10, %%r9 ;" -+ "adox 8(%1), %%r9 ;" -+ "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ -+ "adcx %%r11, %%r10 ;" -+ "adox 16(%1), %%r10 ;" -+ "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ -+ "adcx %%rax, %%r11 ;" -+ "adox 24(%1), %%r11 ;" -+ /***************************************/ -+ "adcx %%rbx, %%rcx ;" -+ "adox %%rbx, %%rcx ;" -+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ -+ "adcx %%rcx, %%r8 ;" -+ "adcx %%rbx, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "adcx %%rbx, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "adcx %%rbx, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%edx, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ -+ "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ -+ "xorl %%ebx, %%ebx ;" -+ "adox 64(%1), %%r8 ;" -+ "mulx 104(%1), %%r9, %%r11; " 
/* c*C[5] */ -+ "adcx %%r10, %%r9 ;" -+ "adox 72(%1), %%r9 ;" -+ "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ -+ "adcx %%r11, %%r10 ;" -+ "adox 80(%1), %%r10 ;" -+ "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ -+ "adcx %%rax, %%r11 ;" -+ "adox 88(%1), %%r11 ;" -+ /****************************************/ -+ "adcx %%rbx, %%rcx ;" -+ "adox %%rbx, %%rcx ;" -+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ -+ "adcx %%rcx, %%r8 ;" -+ "adcx %%rbx, %%r9 ;" -+ "movq %%r9, 40(%0) ;" -+ "adcx %%rbx, %%r10 ;" -+ "movq %%r10, 48(%0) ;" -+ "adcx %%rbx, %%r11 ;" -+ "movq %%r11, 56(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%edx, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 32(%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -+ "%r10", "%r11"); -+} -+ -+static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) -+{ -+ asm volatile( -+ "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */ -+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ -+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ -+ "addq %%r10, %%r9 ;" -+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ -+ "adcq %%r11, %%r10 ;" -+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ -+ "adcq %%rax, %%r11 ;" -+ /***************************************/ -+ "adcq $0, %%rcx ;" -+ "addq (%1), %%r8 ;" -+ "adcq 8(%1), %%r9 ;" -+ "adcq 16(%1), %%r10 ;" -+ "adcq 24(%1), %%r11 ;" -+ "adcq $0, %%rcx ;" -+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ -+ "addq %%rcx, %%r8 ;" -+ "adcq $0, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "adcq $0, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "adcq $0, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%edx, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ -+ "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */ -+ "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ -+ "addq %%r10, %%r9 ;" -+ "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ -+ "adcq %%r11, %%r10 ;" -+ "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ -+ "adcq %%rax, %%r11 ;" -+ /****************************************/ -+ "adcq 
$0, %%rcx ;" -+ "addq 64(%1), %%r8 ;" -+ "adcq 72(%1), %%r9 ;" -+ "adcq 80(%1), %%r10 ;" -+ "adcq 88(%1), %%r11 ;" -+ "adcq $0, %%rcx ;" -+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ -+ "addq %%rcx, %%r8 ;" -+ "adcq $0, %%r9 ;" -+ "movq %%r9, 40(%0) ;" -+ "adcq $0, %%r10 ;" -+ "movq %%r10, 48(%0) ;" -+ "adcq $0, %%r11 ;" -+ "movq %%r11, 56(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%edx, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 32(%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -+ "%r11"); -+} -+ -+static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, -+ const u64 *const b) -+{ -+ asm volatile( -+ "movq (%1), %%rdx; " /* A[0] */ -+ "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ -+ "xorl %%r10d, %%r10d ;" -+ "movq %%r8, (%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ -+ "adox %%r9, %%r10 ;" -+ "movq %%r10, 8(%0) ;" -+ "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ -+ "adox %%r11, %%r15 ;" -+ "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ -+ "adox %%r13, %%r14 ;" -+ "movq $0, %%rax ;" -+ /******************************************/ -+ "adox %%rdx, %%rax ;" -+ -+ "movq 8(%1), %%rdx; " /* A[1] */ -+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -+ "xorl %%r10d, %%r10d ;" -+ "adcx 8(%0), %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -+ "adox %%r9, %%r10 ;" -+ "adcx %%r15, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ -+ "adox %%r11, %%r15 ;" -+ "adcx %%r14, %%r15 ;" -+ "movq $0, %%r8 ;" -+ "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ -+ "adox %%r13, %%r14 ;" -+ "adcx %%rax, %%r14 ;" -+ "movq $0, %%rax ;" -+ /******************************************/ -+ "adox %%rdx, %%rax ;" -+ "adcx %%r8, %%rax ;" -+ -+ "movq 16(%1), %%rdx; " /* A[2] */ -+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ -+ "xorl %%r10d, %%r10d ;" -+ "adcx 16(%0), %%r8 ;" -+ "movq %%r8, 16(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -+ "adox %%r9, %%r10 ;" -+ 
"adcx %%r15, %%r10 ;" -+ "movq %%r10, 24(%0) ;" -+ "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ -+ "adox %%r11, %%r15 ;" -+ "adcx %%r14, %%r15 ;" -+ "movq $0, %%r8 ;" -+ "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ -+ "adox %%r13, %%r14 ;" -+ "adcx %%rax, %%r14 ;" -+ "movq $0, %%rax ;" -+ /******************************************/ -+ "adox %%rdx, %%rax ;" -+ "adcx %%r8, %%rax ;" -+ -+ "movq 24(%1), %%rdx; " /* A[3] */ -+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -+ "xorl %%r10d, %%r10d ;" -+ "adcx 24(%0), %%r8 ;" -+ "movq %%r8, 24(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -+ "adox %%r9, %%r10 ;" -+ "adcx %%r15, %%r10 ;" -+ "movq %%r10, 32(%0) ;" -+ "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ -+ "adox %%r11, %%r15 ;" -+ "adcx %%r14, %%r15 ;" -+ "movq %%r15, 40(%0) ;" -+ "movq $0, %%r8 ;" -+ "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ -+ "adox %%r13, %%r14 ;" -+ "adcx %%rax, %%r14 ;" -+ "movq %%r14, 48(%0) ;" -+ "movq $0, %%rax ;" -+ /******************************************/ -+ "adox %%rdx, %%rax ;" -+ "adcx %%r8, %%rax ;" -+ "movq %%rax, 56(%0) ;" -+ : -+ : "r"(c), "r"(a), "r"(b) -+ : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", -+ "%r13", "%r14", "%r15"); -+} -+ -+static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, -+ const u64 *const b) -+{ -+ asm volatile( -+ "movq (%1), %%rdx; " /* A[0] */ -+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ -+ "movq %%r8, (%0) ;" -+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ -+ "addq %%r10, %%r15 ;" -+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ -+ "adcq %%r8, %%rax ;" -+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ -+ "adcq %%r10, %%rbx ;" -+ /******************************************/ -+ "adcq $0, %%rcx ;" -+ -+ "movq 8(%1), %%rdx; " /* A[1] */ -+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -+ "addq %%r15, %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ -+ "adcq %%r8, 
%%r11 ;" -+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%r15 ;" -+ -+ "addq %%r9, %%rax ;" -+ "adcq %%r11, %%rbx ;" -+ "adcq %%r13, %%rcx ;" -+ "adcq $0, %%r15 ;" -+ -+ "movq 16(%1), %%rdx; " /* A[2] */ -+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ -+ "addq %%rax, %%r8 ;" -+ "movq %%r8, 16(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%rax ;" -+ -+ "addq %%r9, %%rbx ;" -+ "adcq %%r11, %%rcx ;" -+ "adcq %%r13, %%r15 ;" -+ "adcq $0, %%rax ;" -+ -+ "movq 24(%1), %%rdx; " /* A[3] */ -+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -+ "addq %%rbx, %%r8 ;" -+ "movq %%r8, 24(%0) ;" -+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -+ "adcq %%r10, %%r9 ;" -+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ -+ "adcq %%r8, %%r11 ;" -+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ -+ "adcq %%r10, %%r13 ;" -+ /******************************************/ -+ "adcq $0, %%rbx ;" -+ -+ "addq %%r9, %%rcx ;" -+ "movq %%rcx, 32(%0) ;" -+ "adcq %%r11, %%r15 ;" -+ "movq %%r15, 40(%0) ;" -+ "adcq %%r13, %%rax ;" -+ "movq %%rax, 48(%0) ;" -+ "adcq $0, %%rbx ;" -+ "movq %%rbx, 56(%0) ;" -+ : -+ : "r"(c), "r"(a), "r"(b) -+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -+ "%r10", "%r11", "%r13", "%r15"); -+} -+ -+static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) -+{ -+ asm volatile( -+ "movq (%1), %%rdx ;" /* A[0] */ -+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ -+ "xorl %%r15d, %%r15d;" -+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ -+ "adcx %%r14, %%r9 ;" -+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ -+ "adcx %%rax, %%r10 ;" -+ "movq 24(%1), %%rdx ;" /* A[3] */ -+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ -+ "adcx %%rcx, %%r11 ;" 
-+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ -+ "adcx %%rax, %%rbx ;" -+ "movq 8(%1), %%rdx ;" /* A[1] */ -+ "adcx %%r15, %%r13 ;" -+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ -+ "movq $0, %%r14 ;" -+ /******************************************/ -+ "adcx %%r15, %%r14 ;" -+ -+ "xorl %%r15d, %%r15d;" -+ "adox %%rax, %%r10 ;" -+ "adcx %%r8, %%r8 ;" -+ "adox %%rcx, %%r11 ;" -+ "adcx %%r9, %%r9 ;" -+ "adox %%r15, %%rbx ;" -+ "adcx %%r10, %%r10 ;" -+ "adox %%r15, %%r13 ;" -+ "adcx %%r11, %%r11 ;" -+ "adox %%r15, %%r14 ;" -+ "adcx %%rbx, %%rbx ;" -+ "adcx %%r13, %%r13 ;" -+ "adcx %%r14, %%r14 ;" -+ -+ "movq (%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ -+ /*******************/ -+ "movq %%rax, 0(%0) ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "movq 8(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ -+ "adcq %%rax, %%r9 ;" -+ "movq %%r9, 16(%0) ;" -+ "adcq %%rcx, %%r10 ;" -+ "movq %%r10, 24(%0) ;" -+ "movq 16(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ -+ "adcq %%rax, %%r11 ;" -+ "movq %%r11, 32(%0) ;" -+ "adcq %%rcx, %%rbx ;" -+ "movq %%rbx, 40(%0) ;" -+ "movq 24(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ -+ "adcq %%rax, %%r13 ;" -+ "movq %%r13, 48(%0) ;" -+ "adcq %%rcx, %%r14 ;" -+ "movq %%r14, 56(%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -+ "%r10", "%r11", "%r13", "%r14", "%r15"); -+} -+ -+static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) -+{ -+ asm volatile( -+ "movq 8(%1), %%rdx ;" /* A[1] */ -+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ -+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ -+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ -+ -+ "movq 16(%1), %%rdx ;" /* A[2] */ -+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ -+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ -+ -+ "addq %%rax, %%r9 ;" -+ "adcq %%rdx, %%r10 ;" -+ "adcq %%rcx, %%r11 ;" -+ "adcq %%r14, %%r15 ;" -+ "adcq $0, %%r13 ;" -+ "movq $0, %%r14 ;" -+ "adcq 
$0, %%r14 ;" -+ -+ "movq (%1), %%rdx ;" /* A[0] */ -+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ -+ -+ "addq %%rax, %%r10 ;" -+ "adcq %%rcx, %%r11 ;" -+ "adcq $0, %%r15 ;" -+ "adcq $0, %%r13 ;" -+ "adcq $0, %%r14 ;" -+ -+ "shldq $1, %%r13, %%r14 ;" -+ "shldq $1, %%r15, %%r13 ;" -+ "shldq $1, %%r11, %%r15 ;" -+ "shldq $1, %%r10, %%r11 ;" -+ "shldq $1, %%r9, %%r10 ;" -+ "shldq $1, %%r8, %%r9 ;" -+ "shlq $1, %%r8 ;" -+ -+ /*******************/ -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ -+ /*******************/ -+ "movq %%rax, 0(%0) ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, 8(%0) ;" -+ "movq 8(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ -+ "adcq %%rax, %%r9 ;" -+ "movq %%r9, 16(%0) ;" -+ "adcq %%rcx, %%r10 ;" -+ "movq %%r10, 24(%0) ;" -+ "movq 16(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ -+ "adcq %%rax, %%r11 ;" -+ "movq %%r11, 32(%0) ;" -+ "adcq %%rcx, %%r15 ;" -+ "movq %%r15, 40(%0) ;" -+ "movq 24(%1), %%rdx ;" -+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ -+ "adcq %%rax, %%r13 ;" -+ "movq %%r13, 48(%0) ;" -+ "adcq %%rcx, %%r14 ;" -+ "movq %%r14, 56(%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -+ "%r11", "%r13", "%r14", "%r15"); -+} -+ -+static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) -+{ -+ asm volatile( -+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ -+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ -+ "xorl %%ebx, %%ebx ;" -+ "adox (%1), %%r8 ;" -+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ -+ "adcx %%r10, %%r9 ;" -+ "adox 8(%1), %%r9 ;" -+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ -+ "adcx %%r11, %%r10 ;" -+ "adox 16(%1), %%r10 ;" -+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ -+ "adcx %%rax, %%r11 ;" -+ "adox 24(%1), %%r11 ;" -+ /***************************************/ -+ "adcx %%rbx, %%rcx ;" -+ "adox %%rbx, %%rcx ;" -+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ -+ "adcx %%rcx, %%r8 ;" -+ "adcx %%rbx, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "adcx %%rbx, %%r10 
;" -+ "movq %%r10, 16(%0) ;" -+ "adcx %%rbx, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%edx, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -+ "%r10", "%r11"); -+} -+ -+static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) -+{ -+ asm volatile( -+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ -+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ -+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ -+ "addq %%r10, %%r9 ;" -+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ -+ "adcq %%r11, %%r10 ;" -+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ -+ "adcq %%rax, %%r11 ;" -+ /***************************************/ -+ "adcq $0, %%rcx ;" -+ "addq (%1), %%r8 ;" -+ "adcq 8(%1), %%r9 ;" -+ "adcq 16(%1), %%r10 ;" -+ "adcq 24(%1), %%r11 ;" -+ "adcq $0, %%rcx ;" -+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ -+ "addq %%rcx, %%r8 ;" -+ "adcq $0, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "adcq $0, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "adcq $0, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%edx, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ : -+ : "r"(c), "r"(a) -+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -+ "%r11"); -+} -+ -+static __always_inline void -+add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) -+{ -+ asm volatile( -+ "mov $38, %%eax ;" -+ "xorl %%ecx, %%ecx ;" -+ "movq (%2), %%r8 ;" -+ "adcx (%1), %%r8 ;" -+ "movq 8(%2), %%r9 ;" -+ "adcx 8(%1), %%r9 ;" -+ "movq 16(%2), %%r10 ;" -+ "adcx 16(%1), %%r10 ;" -+ "movq 24(%2), %%r11 ;" -+ "adcx 24(%1), %%r11 ;" -+ "cmovc %%eax, %%ecx ;" -+ "xorl %%eax, %%eax ;" -+ "adcx %%rcx, %%r8 ;" -+ "adcx %%rax, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "adcx %%rax, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "adcx %%rax, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $38, %%ecx ;" -+ "cmovc %%ecx, %%eax ;" -+ "addq %%rax, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ : -+ : 
"r"(c), "r"(a), "r"(b) -+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); -+} -+ -+static __always_inline void -+add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) -+{ -+ asm volatile( -+ "mov $38, %%eax ;" -+ "movq (%2), %%r8 ;" -+ "addq (%1), %%r8 ;" -+ "movq 8(%2), %%r9 ;" -+ "adcq 8(%1), %%r9 ;" -+ "movq 16(%2), %%r10 ;" -+ "adcq 16(%1), %%r10 ;" -+ "movq 24(%2), %%r11 ;" -+ "adcq 24(%1), %%r11 ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%eax, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "adcq $0, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "adcq $0, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "adcq $0, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%eax, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ : -+ : "r"(c), "r"(a), "r"(b) -+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); -+} -+ -+static __always_inline void -+sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) -+{ -+ asm volatile( -+ "mov $38, %%eax ;" -+ "movq (%1), %%r8 ;" -+ "subq (%2), %%r8 ;" -+ "movq 8(%1), %%r9 ;" -+ "sbbq 8(%2), %%r9 ;" -+ "movq 16(%1), %%r10 ;" -+ "sbbq 16(%2), %%r10 ;" -+ "movq 24(%1), %%r11 ;" -+ "sbbq 24(%2), %%r11 ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%eax, %%ecx ;" -+ "subq %%rcx, %%r8 ;" -+ "sbbq $0, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "sbbq $0, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "sbbq $0, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%eax, %%ecx ;" -+ "subq %%rcx, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ : -+ : "r"(c), "r"(a), "r"(b) -+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); -+} -+ -+/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */ -+static __always_inline void -+mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) -+{ -+ const u64 a24 = 121666; -+ asm volatile( -+ "movq %2, %%rdx ;" -+ "mulx (%1), %%r8, %%r10 ;" -+ "mulx 8(%1), %%r9, %%r11 ;" -+ "addq %%r10, %%r9 ;" -+ "mulx 16(%1), %%r10, %%rax ;" -+ "adcq %%r11, %%r10 ;" -+ "mulx 
24(%1), %%r11, %%rcx ;" -+ "adcq %%rax, %%r11 ;" -+ /**************************/ -+ "adcq $0, %%rcx ;" -+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ -+ "imul %%rdx, %%rcx ;" -+ "addq %%rcx, %%r8 ;" -+ "adcq $0, %%r9 ;" -+ "movq %%r9, 8(%0) ;" -+ "adcq $0, %%r10 ;" -+ "movq %%r10, 16(%0) ;" -+ "adcq $0, %%r11 ;" -+ "movq %%r11, 24(%0) ;" -+ "mov $0, %%ecx ;" -+ "cmovc %%edx, %%ecx ;" -+ "addq %%rcx, %%r8 ;" -+ "movq %%r8, (%0) ;" -+ : -+ : "r"(c), "r"(a), "r"(a24) -+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -+ "%r11"); -+} -+ -+static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) -+{ -+ struct { -+ eltfp25519_1w_buffer buffer; -+ eltfp25519_1w x0, x1, x2; -+ } __aligned(32) m; -+ u64 *T[4]; -+ -+ T[0] = m.x0; -+ T[1] = c; /* x^(-1) */ -+ T[2] = m.x1; -+ T[3] = m.x2; -+ -+ copy_eltfp25519_1w(T[1], a); -+ sqrn_eltfp25519_1w_adx(T[1], 1); -+ copy_eltfp25519_1w(T[2], T[1]); -+ sqrn_eltfp25519_1w_adx(T[2], 2); -+ mul_eltfp25519_1w_adx(T[0], a, T[2]); -+ mul_eltfp25519_1w_adx(T[1], T[1], T[0]); -+ copy_eltfp25519_1w(T[2], T[1]); -+ sqrn_eltfp25519_1w_adx(T[2], 1); -+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]); -+ copy_eltfp25519_1w(T[2], T[0]); -+ sqrn_eltfp25519_1w_adx(T[2], 5); -+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]); -+ copy_eltfp25519_1w(T[2], T[0]); -+ sqrn_eltfp25519_1w_adx(T[2], 10); -+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]); -+ copy_eltfp25519_1w(T[3], T[2]); -+ sqrn_eltfp25519_1w_adx(T[3], 20); -+ mul_eltfp25519_1w_adx(T[3], T[3], T[2]); -+ sqrn_eltfp25519_1w_adx(T[3], 10); -+ mul_eltfp25519_1w_adx(T[3], T[3], T[0]); -+ copy_eltfp25519_1w(T[0], T[3]); -+ sqrn_eltfp25519_1w_adx(T[0], 50); -+ mul_eltfp25519_1w_adx(T[0], T[0], T[3]); -+ copy_eltfp25519_1w(T[2], T[0]); -+ sqrn_eltfp25519_1w_adx(T[2], 100); -+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]); -+ sqrn_eltfp25519_1w_adx(T[2], 50); -+ mul_eltfp25519_1w_adx(T[2], T[2], T[3]); -+ sqrn_eltfp25519_1w_adx(T[2], 5); -+ mul_eltfp25519_1w_adx(T[1], T[1], T[2]); -+ -+ 
memzero_explicit(&m, sizeof(m)); -+} -+ -+static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) -+{ -+ struct { -+ eltfp25519_1w_buffer buffer; -+ eltfp25519_1w x0, x1, x2; -+ } __aligned(32) m; -+ u64 *T[5]; -+ -+ T[0] = m.x0; -+ T[1] = c; /* x^(-1) */ -+ T[2] = m.x1; -+ T[3] = m.x2; -+ -+ copy_eltfp25519_1w(T[1], a); -+ sqrn_eltfp25519_1w_bmi2(T[1], 1); -+ copy_eltfp25519_1w(T[2], T[1]); -+ sqrn_eltfp25519_1w_bmi2(T[2], 2); -+ mul_eltfp25519_1w_bmi2(T[0], a, T[2]); -+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); -+ copy_eltfp25519_1w(T[2], T[1]); -+ sqrn_eltfp25519_1w_bmi2(T[2], 1); -+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); -+ copy_eltfp25519_1w(T[2], T[0]); -+ sqrn_eltfp25519_1w_bmi2(T[2], 5); -+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); -+ copy_eltfp25519_1w(T[2], T[0]); -+ sqrn_eltfp25519_1w_bmi2(T[2], 10); -+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); -+ copy_eltfp25519_1w(T[3], T[2]); -+ sqrn_eltfp25519_1w_bmi2(T[3], 20); -+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); -+ sqrn_eltfp25519_1w_bmi2(T[3], 10); -+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); -+ copy_eltfp25519_1w(T[0], T[3]); -+ sqrn_eltfp25519_1w_bmi2(T[0], 50); -+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); -+ copy_eltfp25519_1w(T[2], T[0]); -+ sqrn_eltfp25519_1w_bmi2(T[2], 100); -+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); -+ sqrn_eltfp25519_1w_bmi2(T[2], 50); -+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); -+ sqrn_eltfp25519_1w_bmi2(T[2], 5); -+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); -+ -+ memzero_explicit(&m, sizeof(m)); -+} -+ -+/* Given c, a 256-bit number, fred_eltfp25519_1w updates c -+ * with a number such that 0 <= C < 2**255-19. -+ */ -+static __always_inline void fred_eltfp25519_1w(u64 *const c) -+{ -+ u64 tmp0 = 38, tmp1 = 19; -+ asm volatile( -+ "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ -+ "cmovncl %k5, %k4 ;" /* c[255] ? 
38 : 19 */ -+ -+ /* Add either 19 or 38 to c */ -+ "addq %4, %0 ;" -+ "adcq $0, %1 ;" -+ "adcq $0, %2 ;" -+ "adcq $0, %3 ;" -+ -+ /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ -+ "movl $0, %k4 ;" -+ "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */ -+ "btrq $63, %3 ;" /* Clear bit 255 */ -+ -+ /* Subtract 19 if necessary */ -+ "subq %4, %0 ;" -+ "sbbq $0, %1 ;" -+ "sbbq $0, %2 ;" -+ "sbbq $0, %3 ;" -+ -+ : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), -+ "+r"(tmp1) -+ : -+ : "memory", "cc"); -+} -+ -+static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) -+{ -+ u64 temp; -+ asm volatile( -+ "test %9, %9 ;" -+ "movq %0, %8 ;" -+ "cmovnzq %4, %0 ;" -+ "cmovnzq %8, %4 ;" -+ "movq %1, %8 ;" -+ "cmovnzq %5, %1 ;" -+ "cmovnzq %8, %5 ;" -+ "movq %2, %8 ;" -+ "cmovnzq %6, %2 ;" -+ "cmovnzq %8, %6 ;" -+ "movq %3, %8 ;" -+ "cmovnzq %7, %3 ;" -+ "cmovnzq %8, %7 ;" -+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), -+ "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), -+ "=r"(temp) -+ : "r"(bit) -+ : "cc" -+ ); -+} -+ -+static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) -+{ -+ asm volatile( -+ "test %4, %4 ;" -+ "cmovnzq %5, %0 ;" -+ "cmovnzq %6, %1 ;" -+ "cmovnzq %7, %2 ;" -+ "cmovnzq %8, %3 ;" -+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) -+ : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) -+ : "cc" -+ ); -+} -+ -+static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], -+ const u8 private_key[CURVE25519_KEY_SIZE], -+ const u8 session_key[CURVE25519_KEY_SIZE]) -+{ -+ struct { -+ u64 buffer[4 * NUM_WORDS_ELTFP25519]; -+ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -+ u64 workspace[6 * NUM_WORDS_ELTFP25519]; -+ u8 session[CURVE25519_KEY_SIZE]; -+ u8 private[CURVE25519_KEY_SIZE]; -+ } __aligned(32) m; -+ -+ int i = 0, j = 0; -+ u64 prev = 0; -+ u64 *const X1 = (u64 *)m.session; -+ u64 *const key = (u64 *)m.private; -+ u64 *const Px = m.coordinates + 0; -+ u64 *const Pz 
= m.coordinates + 4; -+ u64 *const Qx = m.coordinates + 8; -+ u64 *const Qz = m.coordinates + 12; -+ u64 *const X2 = Qx; -+ u64 *const Z2 = Qz; -+ u64 *const X3 = Px; -+ u64 *const Z3 = Pz; -+ u64 *const X2Z2 = Qx; -+ u64 *const X3Z3 = Px; -+ -+ u64 *const A = m.workspace + 0; -+ u64 *const B = m.workspace + 4; -+ u64 *const D = m.workspace + 8; -+ u64 *const C = m.workspace + 12; -+ u64 *const DA = m.workspace + 16; -+ u64 *const CB = m.workspace + 20; -+ u64 *const AB = A; -+ u64 *const DC = D; -+ u64 *const DACB = DA; -+ -+ memcpy(m.private, private_key, sizeof(m.private)); -+ memcpy(m.session, session_key, sizeof(m.session)); -+ -+ curve25519_clamp_secret(m.private); -+ -+ /* As in the draft: -+ * When receiving such an array, implementations of curve25519 -+ * MUST mask the most-significant bit in the final byte. This -+ * is done to preserve compatibility with point formats which -+ * reserve the sign bit for use in other protocols and to -+ * increase resistance to implementation fingerprinting -+ */ -+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; -+ -+ copy_eltfp25519_1w(Px, X1); -+ setzero_eltfp25519_1w(Pz); -+ setzero_eltfp25519_1w(Qx); -+ setzero_eltfp25519_1w(Qz); -+ -+ Pz[0] = 1; -+ Qx[0] = 1; -+ -+ /* main-loop */ -+ prev = 0; -+ j = 62; -+ for (i = 3; i >= 0; --i) { -+ while (j >= 0) { -+ u64 bit = (key[i] >> j) & 0x1; -+ u64 swap = bit ^ prev; -+ prev = bit; -+ -+ add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ -+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ -+ add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ -+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ -+ mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ -+ -+ cselect(swap, A, C); -+ cselect(swap, B, D); -+ -+ sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ -+ add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ -+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ -+ sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ -+ -+ 
copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ -+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ -+ -+ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ -+ add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */ -+ mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ -+ mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ -+ --j; -+ } -+ j = 63; -+ } -+ -+ inv_eltfp25519_1w_adx(A, Qz); -+ mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); -+ fred_eltfp25519_1w((u64 *)shared); -+ -+ memzero_explicit(&m, sizeof(m)); -+} -+ -+static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], -+ const u8 private_key[CURVE25519_KEY_SIZE]) -+{ -+ struct { -+ u64 buffer[4 * NUM_WORDS_ELTFP25519]; -+ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -+ u64 workspace[4 * NUM_WORDS_ELTFP25519]; -+ u8 private[CURVE25519_KEY_SIZE]; -+ } __aligned(32) m; -+ -+ const int ite[4] = { 64, 64, 64, 63 }; -+ const int q = 3; -+ u64 swap = 1; -+ -+ int i = 0, j = 0, k = 0; -+ u64 *const key = (u64 *)m.private; -+ u64 *const Ur1 = m.coordinates + 0; -+ u64 *const Zr1 = m.coordinates + 4; -+ u64 *const Ur2 = m.coordinates + 8; -+ u64 *const Zr2 = m.coordinates + 12; -+ -+ u64 *const UZr1 = m.coordinates + 0; -+ u64 *const ZUr2 = m.coordinates + 8; -+ -+ u64 *const A = m.workspace + 0; -+ u64 *const B = m.workspace + 4; -+ u64 *const C = m.workspace + 8; -+ u64 *const D = m.workspace + 12; -+ -+ u64 *const AB = m.workspace + 0; -+ u64 *const CD = m.workspace + 8; -+ -+ const u64 *const P = table_ladder_8k; -+ -+ memcpy(m.private, private_key, sizeof(m.private)); -+ -+ curve25519_clamp_secret(m.private); -+ -+ setzero_eltfp25519_1w(Ur1); -+ setzero_eltfp25519_1w(Zr1); -+ setzero_eltfp25519_1w(Zr2); -+ Ur1[0] = 1; -+ Zr1[0] = 1; -+ Zr2[0] = 1; -+ -+ /* G-S */ -+ Ur2[3] = 0x1eaecdeee27cab34UL; -+ Ur2[2] = 0xadc7a0b9235d48e2UL; -+ Ur2[1] = 0xbbf095ae14b2edf8UL; -+ Ur2[0] = 0x7e94e1fec82faabdUL; -+ -+ /* main-loop */ -+ j = q; -+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { -+ while (j < ite[i]) 
{ -+ u64 bit = (key[i] >> j) & 0x1; -+ k = (64 * i + j - q); -+ swap = swap ^ bit; -+ cswap(swap, Ur1, Ur2); -+ cswap(swap, Zr1, Zr2); -+ swap = bit; -+ /* Addition */ -+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -+ mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */ -+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ -+ add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ -+ sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */ -+ mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ -+ ++j; -+ } -+ j = 0; -+ } -+ -+ /* Doubling */ -+ for (i = 0; i < q; ++i) { -+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -+ sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ -+ copy_eltfp25519_1w(C, B); /* C = B */ -+ sub_eltfp25519_1w(B, A, B); /* B = A-B */ -+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ -+ add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ -+ mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ -+ } -+ -+ /* Convert to affine coordinates */ -+ inv_eltfp25519_1w_adx(A, Zr1); -+ mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); -+ fred_eltfp25519_1w((u64 *)session_key); -+ -+ memzero_explicit(&m, sizeof(m)); -+} -+ -+static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], -+ const u8 private_key[CURVE25519_KEY_SIZE], -+ const u8 session_key[CURVE25519_KEY_SIZE]) -+{ -+ struct { -+ u64 buffer[4 * NUM_WORDS_ELTFP25519]; -+ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -+ u64 workspace[6 * NUM_WORDS_ELTFP25519]; -+ u8 session[CURVE25519_KEY_SIZE]; -+ u8 private[CURVE25519_KEY_SIZE]; -+ } __aligned(32) m; -+ -+ int i = 0, j = 0; -+ u64 prev = 0; -+ u64 *const X1 = (u64 *)m.session; -+ u64 *const key = (u64 *)m.private; -+ u64 *const Px = m.coordinates + 0; -+ u64 *const Pz = m.coordinates + 4; -+ u64 *const Qx = m.coordinates + 8; -+ u64 *const Qz = m.coordinates + 12; -+ u64 *const X2 
= Qx; -+ u64 *const Z2 = Qz; -+ u64 *const X3 = Px; -+ u64 *const Z3 = Pz; -+ u64 *const X2Z2 = Qx; -+ u64 *const X3Z3 = Px; -+ -+ u64 *const A = m.workspace + 0; -+ u64 *const B = m.workspace + 4; -+ u64 *const D = m.workspace + 8; -+ u64 *const C = m.workspace + 12; -+ u64 *const DA = m.workspace + 16; -+ u64 *const CB = m.workspace + 20; -+ u64 *const AB = A; -+ u64 *const DC = D; -+ u64 *const DACB = DA; -+ -+ memcpy(m.private, private_key, sizeof(m.private)); -+ memcpy(m.session, session_key, sizeof(m.session)); -+ -+ curve25519_clamp_secret(m.private); -+ -+ /* As in the draft: -+ * When receiving such an array, implementations of curve25519 -+ * MUST mask the most-significant bit in the final byte. This -+ * is done to preserve compatibility with point formats which -+ * reserve the sign bit for use in other protocols and to -+ * increase resistance to implementation fingerprinting -+ */ -+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; -+ -+ copy_eltfp25519_1w(Px, X1); -+ setzero_eltfp25519_1w(Pz); -+ setzero_eltfp25519_1w(Qx); -+ setzero_eltfp25519_1w(Qz); -+ -+ Pz[0] = 1; -+ Qx[0] = 1; -+ -+ /* main-loop */ -+ prev = 0; -+ j = 62; -+ for (i = 3; i >= 0; --i) { -+ while (j >= 0) { -+ u64 bit = (key[i] >> j) & 0x1; -+ u64 swap = bit ^ prev; -+ prev = bit; -+ -+ add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ -+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ -+ add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ -+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ -+ mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ -+ -+ cselect(swap, A, C); -+ cselect(swap, B, D); -+ -+ sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ -+ add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ -+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ -+ sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ -+ -+ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ -+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ -+ -+ 
mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ -+ add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */ -+ mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ -+ mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ -+ --j; -+ } -+ j = 63; -+ } -+ -+ inv_eltfp25519_1w_bmi2(A, Qz); -+ mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); -+ fred_eltfp25519_1w((u64 *)shared); -+ -+ memzero_explicit(&m, sizeof(m)); -+} -+ -+static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], -+ const u8 private_key[CURVE25519_KEY_SIZE]) -+{ -+ struct { -+ u64 buffer[4 * NUM_WORDS_ELTFP25519]; -+ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -+ u64 workspace[4 * NUM_WORDS_ELTFP25519]; -+ u8 private[CURVE25519_KEY_SIZE]; -+ } __aligned(32) m; -+ -+ const int ite[4] = { 64, 64, 64, 63 }; -+ const int q = 3; -+ u64 swap = 1; -+ -+ int i = 0, j = 0, k = 0; -+ u64 *const key = (u64 *)m.private; -+ u64 *const Ur1 = m.coordinates + 0; -+ u64 *const Zr1 = m.coordinates + 4; -+ u64 *const Ur2 = m.coordinates + 8; -+ u64 *const Zr2 = m.coordinates + 12; -+ -+ u64 *const UZr1 = m.coordinates + 0; -+ u64 *const ZUr2 = m.coordinates + 8; -+ -+ u64 *const A = m.workspace + 0; -+ u64 *const B = m.workspace + 4; -+ u64 *const C = m.workspace + 8; -+ u64 *const D = m.workspace + 12; -+ -+ u64 *const AB = m.workspace + 0; -+ u64 *const CD = m.workspace + 8; -+ -+ const u64 *const P = table_ladder_8k; -+ -+ memcpy(m.private, private_key, sizeof(m.private)); -+ -+ curve25519_clamp_secret(m.private); -+ -+ setzero_eltfp25519_1w(Ur1); -+ setzero_eltfp25519_1w(Zr1); -+ setzero_eltfp25519_1w(Zr2); -+ Ur1[0] = 1; -+ Zr1[0] = 1; -+ Zr2[0] = 1; -+ -+ /* G-S */ -+ Ur2[3] = 0x1eaecdeee27cab34UL; -+ Ur2[2] = 0xadc7a0b9235d48e2UL; -+ Ur2[1] = 0xbbf095ae14b2edf8UL; -+ Ur2[0] = 0x7e94e1fec82faabdUL; -+ -+ /* main-loop */ -+ j = q; -+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { -+ while (j < ite[i]) { -+ u64 bit = (key[i] >> j) & 0x1; -+ k = (64 * i + j - q); -+ swap = swap ^ bit; -+ 
cswap(swap, Ur1, Ur2); -+ cswap(swap, Zr1, Zr2); -+ swap = bit; -+ /* Addition */ -+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -+ mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */ -+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ -+ add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ -+ sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */ -+ mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ -+ ++j; -+ } -+ j = 0; -+ } -+ -+ /* Doubling */ -+ for (i = 0; i < q; ++i) { -+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -+ sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ -+ copy_eltfp25519_1w(C, B); /* C = B */ -+ sub_eltfp25519_1w(B, A, B); /* B = A-B */ -+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ -+ add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ -+ mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ -+ } -+ -+ /* Convert to affine coordinates */ -+ inv_eltfp25519_1w_bmi2(A, Zr1); -+ mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); -+ fred_eltfp25519_1w((u64 *)session_key); -+ -+ memzero_explicit(&m, sizeof(m)); -+} -+ -+void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE], -+ const u8 basepoint[CURVE25519_KEY_SIZE]) -+{ -+ if (static_branch_likely(&curve25519_use_adx)) -+ curve25519_adx(mypublic, secret, basepoint); -+ else if (static_branch_likely(&curve25519_use_bmi2)) -+ curve25519_bmi2(mypublic, secret, basepoint); -+ else -+ curve25519_generic(mypublic, secret, basepoint); -+} -+EXPORT_SYMBOL(curve25519_arch); -+ -+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE]) -+{ -+ if (static_branch_likely(&curve25519_use_adx)) -+ curve25519_adx_base(pub, secret); -+ else if (static_branch_likely(&curve25519_use_bmi2)) -+ curve25519_bmi2_base(pub, secret); -+ else -+ 
curve25519_generic(pub, secret, curve25519_base_point); -+} -+EXPORT_SYMBOL(curve25519_base_arch); -+ -+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, -+ unsigned int len) -+{ -+ u8 *secret = kpp_tfm_ctx(tfm); -+ -+ if (!len) -+ curve25519_generate_secret(secret); -+ else if (len == CURVE25519_KEY_SIZE && -+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) -+ memcpy(secret, buf, CURVE25519_KEY_SIZE); -+ else -+ return -EINVAL; -+ return 0; -+} -+ -+static int curve25519_generate_public_key(struct kpp_request *req) -+{ -+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); -+ const u8 *secret = kpp_tfm_ctx(tfm); -+ u8 buf[CURVE25519_KEY_SIZE]; -+ int copied, nbytes; -+ -+ if (req->src) -+ return -EINVAL; -+ -+ curve25519_base_arch(buf, secret); -+ -+ /* might want less than we've got */ -+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); -+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, -+ nbytes), -+ buf, nbytes); -+ if (copied != nbytes) -+ return -EINVAL; -+ return 0; -+} -+ -+static int curve25519_compute_shared_secret(struct kpp_request *req) -+{ -+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); -+ const u8 *secret = kpp_tfm_ctx(tfm); -+ u8 public_key[CURVE25519_KEY_SIZE]; -+ u8 buf[CURVE25519_KEY_SIZE]; -+ int copied, nbytes; -+ -+ if (!req->src) -+ return -EINVAL; -+ -+ copied = sg_copy_to_buffer(req->src, -+ sg_nents_for_len(req->src, -+ CURVE25519_KEY_SIZE), -+ public_key, CURVE25519_KEY_SIZE); -+ if (copied != CURVE25519_KEY_SIZE) -+ return -EINVAL; -+ -+ curve25519_arch(buf, secret, public_key); -+ -+ /* might want less than we've got */ -+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); -+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, -+ nbytes), -+ buf, nbytes); -+ if (copied != nbytes) -+ return -EINVAL; -+ return 0; -+} -+ -+static unsigned int curve25519_max_size(struct crypto_kpp *tfm) -+{ -+ return CURVE25519_KEY_SIZE; -+} -+ -+static struct kpp_alg 
curve25519_alg = { -+ .base.cra_name = "curve25519", -+ .base.cra_driver_name = "curve25519-x86", -+ .base.cra_priority = 200, -+ .base.cra_module = THIS_MODULE, -+ .base.cra_ctxsize = CURVE25519_KEY_SIZE, -+ -+ .set_secret = curve25519_set_secret, -+ .generate_public_key = curve25519_generate_public_key, -+ .compute_shared_secret = curve25519_compute_shared_secret, -+ .max_size = curve25519_max_size, -+}; -+ -+static int __init curve25519_mod_init(void) -+{ -+ if (boot_cpu_has(X86_FEATURE_BMI2)) -+ static_branch_enable(&curve25519_use_bmi2); -+ else if (boot_cpu_has(X86_FEATURE_ADX)) -+ static_branch_enable(&curve25519_use_adx); -+ else -+ return 0; -+ return crypto_register_kpp(&curve25519_alg); -+} -+ -+static void __exit curve25519_mod_exit(void) -+{ -+ if (boot_cpu_has(X86_FEATURE_BMI2) || -+ boot_cpu_has(X86_FEATURE_ADX)) -+ crypto_unregister_kpp(&curve25519_alg); -+} -+ -+module_init(curve25519_mod_init); -+module_exit(curve25519_mod_exit); -+ -+MODULE_ALIAS_CRYPTO("curve25519"); -+MODULE_ALIAS_CRYPTO("curve25519-x86"); -+MODULE_LICENSE("GPL v2"); ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -269,6 +269,12 @@ config CRYPTO_CURVE25519 - select CRYPTO_KPP - select CRYPTO_LIB_CURVE25519_GENERIC - -+config CRYPTO_CURVE25519_X86 -+ tristate "x86_64 accelerated Curve25519 scalar multiplication library" -+ depends on X86 && 64BIT -+ select CRYPTO_LIB_CURVE25519_GENERIC -+ select CRYPTO_ARCH_HAVE_LIB_CURVE25519 -+ - comment "Authenticated Encryption with Associated Data" - - config CRYPTO_CCM diff --git a/target/linux/generic/backport-5.4/080-wireguard-0030-crypto-arm-curve25519-import-Bernstein-and-Schwabe-s.patch b/target/linux/generic/backport-5.4/080-wireguard-0030-crypto-arm-curve25519-import-Bernstein-and-Schwabe-s.patch deleted file mode 100644 index 8fda25d60a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0030-crypto-arm-curve25519-import-Bernstein-and-Schwabe-s.patch +++ /dev/null @@ -1,2135 +0,0 @@ -From 
0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:37 +0100 -Subject: [PATCH] crypto: arm/curve25519 - import Bernstein and Schwabe's - Curve25519 ARM implementation - -commit f0fb006b604f98e2309a30f34ef455ac734f7c1c upstream. - -This comes from Dan Bernstein and Peter Schwabe's public domain NEON -code, and is included here in raw form so that subsequent commits that -fix these up for the kernel can see how it has changed. This code does -have some entirely cosmetic formatting differences, adding indentation -and so forth, so that when we actually port it for use in the kernel in -the subsequent commit, it's obvious what's changed in the process. - -This code originates from SUPERCOP 20180818, available at -<https://bench.cr.yp.to/supercop.html>. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/curve25519-core.S | 2105 +++++++++++++++++++++++++++++ - 1 file changed, 2105 insertions(+) - create mode 100644 arch/arm/crypto/curve25519-core.S - ---- /dev/null -+++ b/arch/arm/crypto/curve25519-core.S -@@ -0,0 +1,2105 @@ -+/* -+ * Public domain code from Daniel J. Bernstein and Peter Schwabe, from -+ * SUPERCOP's curve25519/neon2/scalarmult.s. 
-+ */ -+ -+.fpu neon -+.text -+.align 4 -+.global _crypto_scalarmult_curve25519_neon2 -+.global crypto_scalarmult_curve25519_neon2 -+.type _crypto_scalarmult_curve25519_neon2 STT_FUNC -+.type crypto_scalarmult_curve25519_neon2 STT_FUNC -+ _crypto_scalarmult_curve25519_neon2: -+ crypto_scalarmult_curve25519_neon2: -+ vpush {q4, q5, q6, q7} -+ mov r12, sp -+ sub sp, sp, #736 -+ and sp, sp, #0xffffffe0 -+ strd r4, [sp, #0] -+ strd r6, [sp, #8] -+ strd r8, [sp, #16] -+ strd r10, [sp, #24] -+ str r12, [sp, #480] -+ str r14, [sp, #484] -+ mov r0, r0 -+ mov r1, r1 -+ mov r2, r2 -+ add r3, sp, #32 -+ ldr r4, =0 -+ ldr r5, =254 -+ vmov.i32 q0, #1 -+ vshr.u64 q1, q0, #7 -+ vshr.u64 q0, q0, #8 -+ vmov.i32 d4, #19 -+ vmov.i32 d5, #38 -+ add r6, sp, #512 -+ vst1.8 {d2-d3}, [r6, : 128] -+ add r6, sp, #528 -+ vst1.8 {d0-d1}, [r6, : 128] -+ add r6, sp, #544 -+ vst1.8 {d4-d5}, [r6, : 128] -+ add r6, r3, #0 -+ vmov.i32 q2, #0 -+ vst1.8 {d4-d5}, [r6, : 128]! -+ vst1.8 {d4-d5}, [r6, : 128]! -+ vst1.8 d4, [r6, : 64] -+ add r6, r3, #0 -+ ldr r7, =960 -+ sub r7, r7, #2 -+ neg r7, r7 -+ sub r7, r7, r7, LSL #7 -+ str r7, [r6] -+ add r6, sp, #704 -+ vld1.8 {d4-d5}, [r1]! -+ vld1.8 {d6-d7}, [r1] -+ vst1.8 {d4-d5}, [r6, : 128]! 
-+ vst1.8 {d6-d7}, [r6, : 128] -+ sub r1, r6, #16 -+ ldrb r6, [r1] -+ and r6, r6, #248 -+ strb r6, [r1] -+ ldrb r6, [r1, #31] -+ and r6, r6, #127 -+ orr r6, r6, #64 -+ strb r6, [r1, #31] -+ vmov.i64 q2, #0xffffffff -+ vshr.u64 q3, q2, #7 -+ vshr.u64 q2, q2, #6 -+ vld1.8 {d8}, [r2] -+ vld1.8 {d10}, [r2] -+ add r2, r2, #6 -+ vld1.8 {d12}, [r2] -+ vld1.8 {d14}, [r2] -+ add r2, r2, #6 -+ vld1.8 {d16}, [r2] -+ add r2, r2, #4 -+ vld1.8 {d18}, [r2] -+ vld1.8 {d20}, [r2] -+ add r2, r2, #6 -+ vld1.8 {d22}, [r2] -+ add r2, r2, #2 -+ vld1.8 {d24}, [r2] -+ vld1.8 {d26}, [r2] -+ vshr.u64 q5, q5, #26 -+ vshr.u64 q6, q6, #3 -+ vshr.u64 q7, q7, #29 -+ vshr.u64 q8, q8, #6 -+ vshr.u64 q10, q10, #25 -+ vshr.u64 q11, q11, #3 -+ vshr.u64 q12, q12, #12 -+ vshr.u64 q13, q13, #38 -+ vand q4, q4, q2 -+ vand q6, q6, q2 -+ vand q8, q8, q2 -+ vand q10, q10, q2 -+ vand q2, q12, q2 -+ vand q5, q5, q3 -+ vand q7, q7, q3 -+ vand q9, q9, q3 -+ vand q11, q11, q3 -+ vand q3, q13, q3 -+ add r2, r3, #48 -+ vadd.i64 q12, q4, q1 -+ vadd.i64 q13, q10, q1 -+ vshr.s64 q12, q12, #26 -+ vshr.s64 q13, q13, #26 -+ vadd.i64 q5, q5, q12 -+ vshl.i64 q12, q12, #26 -+ vadd.i64 q14, q5, q0 -+ vadd.i64 q11, q11, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q15, q11, q0 -+ vsub.i64 q4, q4, q12 -+ vshr.s64 q12, q14, #25 -+ vsub.i64 q10, q10, q13 -+ vshr.s64 q13, q15, #25 -+ vadd.i64 q6, q6, q12 -+ vshl.i64 q12, q12, #25 -+ vadd.i64 q14, q6, q1 -+ vadd.i64 q2, q2, q13 -+ vsub.i64 q5, q5, q12 -+ vshr.s64 q12, q14, #26 -+ vshl.i64 q13, q13, #25 -+ vadd.i64 q14, q2, q1 -+ vadd.i64 q7, q7, q12 -+ vshl.i64 q12, q12, #26 -+ vadd.i64 q15, q7, q0 -+ vsub.i64 q11, q11, q13 -+ vshr.s64 q13, q14, #26 -+ vsub.i64 q6, q6, q12 -+ vshr.s64 q12, q15, #25 -+ vadd.i64 q3, q3, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q14, q3, q0 -+ vadd.i64 q8, q8, q12 -+ vshl.i64 q12, q12, #25 -+ vadd.i64 q15, q8, q1 -+ add r2, r2, #8 -+ vsub.i64 q2, q2, q13 -+ vshr.s64 q13, q14, #25 -+ vsub.i64 q7, q7, q12 -+ vshr.s64 q12, q15, #26 -+ vadd.i64 q14, q13, 
q13 -+ vadd.i64 q9, q9, q12 -+ vtrn.32 d12, d14 -+ vshl.i64 q12, q12, #26 -+ vtrn.32 d13, d15 -+ vadd.i64 q0, q9, q0 -+ vadd.i64 q4, q4, q14 -+ vst1.8 d12, [r2, : 64]! -+ vshl.i64 q6, q13, #4 -+ vsub.i64 q7, q8, q12 -+ vshr.s64 q0, q0, #25 -+ vadd.i64 q4, q4, q6 -+ vadd.i64 q6, q10, q0 -+ vshl.i64 q0, q0, #25 -+ vadd.i64 q8, q6, q1 -+ vadd.i64 q4, q4, q13 -+ vshl.i64 q10, q13, #25 -+ vadd.i64 q1, q4, q1 -+ vsub.i64 q0, q9, q0 -+ vshr.s64 q8, q8, #26 -+ vsub.i64 q3, q3, q10 -+ vtrn.32 d14, d0 -+ vshr.s64 q1, q1, #26 -+ vtrn.32 d15, d1 -+ vadd.i64 q0, q11, q8 -+ vst1.8 d14, [r2, : 64] -+ vshl.i64 q7, q8, #26 -+ vadd.i64 q5, q5, q1 -+ vtrn.32 d4, d6 -+ vshl.i64 q1, q1, #26 -+ vtrn.32 d5, d7 -+ vsub.i64 q3, q6, q7 -+ add r2, r2, #16 -+ vsub.i64 q1, q4, q1 -+ vst1.8 d4, [r2, : 64] -+ vtrn.32 d6, d0 -+ vtrn.32 d7, d1 -+ sub r2, r2, #8 -+ vtrn.32 d2, d10 -+ vtrn.32 d3, d11 -+ vst1.8 d6, [r2, : 64] -+ sub r2, r2, #24 -+ vst1.8 d2, [r2, : 64] -+ add r2, r3, #96 -+ vmov.i32 q0, #0 -+ vmov.i64 d2, #0xff -+ vmov.i64 d3, #0 -+ vshr.u32 q1, q1, #7 -+ vst1.8 {d2-d3}, [r2, : 128]! -+ vst1.8 {d0-d1}, [r2, : 128]! -+ vst1.8 d0, [r2, : 64] -+ add r2, r3, #144 -+ vmov.i32 q0, #0 -+ vst1.8 {d0-d1}, [r2, : 128]! -+ vst1.8 {d0-d1}, [r2, : 128]! -+ vst1.8 d0, [r2, : 64] -+ add r2, r3, #240 -+ vmov.i32 q0, #0 -+ vmov.i64 d2, #0xff -+ vmov.i64 d3, #0 -+ vshr.u32 q1, q1, #7 -+ vst1.8 {d2-d3}, [r2, : 128]! -+ vst1.8 {d0-d1}, [r2, : 128]! -+ vst1.8 d0, [r2, : 64] -+ add r2, r3, #48 -+ add r6, r3, #192 -+ vld1.8 {d0-d1}, [r2, : 128]! -+ vld1.8 {d2-d3}, [r2, : 128]! -+ vld1.8 {d4}, [r2, : 64] -+ vst1.8 {d0-d1}, [r6, : 128]! -+ vst1.8 {d2-d3}, [r6, : 128]! -+ vst1.8 d4, [r6, : 64] -+._mainloop: -+ mov r2, r5, LSR #3 -+ and r6, r5, #7 -+ ldrb r2, [r1, r2] -+ mov r2, r2, LSR r6 -+ and r2, r2, #1 -+ str r5, [sp, #488] -+ eor r4, r4, r2 -+ str r2, [sp, #492] -+ neg r2, r4 -+ add r4, r3, #96 -+ add r5, r3, #192 -+ add r6, r3, #144 -+ vld1.8 {d8-d9}, [r4, : 128]! 
-+ add r7, r3, #240 -+ vld1.8 {d10-d11}, [r5, : 128]! -+ veor q6, q4, q5 -+ vld1.8 {d14-d15}, [r6, : 128]! -+ vdup.i32 q8, r2 -+ vld1.8 {d18-d19}, [r7, : 128]! -+ veor q10, q7, q9 -+ vld1.8 {d22-d23}, [r4, : 128]! -+ vand q6, q6, q8 -+ vld1.8 {d24-d25}, [r5, : 128]! -+ vand q10, q10, q8 -+ vld1.8 {d26-d27}, [r6, : 128]! -+ veor q4, q4, q6 -+ vld1.8 {d28-d29}, [r7, : 128]! -+ veor q5, q5, q6 -+ vld1.8 {d0}, [r4, : 64] -+ veor q6, q7, q10 -+ vld1.8 {d2}, [r5, : 64] -+ veor q7, q9, q10 -+ vld1.8 {d4}, [r6, : 64] -+ veor q9, q11, q12 -+ vld1.8 {d6}, [r7, : 64] -+ veor q10, q0, q1 -+ sub r2, r4, #32 -+ vand q9, q9, q8 -+ sub r4, r5, #32 -+ vand q10, q10, q8 -+ sub r5, r6, #32 -+ veor q11, q11, q9 -+ sub r6, r7, #32 -+ veor q0, q0, q10 -+ veor q9, q12, q9 -+ veor q1, q1, q10 -+ veor q10, q13, q14 -+ veor q12, q2, q3 -+ vand q10, q10, q8 -+ vand q8, q12, q8 -+ veor q12, q13, q10 -+ veor q2, q2, q8 -+ veor q10, q14, q10 -+ veor q3, q3, q8 -+ vadd.i32 q8, q4, q6 -+ vsub.i32 q4, q4, q6 -+ vst1.8 {d16-d17}, [r2, : 128]! -+ vadd.i32 q6, q11, q12 -+ vst1.8 {d8-d9}, [r5, : 128]! -+ vsub.i32 q4, q11, q12 -+ vst1.8 {d12-d13}, [r2, : 128]! -+ vadd.i32 q6, q0, q2 -+ vst1.8 {d8-d9}, [r5, : 128]! -+ vsub.i32 q0, q0, q2 -+ vst1.8 d12, [r2, : 64] -+ vadd.i32 q2, q5, q7 -+ vst1.8 d0, [r5, : 64] -+ vsub.i32 q0, q5, q7 -+ vst1.8 {d4-d5}, [r4, : 128]! -+ vadd.i32 q2, q9, q10 -+ vst1.8 {d0-d1}, [r6, : 128]! -+ vsub.i32 q0, q9, q10 -+ vst1.8 {d4-d5}, [r4, : 128]! -+ vadd.i32 q2, q1, q3 -+ vst1.8 {d0-d1}, [r6, : 128]! -+ vsub.i32 q0, q1, q3 -+ vst1.8 d4, [r4, : 64] -+ vst1.8 d0, [r6, : 64] -+ add r2, sp, #544 -+ add r4, r3, #96 -+ add r5, r3, #144 -+ vld1.8 {d0-d1}, [r2, : 128] -+ vld1.8 {d2-d3}, [r4, : 128]! -+ vld1.8 {d4-d5}, [r5, : 128]! -+ vzip.i32 q1, q2 -+ vld1.8 {d6-d7}, [r4, : 128]! -+ vld1.8 {d8-d9}, [r5, : 128]! 
-+ vshl.i32 q5, q1, #1 -+ vzip.i32 q3, q4 -+ vshl.i32 q6, q2, #1 -+ vld1.8 {d14}, [r4, : 64] -+ vshl.i32 q8, q3, #1 -+ vld1.8 {d15}, [r5, : 64] -+ vshl.i32 q9, q4, #1 -+ vmul.i32 d21, d7, d1 -+ vtrn.32 d14, d15 -+ vmul.i32 q11, q4, q0 -+ vmul.i32 q0, q7, q0 -+ vmull.s32 q12, d2, d2 -+ vmlal.s32 q12, d11, d1 -+ vmlal.s32 q12, d12, d0 -+ vmlal.s32 q12, d13, d23 -+ vmlal.s32 q12, d16, d22 -+ vmlal.s32 q12, d7, d21 -+ vmull.s32 q10, d2, d11 -+ vmlal.s32 q10, d4, d1 -+ vmlal.s32 q10, d13, d0 -+ vmlal.s32 q10, d6, d23 -+ vmlal.s32 q10, d17, d22 -+ vmull.s32 q13, d10, d4 -+ vmlal.s32 q13, d11, d3 -+ vmlal.s32 q13, d13, d1 -+ vmlal.s32 q13, d16, d0 -+ vmlal.s32 q13, d17, d23 -+ vmlal.s32 q13, d8, d22 -+ vmull.s32 q1, d10, d5 -+ vmlal.s32 q1, d11, d4 -+ vmlal.s32 q1, d6, d1 -+ vmlal.s32 q1, d17, d0 -+ vmlal.s32 q1, d8, d23 -+ vmull.s32 q14, d10, d6 -+ vmlal.s32 q14, d11, d13 -+ vmlal.s32 q14, d4, d4 -+ vmlal.s32 q14, d17, d1 -+ vmlal.s32 q14, d18, d0 -+ vmlal.s32 q14, d9, d23 -+ vmull.s32 q11, d10, d7 -+ vmlal.s32 q11, d11, d6 -+ vmlal.s32 q11, d12, d5 -+ vmlal.s32 q11, d8, d1 -+ vmlal.s32 q11, d19, d0 -+ vmull.s32 q15, d10, d8 -+ vmlal.s32 q15, d11, d17 -+ vmlal.s32 q15, d12, d6 -+ vmlal.s32 q15, d13, d5 -+ vmlal.s32 q15, d19, d1 -+ vmlal.s32 q15, d14, d0 -+ vmull.s32 q2, d10, d9 -+ vmlal.s32 q2, d11, d8 -+ vmlal.s32 q2, d12, d7 -+ vmlal.s32 q2, d13, d6 -+ vmlal.s32 q2, d14, d1 -+ vmull.s32 q0, d15, d1 -+ vmlal.s32 q0, d10, d14 -+ vmlal.s32 q0, d11, d19 -+ vmlal.s32 q0, d12, d8 -+ vmlal.s32 q0, d13, d17 -+ vmlal.s32 q0, d6, d6 -+ add r2, sp, #512 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmull.s32 q3, d16, d7 -+ vmlal.s32 q3, d10, d15 -+ vmlal.s32 q3, d11, d14 -+ vmlal.s32 q3, d12, d9 -+ vmlal.s32 q3, d13, d8 -+ add r2, sp, #528 -+ vld1.8 {d8-d9}, [r2, : 128] -+ vadd.i64 q5, q12, q9 -+ vadd.i64 q6, q15, q9 -+ vshr.s64 q5, q5, #26 -+ vshr.s64 q6, q6, #26 -+ vadd.i64 q7, q10, q5 -+ vshl.i64 q5, q5, #26 -+ vadd.i64 q8, q7, q4 -+ vadd.i64 q2, q2, q6 -+ vshl.i64 q6, q6, #26 -+ 
vadd.i64 q10, q2, q4 -+ vsub.i64 q5, q12, q5 -+ vshr.s64 q8, q8, #25 -+ vsub.i64 q6, q15, q6 -+ vshr.s64 q10, q10, #25 -+ vadd.i64 q12, q13, q8 -+ vshl.i64 q8, q8, #25 -+ vadd.i64 q13, q12, q9 -+ vadd.i64 q0, q0, q10 -+ vsub.i64 q7, q7, q8 -+ vshr.s64 q8, q13, #26 -+ vshl.i64 q10, q10, #25 -+ vadd.i64 q13, q0, q9 -+ vadd.i64 q1, q1, q8 -+ vshl.i64 q8, q8, #26 -+ vadd.i64 q15, q1, q4 -+ vsub.i64 q2, q2, q10 -+ vshr.s64 q10, q13, #26 -+ vsub.i64 q8, q12, q8 -+ vshr.s64 q12, q15, #25 -+ vadd.i64 q3, q3, q10 -+ vshl.i64 q10, q10, #26 -+ vadd.i64 q13, q3, q4 -+ vadd.i64 q14, q14, q12 -+ add r2, r3, #288 -+ vshl.i64 q12, q12, #25 -+ add r4, r3, #336 -+ vadd.i64 q15, q14, q9 -+ add r2, r2, #8 -+ vsub.i64 q0, q0, q10 -+ add r4, r4, #8 -+ vshr.s64 q10, q13, #25 -+ vsub.i64 q1, q1, q12 -+ vshr.s64 q12, q15, #26 -+ vadd.i64 q13, q10, q10 -+ vadd.i64 q11, q11, q12 -+ vtrn.32 d16, d2 -+ vshl.i64 q12, q12, #26 -+ vtrn.32 d17, d3 -+ vadd.i64 q1, q11, q4 -+ vadd.i64 q4, q5, q13 -+ vst1.8 d16, [r2, : 64]! -+ vshl.i64 q5, q10, #4 -+ vst1.8 d17, [r4, : 64]! 
-+ vsub.i64 q8, q14, q12 -+ vshr.s64 q1, q1, #25 -+ vadd.i64 q4, q4, q5 -+ vadd.i64 q5, q6, q1 -+ vshl.i64 q1, q1, #25 -+ vadd.i64 q6, q5, q9 -+ vadd.i64 q4, q4, q10 -+ vshl.i64 q10, q10, #25 -+ vadd.i64 q9, q4, q9 -+ vsub.i64 q1, q11, q1 -+ vshr.s64 q6, q6, #26 -+ vsub.i64 q3, q3, q10 -+ vtrn.32 d16, d2 -+ vshr.s64 q9, q9, #26 -+ vtrn.32 d17, d3 -+ vadd.i64 q1, q2, q6 -+ vst1.8 d16, [r2, : 64] -+ vshl.i64 q2, q6, #26 -+ vst1.8 d17, [r4, : 64] -+ vadd.i64 q6, q7, q9 -+ vtrn.32 d0, d6 -+ vshl.i64 q7, q9, #26 -+ vtrn.32 d1, d7 -+ vsub.i64 q2, q5, q2 -+ add r2, r2, #16 -+ vsub.i64 q3, q4, q7 -+ vst1.8 d0, [r2, : 64] -+ add r4, r4, #16 -+ vst1.8 d1, [r4, : 64] -+ vtrn.32 d4, d2 -+ vtrn.32 d5, d3 -+ sub r2, r2, #8 -+ sub r4, r4, #8 -+ vtrn.32 d6, d12 -+ vtrn.32 d7, d13 -+ vst1.8 d4, [r2, : 64] -+ vst1.8 d5, [r4, : 64] -+ sub r2, r2, #24 -+ sub r4, r4, #24 -+ vst1.8 d6, [r2, : 64] -+ vst1.8 d7, [r4, : 64] -+ add r2, r3, #240 -+ add r4, r3, #96 -+ vld1.8 {d0-d1}, [r4, : 128]! -+ vld1.8 {d2-d3}, [r4, : 128]! -+ vld1.8 {d4}, [r4, : 64] -+ add r4, r3, #144 -+ vld1.8 {d6-d7}, [r4, : 128]! -+ vtrn.32 q0, q3 -+ vld1.8 {d8-d9}, [r4, : 128]! -+ vshl.i32 q5, q0, #4 -+ vtrn.32 q1, q4 -+ vshl.i32 q6, q3, #4 -+ vadd.i32 q5, q5, q0 -+ vadd.i32 q6, q6, q3 -+ vshl.i32 q7, q1, #4 -+ vld1.8 {d5}, [r4, : 64] -+ vshl.i32 q8, q4, #4 -+ vtrn.32 d4, d5 -+ vadd.i32 q7, q7, q1 -+ vadd.i32 q8, q8, q4 -+ vld1.8 {d18-d19}, [r2, : 128]! -+ vshl.i32 q10, q2, #4 -+ vld1.8 {d22-d23}, [r2, : 128]! -+ vadd.i32 q10, q10, q2 -+ vld1.8 {d24}, [r2, : 64] -+ vadd.i32 q5, q5, q0 -+ add r2, r3, #192 -+ vld1.8 {d26-d27}, [r2, : 128]! -+ vadd.i32 q6, q6, q3 -+ vld1.8 {d28-d29}, [r2, : 128]! 
-+ vadd.i32 q8, q8, q4 -+ vld1.8 {d25}, [r2, : 64] -+ vadd.i32 q10, q10, q2 -+ vtrn.32 q9, q13 -+ vadd.i32 q7, q7, q1 -+ vadd.i32 q5, q5, q0 -+ vtrn.32 q11, q14 -+ vadd.i32 q6, q6, q3 -+ add r2, sp, #560 -+ vadd.i32 q10, q10, q2 -+ vtrn.32 d24, d25 -+ vst1.8 {d12-d13}, [r2, : 128] -+ vshl.i32 q6, q13, #1 -+ add r2, sp, #576 -+ vst1.8 {d20-d21}, [r2, : 128] -+ vshl.i32 q10, q14, #1 -+ add r2, sp, #592 -+ vst1.8 {d12-d13}, [r2, : 128] -+ vshl.i32 q15, q12, #1 -+ vadd.i32 q8, q8, q4 -+ vext.32 d10, d31, d30, #0 -+ vadd.i32 q7, q7, q1 -+ add r2, sp, #608 -+ vst1.8 {d16-d17}, [r2, : 128] -+ vmull.s32 q8, d18, d5 -+ vmlal.s32 q8, d26, d4 -+ vmlal.s32 q8, d19, d9 -+ vmlal.s32 q8, d27, d3 -+ vmlal.s32 q8, d22, d8 -+ vmlal.s32 q8, d28, d2 -+ vmlal.s32 q8, d23, d7 -+ vmlal.s32 q8, d29, d1 -+ vmlal.s32 q8, d24, d6 -+ vmlal.s32 q8, d25, d0 -+ add r2, sp, #624 -+ vst1.8 {d14-d15}, [r2, : 128] -+ vmull.s32 q2, d18, d4 -+ vmlal.s32 q2, d12, d9 -+ vmlal.s32 q2, d13, d8 -+ vmlal.s32 q2, d19, d3 -+ vmlal.s32 q2, d22, d2 -+ vmlal.s32 q2, d23, d1 -+ vmlal.s32 q2, d24, d0 -+ add r2, sp, #640 -+ vst1.8 {d20-d21}, [r2, : 128] -+ vmull.s32 q7, d18, d9 -+ vmlal.s32 q7, d26, d3 -+ vmlal.s32 q7, d19, d8 -+ vmlal.s32 q7, d27, d2 -+ vmlal.s32 q7, d22, d7 -+ vmlal.s32 q7, d28, d1 -+ vmlal.s32 q7, d23, d6 -+ vmlal.s32 q7, d29, d0 -+ add r2, sp, #656 -+ vst1.8 {d10-d11}, [r2, : 128] -+ vmull.s32 q5, d18, d3 -+ vmlal.s32 q5, d19, d2 -+ vmlal.s32 q5, d22, d1 -+ vmlal.s32 q5, d23, d0 -+ vmlal.s32 q5, d12, d8 -+ add r2, sp, #672 -+ vst1.8 {d16-d17}, [r2, : 128] -+ vmull.s32 q4, d18, d8 -+ vmlal.s32 q4, d26, d2 -+ vmlal.s32 q4, d19, d7 -+ vmlal.s32 q4, d27, d1 -+ vmlal.s32 q4, d22, d6 -+ vmlal.s32 q4, d28, d0 -+ vmull.s32 q8, d18, d7 -+ vmlal.s32 q8, d26, d1 -+ vmlal.s32 q8, d19, d6 -+ vmlal.s32 q8, d27, d0 -+ add r2, sp, #576 -+ vld1.8 {d20-d21}, [r2, : 128] -+ vmlal.s32 q7, d24, d21 -+ vmlal.s32 q7, d25, d20 -+ vmlal.s32 q4, d23, d21 -+ vmlal.s32 q4, d29, d20 -+ vmlal.s32 q8, d22, d21 -+ vmlal.s32 
q8, d28, d20 -+ vmlal.s32 q5, d24, d20 -+ add r2, sp, #576 -+ vst1.8 {d14-d15}, [r2, : 128] -+ vmull.s32 q7, d18, d6 -+ vmlal.s32 q7, d26, d0 -+ add r2, sp, #656 -+ vld1.8 {d30-d31}, [r2, : 128] -+ vmlal.s32 q2, d30, d21 -+ vmlal.s32 q7, d19, d21 -+ vmlal.s32 q7, d27, d20 -+ add r2, sp, #624 -+ vld1.8 {d26-d27}, [r2, : 128] -+ vmlal.s32 q4, d25, d27 -+ vmlal.s32 q8, d29, d27 -+ vmlal.s32 q8, d25, d26 -+ vmlal.s32 q7, d28, d27 -+ vmlal.s32 q7, d29, d26 -+ add r2, sp, #608 -+ vld1.8 {d28-d29}, [r2, : 128] -+ vmlal.s32 q4, d24, d29 -+ vmlal.s32 q8, d23, d29 -+ vmlal.s32 q8, d24, d28 -+ vmlal.s32 q7, d22, d29 -+ vmlal.s32 q7, d23, d28 -+ add r2, sp, #608 -+ vst1.8 {d8-d9}, [r2, : 128] -+ add r2, sp, #560 -+ vld1.8 {d8-d9}, [r2, : 128] -+ vmlal.s32 q7, d24, d9 -+ vmlal.s32 q7, d25, d31 -+ vmull.s32 q1, d18, d2 -+ vmlal.s32 q1, d19, d1 -+ vmlal.s32 q1, d22, d0 -+ vmlal.s32 q1, d24, d27 -+ vmlal.s32 q1, d23, d20 -+ vmlal.s32 q1, d12, d7 -+ vmlal.s32 q1, d13, d6 -+ vmull.s32 q6, d18, d1 -+ vmlal.s32 q6, d19, d0 -+ vmlal.s32 q6, d23, d27 -+ vmlal.s32 q6, d22, d20 -+ vmlal.s32 q6, d24, d26 -+ vmull.s32 q0, d18, d0 -+ vmlal.s32 q0, d22, d27 -+ vmlal.s32 q0, d23, d26 -+ vmlal.s32 q0, d24, d31 -+ vmlal.s32 q0, d19, d20 -+ add r2, sp, #640 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmlal.s32 q2, d18, d7 -+ vmlal.s32 q2, d19, d6 -+ vmlal.s32 q5, d18, d6 -+ vmlal.s32 q5, d19, d21 -+ vmlal.s32 q1, d18, d21 -+ vmlal.s32 q1, d19, d29 -+ vmlal.s32 q0, d18, d28 -+ vmlal.s32 q0, d19, d9 -+ vmlal.s32 q6, d18, d29 -+ vmlal.s32 q6, d19, d28 -+ add r2, sp, #592 -+ vld1.8 {d18-d19}, [r2, : 128] -+ add r2, sp, #512 -+ vld1.8 {d22-d23}, [r2, : 128] -+ vmlal.s32 q5, d19, d7 -+ vmlal.s32 q0, d18, d21 -+ vmlal.s32 q0, d19, d29 -+ vmlal.s32 q6, d18, d6 -+ add r2, sp, #528 -+ vld1.8 {d6-d7}, [r2, : 128] -+ vmlal.s32 q6, d19, d21 -+ add r2, sp, #576 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmlal.s32 q0, d30, d8 -+ add r2, sp, #672 -+ vld1.8 {d20-d21}, [r2, : 128] -+ vmlal.s32 q5, d30, d29 -+ add r2, sp, #608 -+ 
vld1.8 {d24-d25}, [r2, : 128] -+ vmlal.s32 q1, d30, d28 -+ vadd.i64 q13, q0, q11 -+ vadd.i64 q14, q5, q11 -+ vmlal.s32 q6, d30, d9 -+ vshr.s64 q4, q13, #26 -+ vshr.s64 q13, q14, #26 -+ vadd.i64 q7, q7, q4 -+ vshl.i64 q4, q4, #26 -+ vadd.i64 q14, q7, q3 -+ vadd.i64 q9, q9, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q15, q9, q3 -+ vsub.i64 q0, q0, q4 -+ vshr.s64 q4, q14, #25 -+ vsub.i64 q5, q5, q13 -+ vshr.s64 q13, q15, #25 -+ vadd.i64 q6, q6, q4 -+ vshl.i64 q4, q4, #25 -+ vadd.i64 q14, q6, q11 -+ vadd.i64 q2, q2, q13 -+ vsub.i64 q4, q7, q4 -+ vshr.s64 q7, q14, #26 -+ vshl.i64 q13, q13, #25 -+ vadd.i64 q14, q2, q11 -+ vadd.i64 q8, q8, q7 -+ vshl.i64 q7, q7, #26 -+ vadd.i64 q15, q8, q3 -+ vsub.i64 q9, q9, q13 -+ vshr.s64 q13, q14, #26 -+ vsub.i64 q6, q6, q7 -+ vshr.s64 q7, q15, #25 -+ vadd.i64 q10, q10, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q14, q10, q3 -+ vadd.i64 q1, q1, q7 -+ add r2, r3, #144 -+ vshl.i64 q7, q7, #25 -+ add r4, r3, #96 -+ vadd.i64 q15, q1, q11 -+ add r2, r2, #8 -+ vsub.i64 q2, q2, q13 -+ add r4, r4, #8 -+ vshr.s64 q13, q14, #25 -+ vsub.i64 q7, q8, q7 -+ vshr.s64 q8, q15, #26 -+ vadd.i64 q14, q13, q13 -+ vadd.i64 q12, q12, q8 -+ vtrn.32 d12, d14 -+ vshl.i64 q8, q8, #26 -+ vtrn.32 d13, d15 -+ vadd.i64 q3, q12, q3 -+ vadd.i64 q0, q0, q14 -+ vst1.8 d12, [r2, : 64]! -+ vshl.i64 q7, q13, #4 -+ vst1.8 d13, [r4, : 64]! 
-+ vsub.i64 q1, q1, q8 -+ vshr.s64 q3, q3, #25 -+ vadd.i64 q0, q0, q7 -+ vadd.i64 q5, q5, q3 -+ vshl.i64 q3, q3, #25 -+ vadd.i64 q6, q5, q11 -+ vadd.i64 q0, q0, q13 -+ vshl.i64 q7, q13, #25 -+ vadd.i64 q8, q0, q11 -+ vsub.i64 q3, q12, q3 -+ vshr.s64 q6, q6, #26 -+ vsub.i64 q7, q10, q7 -+ vtrn.32 d2, d6 -+ vshr.s64 q8, q8, #26 -+ vtrn.32 d3, d7 -+ vadd.i64 q3, q9, q6 -+ vst1.8 d2, [r2, : 64] -+ vshl.i64 q6, q6, #26 -+ vst1.8 d3, [r4, : 64] -+ vadd.i64 q1, q4, q8 -+ vtrn.32 d4, d14 -+ vshl.i64 q4, q8, #26 -+ vtrn.32 d5, d15 -+ vsub.i64 q5, q5, q6 -+ add r2, r2, #16 -+ vsub.i64 q0, q0, q4 -+ vst1.8 d4, [r2, : 64] -+ add r4, r4, #16 -+ vst1.8 d5, [r4, : 64] -+ vtrn.32 d10, d6 -+ vtrn.32 d11, d7 -+ sub r2, r2, #8 -+ sub r4, r4, #8 -+ vtrn.32 d0, d2 -+ vtrn.32 d1, d3 -+ vst1.8 d10, [r2, : 64] -+ vst1.8 d11, [r4, : 64] -+ sub r2, r2, #24 -+ sub r4, r4, #24 -+ vst1.8 d0, [r2, : 64] -+ vst1.8 d1, [r4, : 64] -+ add r2, r3, #288 -+ add r4, r3, #336 -+ vld1.8 {d0-d1}, [r2, : 128]! -+ vld1.8 {d2-d3}, [r4, : 128]! -+ vsub.i32 q0, q0, q1 -+ vld1.8 {d2-d3}, [r2, : 128]! -+ vld1.8 {d4-d5}, [r4, : 128]! -+ vsub.i32 q1, q1, q2 -+ add r5, r3, #240 -+ vld1.8 {d4}, [r2, : 64] -+ vld1.8 {d6}, [r4, : 64] -+ vsub.i32 q2, q2, q3 -+ vst1.8 {d0-d1}, [r5, : 128]! -+ vst1.8 {d2-d3}, [r5, : 128]! -+ vst1.8 d4, [r5, : 64] -+ add r2, r3, #144 -+ add r4, r3, #96 -+ add r5, r3, #144 -+ add r6, r3, #192 -+ vld1.8 {d0-d1}, [r2, : 128]! -+ vld1.8 {d2-d3}, [r4, : 128]! -+ vsub.i32 q2, q0, q1 -+ vadd.i32 q0, q0, q1 -+ vld1.8 {d2-d3}, [r2, : 128]! -+ vld1.8 {d6-d7}, [r4, : 128]! -+ vsub.i32 q4, q1, q3 -+ vadd.i32 q1, q1, q3 -+ vld1.8 {d6}, [r2, : 64] -+ vld1.8 {d10}, [r4, : 64] -+ vsub.i32 q6, q3, q5 -+ vadd.i32 q3, q3, q5 -+ vst1.8 {d4-d5}, [r5, : 128]! -+ vst1.8 {d0-d1}, [r6, : 128]! -+ vst1.8 {d8-d9}, [r5, : 128]! -+ vst1.8 {d2-d3}, [r6, : 128]! -+ vst1.8 d12, [r5, : 64] -+ vst1.8 d6, [r6, : 64] -+ add r2, r3, #0 -+ add r4, r3, #240 -+ vld1.8 {d0-d1}, [r4, : 128]! -+ vld1.8 {d2-d3}, [r4, : 128]! 
-+ vld1.8 {d4}, [r4, : 64] -+ add r4, r3, #336 -+ vld1.8 {d6-d7}, [r4, : 128]! -+ vtrn.32 q0, q3 -+ vld1.8 {d8-d9}, [r4, : 128]! -+ vshl.i32 q5, q0, #4 -+ vtrn.32 q1, q4 -+ vshl.i32 q6, q3, #4 -+ vadd.i32 q5, q5, q0 -+ vadd.i32 q6, q6, q3 -+ vshl.i32 q7, q1, #4 -+ vld1.8 {d5}, [r4, : 64] -+ vshl.i32 q8, q4, #4 -+ vtrn.32 d4, d5 -+ vadd.i32 q7, q7, q1 -+ vadd.i32 q8, q8, q4 -+ vld1.8 {d18-d19}, [r2, : 128]! -+ vshl.i32 q10, q2, #4 -+ vld1.8 {d22-d23}, [r2, : 128]! -+ vadd.i32 q10, q10, q2 -+ vld1.8 {d24}, [r2, : 64] -+ vadd.i32 q5, q5, q0 -+ add r2, r3, #288 -+ vld1.8 {d26-d27}, [r2, : 128]! -+ vadd.i32 q6, q6, q3 -+ vld1.8 {d28-d29}, [r2, : 128]! -+ vadd.i32 q8, q8, q4 -+ vld1.8 {d25}, [r2, : 64] -+ vadd.i32 q10, q10, q2 -+ vtrn.32 q9, q13 -+ vadd.i32 q7, q7, q1 -+ vadd.i32 q5, q5, q0 -+ vtrn.32 q11, q14 -+ vadd.i32 q6, q6, q3 -+ add r2, sp, #560 -+ vadd.i32 q10, q10, q2 -+ vtrn.32 d24, d25 -+ vst1.8 {d12-d13}, [r2, : 128] -+ vshl.i32 q6, q13, #1 -+ add r2, sp, #576 -+ vst1.8 {d20-d21}, [r2, : 128] -+ vshl.i32 q10, q14, #1 -+ add r2, sp, #592 -+ vst1.8 {d12-d13}, [r2, : 128] -+ vshl.i32 q15, q12, #1 -+ vadd.i32 q8, q8, q4 -+ vext.32 d10, d31, d30, #0 -+ vadd.i32 q7, q7, q1 -+ add r2, sp, #608 -+ vst1.8 {d16-d17}, [r2, : 128] -+ vmull.s32 q8, d18, d5 -+ vmlal.s32 q8, d26, d4 -+ vmlal.s32 q8, d19, d9 -+ vmlal.s32 q8, d27, d3 -+ vmlal.s32 q8, d22, d8 -+ vmlal.s32 q8, d28, d2 -+ vmlal.s32 q8, d23, d7 -+ vmlal.s32 q8, d29, d1 -+ vmlal.s32 q8, d24, d6 -+ vmlal.s32 q8, d25, d0 -+ add r2, sp, #624 -+ vst1.8 {d14-d15}, [r2, : 128] -+ vmull.s32 q2, d18, d4 -+ vmlal.s32 q2, d12, d9 -+ vmlal.s32 q2, d13, d8 -+ vmlal.s32 q2, d19, d3 -+ vmlal.s32 q2, d22, d2 -+ vmlal.s32 q2, d23, d1 -+ vmlal.s32 q2, d24, d0 -+ add r2, sp, #640 -+ vst1.8 {d20-d21}, [r2, : 128] -+ vmull.s32 q7, d18, d9 -+ vmlal.s32 q7, d26, d3 -+ vmlal.s32 q7, d19, d8 -+ vmlal.s32 q7, d27, d2 -+ vmlal.s32 q7, d22, d7 -+ vmlal.s32 q7, d28, d1 -+ vmlal.s32 q7, d23, d6 -+ vmlal.s32 q7, d29, d0 -+ add r2, sp, #656 -+ 
vst1.8 {d10-d11}, [r2, : 128] -+ vmull.s32 q5, d18, d3 -+ vmlal.s32 q5, d19, d2 -+ vmlal.s32 q5, d22, d1 -+ vmlal.s32 q5, d23, d0 -+ vmlal.s32 q5, d12, d8 -+ add r2, sp, #672 -+ vst1.8 {d16-d17}, [r2, : 128] -+ vmull.s32 q4, d18, d8 -+ vmlal.s32 q4, d26, d2 -+ vmlal.s32 q4, d19, d7 -+ vmlal.s32 q4, d27, d1 -+ vmlal.s32 q4, d22, d6 -+ vmlal.s32 q4, d28, d0 -+ vmull.s32 q8, d18, d7 -+ vmlal.s32 q8, d26, d1 -+ vmlal.s32 q8, d19, d6 -+ vmlal.s32 q8, d27, d0 -+ add r2, sp, #576 -+ vld1.8 {d20-d21}, [r2, : 128] -+ vmlal.s32 q7, d24, d21 -+ vmlal.s32 q7, d25, d20 -+ vmlal.s32 q4, d23, d21 -+ vmlal.s32 q4, d29, d20 -+ vmlal.s32 q8, d22, d21 -+ vmlal.s32 q8, d28, d20 -+ vmlal.s32 q5, d24, d20 -+ add r2, sp, #576 -+ vst1.8 {d14-d15}, [r2, : 128] -+ vmull.s32 q7, d18, d6 -+ vmlal.s32 q7, d26, d0 -+ add r2, sp, #656 -+ vld1.8 {d30-d31}, [r2, : 128] -+ vmlal.s32 q2, d30, d21 -+ vmlal.s32 q7, d19, d21 -+ vmlal.s32 q7, d27, d20 -+ add r2, sp, #624 -+ vld1.8 {d26-d27}, [r2, : 128] -+ vmlal.s32 q4, d25, d27 -+ vmlal.s32 q8, d29, d27 -+ vmlal.s32 q8, d25, d26 -+ vmlal.s32 q7, d28, d27 -+ vmlal.s32 q7, d29, d26 -+ add r2, sp, #608 -+ vld1.8 {d28-d29}, [r2, : 128] -+ vmlal.s32 q4, d24, d29 -+ vmlal.s32 q8, d23, d29 -+ vmlal.s32 q8, d24, d28 -+ vmlal.s32 q7, d22, d29 -+ vmlal.s32 q7, d23, d28 -+ add r2, sp, #608 -+ vst1.8 {d8-d9}, [r2, : 128] -+ add r2, sp, #560 -+ vld1.8 {d8-d9}, [r2, : 128] -+ vmlal.s32 q7, d24, d9 -+ vmlal.s32 q7, d25, d31 -+ vmull.s32 q1, d18, d2 -+ vmlal.s32 q1, d19, d1 -+ vmlal.s32 q1, d22, d0 -+ vmlal.s32 q1, d24, d27 -+ vmlal.s32 q1, d23, d20 -+ vmlal.s32 q1, d12, d7 -+ vmlal.s32 q1, d13, d6 -+ vmull.s32 q6, d18, d1 -+ vmlal.s32 q6, d19, d0 -+ vmlal.s32 q6, d23, d27 -+ vmlal.s32 q6, d22, d20 -+ vmlal.s32 q6, d24, d26 -+ vmull.s32 q0, d18, d0 -+ vmlal.s32 q0, d22, d27 -+ vmlal.s32 q0, d23, d26 -+ vmlal.s32 q0, d24, d31 -+ vmlal.s32 q0, d19, d20 -+ add r2, sp, #640 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmlal.s32 q2, d18, d7 -+ vmlal.s32 q2, d19, d6 -+ vmlal.s32 q5, 
d18, d6 -+ vmlal.s32 q5, d19, d21 -+ vmlal.s32 q1, d18, d21 -+ vmlal.s32 q1, d19, d29 -+ vmlal.s32 q0, d18, d28 -+ vmlal.s32 q0, d19, d9 -+ vmlal.s32 q6, d18, d29 -+ vmlal.s32 q6, d19, d28 -+ add r2, sp, #592 -+ vld1.8 {d18-d19}, [r2, : 128] -+ add r2, sp, #512 -+ vld1.8 {d22-d23}, [r2, : 128] -+ vmlal.s32 q5, d19, d7 -+ vmlal.s32 q0, d18, d21 -+ vmlal.s32 q0, d19, d29 -+ vmlal.s32 q6, d18, d6 -+ add r2, sp, #528 -+ vld1.8 {d6-d7}, [r2, : 128] -+ vmlal.s32 q6, d19, d21 -+ add r2, sp, #576 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmlal.s32 q0, d30, d8 -+ add r2, sp, #672 -+ vld1.8 {d20-d21}, [r2, : 128] -+ vmlal.s32 q5, d30, d29 -+ add r2, sp, #608 -+ vld1.8 {d24-d25}, [r2, : 128] -+ vmlal.s32 q1, d30, d28 -+ vadd.i64 q13, q0, q11 -+ vadd.i64 q14, q5, q11 -+ vmlal.s32 q6, d30, d9 -+ vshr.s64 q4, q13, #26 -+ vshr.s64 q13, q14, #26 -+ vadd.i64 q7, q7, q4 -+ vshl.i64 q4, q4, #26 -+ vadd.i64 q14, q7, q3 -+ vadd.i64 q9, q9, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q15, q9, q3 -+ vsub.i64 q0, q0, q4 -+ vshr.s64 q4, q14, #25 -+ vsub.i64 q5, q5, q13 -+ vshr.s64 q13, q15, #25 -+ vadd.i64 q6, q6, q4 -+ vshl.i64 q4, q4, #25 -+ vadd.i64 q14, q6, q11 -+ vadd.i64 q2, q2, q13 -+ vsub.i64 q4, q7, q4 -+ vshr.s64 q7, q14, #26 -+ vshl.i64 q13, q13, #25 -+ vadd.i64 q14, q2, q11 -+ vadd.i64 q8, q8, q7 -+ vshl.i64 q7, q7, #26 -+ vadd.i64 q15, q8, q3 -+ vsub.i64 q9, q9, q13 -+ vshr.s64 q13, q14, #26 -+ vsub.i64 q6, q6, q7 -+ vshr.s64 q7, q15, #25 -+ vadd.i64 q10, q10, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q14, q10, q3 -+ vadd.i64 q1, q1, q7 -+ add r2, r3, #288 -+ vshl.i64 q7, q7, #25 -+ add r4, r3, #96 -+ vadd.i64 q15, q1, q11 -+ add r2, r2, #8 -+ vsub.i64 q2, q2, q13 -+ add r4, r4, #8 -+ vshr.s64 q13, q14, #25 -+ vsub.i64 q7, q8, q7 -+ vshr.s64 q8, q15, #26 -+ vadd.i64 q14, q13, q13 -+ vadd.i64 q12, q12, q8 -+ vtrn.32 d12, d14 -+ vshl.i64 q8, q8, #26 -+ vtrn.32 d13, d15 -+ vadd.i64 q3, q12, q3 -+ vadd.i64 q0, q0, q14 -+ vst1.8 d12, [r2, : 64]! 
-+ vshl.i64 q7, q13, #4 -+ vst1.8 d13, [r4, : 64]! -+ vsub.i64 q1, q1, q8 -+ vshr.s64 q3, q3, #25 -+ vadd.i64 q0, q0, q7 -+ vadd.i64 q5, q5, q3 -+ vshl.i64 q3, q3, #25 -+ vadd.i64 q6, q5, q11 -+ vadd.i64 q0, q0, q13 -+ vshl.i64 q7, q13, #25 -+ vadd.i64 q8, q0, q11 -+ vsub.i64 q3, q12, q3 -+ vshr.s64 q6, q6, #26 -+ vsub.i64 q7, q10, q7 -+ vtrn.32 d2, d6 -+ vshr.s64 q8, q8, #26 -+ vtrn.32 d3, d7 -+ vadd.i64 q3, q9, q6 -+ vst1.8 d2, [r2, : 64] -+ vshl.i64 q6, q6, #26 -+ vst1.8 d3, [r4, : 64] -+ vadd.i64 q1, q4, q8 -+ vtrn.32 d4, d14 -+ vshl.i64 q4, q8, #26 -+ vtrn.32 d5, d15 -+ vsub.i64 q5, q5, q6 -+ add r2, r2, #16 -+ vsub.i64 q0, q0, q4 -+ vst1.8 d4, [r2, : 64] -+ add r4, r4, #16 -+ vst1.8 d5, [r4, : 64] -+ vtrn.32 d10, d6 -+ vtrn.32 d11, d7 -+ sub r2, r2, #8 -+ sub r4, r4, #8 -+ vtrn.32 d0, d2 -+ vtrn.32 d1, d3 -+ vst1.8 d10, [r2, : 64] -+ vst1.8 d11, [r4, : 64] -+ sub r2, r2, #24 -+ sub r4, r4, #24 -+ vst1.8 d0, [r2, : 64] -+ vst1.8 d1, [r4, : 64] -+ add r2, sp, #544 -+ add r4, r3, #144 -+ add r5, r3, #192 -+ vld1.8 {d0-d1}, [r2, : 128] -+ vld1.8 {d2-d3}, [r4, : 128]! -+ vld1.8 {d4-d5}, [r5, : 128]! -+ vzip.i32 q1, q2 -+ vld1.8 {d6-d7}, [r4, : 128]! -+ vld1.8 {d8-d9}, [r5, : 128]! 
-+ vshl.i32 q5, q1, #1 -+ vzip.i32 q3, q4 -+ vshl.i32 q6, q2, #1 -+ vld1.8 {d14}, [r4, : 64] -+ vshl.i32 q8, q3, #1 -+ vld1.8 {d15}, [r5, : 64] -+ vshl.i32 q9, q4, #1 -+ vmul.i32 d21, d7, d1 -+ vtrn.32 d14, d15 -+ vmul.i32 q11, q4, q0 -+ vmul.i32 q0, q7, q0 -+ vmull.s32 q12, d2, d2 -+ vmlal.s32 q12, d11, d1 -+ vmlal.s32 q12, d12, d0 -+ vmlal.s32 q12, d13, d23 -+ vmlal.s32 q12, d16, d22 -+ vmlal.s32 q12, d7, d21 -+ vmull.s32 q10, d2, d11 -+ vmlal.s32 q10, d4, d1 -+ vmlal.s32 q10, d13, d0 -+ vmlal.s32 q10, d6, d23 -+ vmlal.s32 q10, d17, d22 -+ vmull.s32 q13, d10, d4 -+ vmlal.s32 q13, d11, d3 -+ vmlal.s32 q13, d13, d1 -+ vmlal.s32 q13, d16, d0 -+ vmlal.s32 q13, d17, d23 -+ vmlal.s32 q13, d8, d22 -+ vmull.s32 q1, d10, d5 -+ vmlal.s32 q1, d11, d4 -+ vmlal.s32 q1, d6, d1 -+ vmlal.s32 q1, d17, d0 -+ vmlal.s32 q1, d8, d23 -+ vmull.s32 q14, d10, d6 -+ vmlal.s32 q14, d11, d13 -+ vmlal.s32 q14, d4, d4 -+ vmlal.s32 q14, d17, d1 -+ vmlal.s32 q14, d18, d0 -+ vmlal.s32 q14, d9, d23 -+ vmull.s32 q11, d10, d7 -+ vmlal.s32 q11, d11, d6 -+ vmlal.s32 q11, d12, d5 -+ vmlal.s32 q11, d8, d1 -+ vmlal.s32 q11, d19, d0 -+ vmull.s32 q15, d10, d8 -+ vmlal.s32 q15, d11, d17 -+ vmlal.s32 q15, d12, d6 -+ vmlal.s32 q15, d13, d5 -+ vmlal.s32 q15, d19, d1 -+ vmlal.s32 q15, d14, d0 -+ vmull.s32 q2, d10, d9 -+ vmlal.s32 q2, d11, d8 -+ vmlal.s32 q2, d12, d7 -+ vmlal.s32 q2, d13, d6 -+ vmlal.s32 q2, d14, d1 -+ vmull.s32 q0, d15, d1 -+ vmlal.s32 q0, d10, d14 -+ vmlal.s32 q0, d11, d19 -+ vmlal.s32 q0, d12, d8 -+ vmlal.s32 q0, d13, d17 -+ vmlal.s32 q0, d6, d6 -+ add r2, sp, #512 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmull.s32 q3, d16, d7 -+ vmlal.s32 q3, d10, d15 -+ vmlal.s32 q3, d11, d14 -+ vmlal.s32 q3, d12, d9 -+ vmlal.s32 q3, d13, d8 -+ add r2, sp, #528 -+ vld1.8 {d8-d9}, [r2, : 128] -+ vadd.i64 q5, q12, q9 -+ vadd.i64 q6, q15, q9 -+ vshr.s64 q5, q5, #26 -+ vshr.s64 q6, q6, #26 -+ vadd.i64 q7, q10, q5 -+ vshl.i64 q5, q5, #26 -+ vadd.i64 q8, q7, q4 -+ vadd.i64 q2, q2, q6 -+ vshl.i64 q6, q6, #26 -+ 
vadd.i64 q10, q2, q4 -+ vsub.i64 q5, q12, q5 -+ vshr.s64 q8, q8, #25 -+ vsub.i64 q6, q15, q6 -+ vshr.s64 q10, q10, #25 -+ vadd.i64 q12, q13, q8 -+ vshl.i64 q8, q8, #25 -+ vadd.i64 q13, q12, q9 -+ vadd.i64 q0, q0, q10 -+ vsub.i64 q7, q7, q8 -+ vshr.s64 q8, q13, #26 -+ vshl.i64 q10, q10, #25 -+ vadd.i64 q13, q0, q9 -+ vadd.i64 q1, q1, q8 -+ vshl.i64 q8, q8, #26 -+ vadd.i64 q15, q1, q4 -+ vsub.i64 q2, q2, q10 -+ vshr.s64 q10, q13, #26 -+ vsub.i64 q8, q12, q8 -+ vshr.s64 q12, q15, #25 -+ vadd.i64 q3, q3, q10 -+ vshl.i64 q10, q10, #26 -+ vadd.i64 q13, q3, q4 -+ vadd.i64 q14, q14, q12 -+ add r2, r3, #144 -+ vshl.i64 q12, q12, #25 -+ add r4, r3, #192 -+ vadd.i64 q15, q14, q9 -+ add r2, r2, #8 -+ vsub.i64 q0, q0, q10 -+ add r4, r4, #8 -+ vshr.s64 q10, q13, #25 -+ vsub.i64 q1, q1, q12 -+ vshr.s64 q12, q15, #26 -+ vadd.i64 q13, q10, q10 -+ vadd.i64 q11, q11, q12 -+ vtrn.32 d16, d2 -+ vshl.i64 q12, q12, #26 -+ vtrn.32 d17, d3 -+ vadd.i64 q1, q11, q4 -+ vadd.i64 q4, q5, q13 -+ vst1.8 d16, [r2, : 64]! -+ vshl.i64 q5, q10, #4 -+ vst1.8 d17, [r4, : 64]! 
-+ vsub.i64 q8, q14, q12 -+ vshr.s64 q1, q1, #25 -+ vadd.i64 q4, q4, q5 -+ vadd.i64 q5, q6, q1 -+ vshl.i64 q1, q1, #25 -+ vadd.i64 q6, q5, q9 -+ vadd.i64 q4, q4, q10 -+ vshl.i64 q10, q10, #25 -+ vadd.i64 q9, q4, q9 -+ vsub.i64 q1, q11, q1 -+ vshr.s64 q6, q6, #26 -+ vsub.i64 q3, q3, q10 -+ vtrn.32 d16, d2 -+ vshr.s64 q9, q9, #26 -+ vtrn.32 d17, d3 -+ vadd.i64 q1, q2, q6 -+ vst1.8 d16, [r2, : 64] -+ vshl.i64 q2, q6, #26 -+ vst1.8 d17, [r4, : 64] -+ vadd.i64 q6, q7, q9 -+ vtrn.32 d0, d6 -+ vshl.i64 q7, q9, #26 -+ vtrn.32 d1, d7 -+ vsub.i64 q2, q5, q2 -+ add r2, r2, #16 -+ vsub.i64 q3, q4, q7 -+ vst1.8 d0, [r2, : 64] -+ add r4, r4, #16 -+ vst1.8 d1, [r4, : 64] -+ vtrn.32 d4, d2 -+ vtrn.32 d5, d3 -+ sub r2, r2, #8 -+ sub r4, r4, #8 -+ vtrn.32 d6, d12 -+ vtrn.32 d7, d13 -+ vst1.8 d4, [r2, : 64] -+ vst1.8 d5, [r4, : 64] -+ sub r2, r2, #24 -+ sub r4, r4, #24 -+ vst1.8 d6, [r2, : 64] -+ vst1.8 d7, [r4, : 64] -+ add r2, r3, #336 -+ add r4, r3, #288 -+ vld1.8 {d0-d1}, [r2, : 128]! -+ vld1.8 {d2-d3}, [r4, : 128]! -+ vadd.i32 q0, q0, q1 -+ vld1.8 {d2-d3}, [r2, : 128]! -+ vld1.8 {d4-d5}, [r4, : 128]! -+ vadd.i32 q1, q1, q2 -+ add r5, r3, #288 -+ vld1.8 {d4}, [r2, : 64] -+ vld1.8 {d6}, [r4, : 64] -+ vadd.i32 q2, q2, q3 -+ vst1.8 {d0-d1}, [r5, : 128]! -+ vst1.8 {d2-d3}, [r5, : 128]! -+ vst1.8 d4, [r5, : 64] -+ add r2, r3, #48 -+ add r4, r3, #144 -+ vld1.8 {d0-d1}, [r4, : 128]! -+ vld1.8 {d2-d3}, [r4, : 128]! -+ vld1.8 {d4}, [r4, : 64] -+ add r4, r3, #288 -+ vld1.8 {d6-d7}, [r4, : 128]! -+ vtrn.32 q0, q3 -+ vld1.8 {d8-d9}, [r4, : 128]! -+ vshl.i32 q5, q0, #4 -+ vtrn.32 q1, q4 -+ vshl.i32 q6, q3, #4 -+ vadd.i32 q5, q5, q0 -+ vadd.i32 q6, q6, q3 -+ vshl.i32 q7, q1, #4 -+ vld1.8 {d5}, [r4, : 64] -+ vshl.i32 q8, q4, #4 -+ vtrn.32 d4, d5 -+ vadd.i32 q7, q7, q1 -+ vadd.i32 q8, q8, q4 -+ vld1.8 {d18-d19}, [r2, : 128]! -+ vshl.i32 q10, q2, #4 -+ vld1.8 {d22-d23}, [r2, : 128]! 
-+ vadd.i32 q10, q10, q2 -+ vld1.8 {d24}, [r2, : 64] -+ vadd.i32 q5, q5, q0 -+ add r2, r3, #240 -+ vld1.8 {d26-d27}, [r2, : 128]! -+ vadd.i32 q6, q6, q3 -+ vld1.8 {d28-d29}, [r2, : 128]! -+ vadd.i32 q8, q8, q4 -+ vld1.8 {d25}, [r2, : 64] -+ vadd.i32 q10, q10, q2 -+ vtrn.32 q9, q13 -+ vadd.i32 q7, q7, q1 -+ vadd.i32 q5, q5, q0 -+ vtrn.32 q11, q14 -+ vadd.i32 q6, q6, q3 -+ add r2, sp, #560 -+ vadd.i32 q10, q10, q2 -+ vtrn.32 d24, d25 -+ vst1.8 {d12-d13}, [r2, : 128] -+ vshl.i32 q6, q13, #1 -+ add r2, sp, #576 -+ vst1.8 {d20-d21}, [r2, : 128] -+ vshl.i32 q10, q14, #1 -+ add r2, sp, #592 -+ vst1.8 {d12-d13}, [r2, : 128] -+ vshl.i32 q15, q12, #1 -+ vadd.i32 q8, q8, q4 -+ vext.32 d10, d31, d30, #0 -+ vadd.i32 q7, q7, q1 -+ add r2, sp, #608 -+ vst1.8 {d16-d17}, [r2, : 128] -+ vmull.s32 q8, d18, d5 -+ vmlal.s32 q8, d26, d4 -+ vmlal.s32 q8, d19, d9 -+ vmlal.s32 q8, d27, d3 -+ vmlal.s32 q8, d22, d8 -+ vmlal.s32 q8, d28, d2 -+ vmlal.s32 q8, d23, d7 -+ vmlal.s32 q8, d29, d1 -+ vmlal.s32 q8, d24, d6 -+ vmlal.s32 q8, d25, d0 -+ add r2, sp, #624 -+ vst1.8 {d14-d15}, [r2, : 128] -+ vmull.s32 q2, d18, d4 -+ vmlal.s32 q2, d12, d9 -+ vmlal.s32 q2, d13, d8 -+ vmlal.s32 q2, d19, d3 -+ vmlal.s32 q2, d22, d2 -+ vmlal.s32 q2, d23, d1 -+ vmlal.s32 q2, d24, d0 -+ add r2, sp, #640 -+ vst1.8 {d20-d21}, [r2, : 128] -+ vmull.s32 q7, d18, d9 -+ vmlal.s32 q7, d26, d3 -+ vmlal.s32 q7, d19, d8 -+ vmlal.s32 q7, d27, d2 -+ vmlal.s32 q7, d22, d7 -+ vmlal.s32 q7, d28, d1 -+ vmlal.s32 q7, d23, d6 -+ vmlal.s32 q7, d29, d0 -+ add r2, sp, #656 -+ vst1.8 {d10-d11}, [r2, : 128] -+ vmull.s32 q5, d18, d3 -+ vmlal.s32 q5, d19, d2 -+ vmlal.s32 q5, d22, d1 -+ vmlal.s32 q5, d23, d0 -+ vmlal.s32 q5, d12, d8 -+ add r2, sp, #672 -+ vst1.8 {d16-d17}, [r2, : 128] -+ vmull.s32 q4, d18, d8 -+ vmlal.s32 q4, d26, d2 -+ vmlal.s32 q4, d19, d7 -+ vmlal.s32 q4, d27, d1 -+ vmlal.s32 q4, d22, d6 -+ vmlal.s32 q4, d28, d0 -+ vmull.s32 q8, d18, d7 -+ vmlal.s32 q8, d26, d1 -+ vmlal.s32 q8, d19, d6 -+ vmlal.s32 q8, d27, d0 -+ add r2, 
sp, #576 -+ vld1.8 {d20-d21}, [r2, : 128] -+ vmlal.s32 q7, d24, d21 -+ vmlal.s32 q7, d25, d20 -+ vmlal.s32 q4, d23, d21 -+ vmlal.s32 q4, d29, d20 -+ vmlal.s32 q8, d22, d21 -+ vmlal.s32 q8, d28, d20 -+ vmlal.s32 q5, d24, d20 -+ add r2, sp, #576 -+ vst1.8 {d14-d15}, [r2, : 128] -+ vmull.s32 q7, d18, d6 -+ vmlal.s32 q7, d26, d0 -+ add r2, sp, #656 -+ vld1.8 {d30-d31}, [r2, : 128] -+ vmlal.s32 q2, d30, d21 -+ vmlal.s32 q7, d19, d21 -+ vmlal.s32 q7, d27, d20 -+ add r2, sp, #624 -+ vld1.8 {d26-d27}, [r2, : 128] -+ vmlal.s32 q4, d25, d27 -+ vmlal.s32 q8, d29, d27 -+ vmlal.s32 q8, d25, d26 -+ vmlal.s32 q7, d28, d27 -+ vmlal.s32 q7, d29, d26 -+ add r2, sp, #608 -+ vld1.8 {d28-d29}, [r2, : 128] -+ vmlal.s32 q4, d24, d29 -+ vmlal.s32 q8, d23, d29 -+ vmlal.s32 q8, d24, d28 -+ vmlal.s32 q7, d22, d29 -+ vmlal.s32 q7, d23, d28 -+ add r2, sp, #608 -+ vst1.8 {d8-d9}, [r2, : 128] -+ add r2, sp, #560 -+ vld1.8 {d8-d9}, [r2, : 128] -+ vmlal.s32 q7, d24, d9 -+ vmlal.s32 q7, d25, d31 -+ vmull.s32 q1, d18, d2 -+ vmlal.s32 q1, d19, d1 -+ vmlal.s32 q1, d22, d0 -+ vmlal.s32 q1, d24, d27 -+ vmlal.s32 q1, d23, d20 -+ vmlal.s32 q1, d12, d7 -+ vmlal.s32 q1, d13, d6 -+ vmull.s32 q6, d18, d1 -+ vmlal.s32 q6, d19, d0 -+ vmlal.s32 q6, d23, d27 -+ vmlal.s32 q6, d22, d20 -+ vmlal.s32 q6, d24, d26 -+ vmull.s32 q0, d18, d0 -+ vmlal.s32 q0, d22, d27 -+ vmlal.s32 q0, d23, d26 -+ vmlal.s32 q0, d24, d31 -+ vmlal.s32 q0, d19, d20 -+ add r2, sp, #640 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmlal.s32 q2, d18, d7 -+ vmlal.s32 q2, d19, d6 -+ vmlal.s32 q5, d18, d6 -+ vmlal.s32 q5, d19, d21 -+ vmlal.s32 q1, d18, d21 -+ vmlal.s32 q1, d19, d29 -+ vmlal.s32 q0, d18, d28 -+ vmlal.s32 q0, d19, d9 -+ vmlal.s32 q6, d18, d29 -+ vmlal.s32 q6, d19, d28 -+ add r2, sp, #592 -+ vld1.8 {d18-d19}, [r2, : 128] -+ add r2, sp, #512 -+ vld1.8 {d22-d23}, [r2, : 128] -+ vmlal.s32 q5, d19, d7 -+ vmlal.s32 q0, d18, d21 -+ vmlal.s32 q0, d19, d29 -+ vmlal.s32 q6, d18, d6 -+ add r2, sp, #528 -+ vld1.8 {d6-d7}, [r2, : 128] -+ vmlal.s32 q6, 
d19, d21 -+ add r2, sp, #576 -+ vld1.8 {d18-d19}, [r2, : 128] -+ vmlal.s32 q0, d30, d8 -+ add r2, sp, #672 -+ vld1.8 {d20-d21}, [r2, : 128] -+ vmlal.s32 q5, d30, d29 -+ add r2, sp, #608 -+ vld1.8 {d24-d25}, [r2, : 128] -+ vmlal.s32 q1, d30, d28 -+ vadd.i64 q13, q0, q11 -+ vadd.i64 q14, q5, q11 -+ vmlal.s32 q6, d30, d9 -+ vshr.s64 q4, q13, #26 -+ vshr.s64 q13, q14, #26 -+ vadd.i64 q7, q7, q4 -+ vshl.i64 q4, q4, #26 -+ vadd.i64 q14, q7, q3 -+ vadd.i64 q9, q9, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q15, q9, q3 -+ vsub.i64 q0, q0, q4 -+ vshr.s64 q4, q14, #25 -+ vsub.i64 q5, q5, q13 -+ vshr.s64 q13, q15, #25 -+ vadd.i64 q6, q6, q4 -+ vshl.i64 q4, q4, #25 -+ vadd.i64 q14, q6, q11 -+ vadd.i64 q2, q2, q13 -+ vsub.i64 q4, q7, q4 -+ vshr.s64 q7, q14, #26 -+ vshl.i64 q13, q13, #25 -+ vadd.i64 q14, q2, q11 -+ vadd.i64 q8, q8, q7 -+ vshl.i64 q7, q7, #26 -+ vadd.i64 q15, q8, q3 -+ vsub.i64 q9, q9, q13 -+ vshr.s64 q13, q14, #26 -+ vsub.i64 q6, q6, q7 -+ vshr.s64 q7, q15, #25 -+ vadd.i64 q10, q10, q13 -+ vshl.i64 q13, q13, #26 -+ vadd.i64 q14, q10, q3 -+ vadd.i64 q1, q1, q7 -+ add r2, r3, #240 -+ vshl.i64 q7, q7, #25 -+ add r4, r3, #144 -+ vadd.i64 q15, q1, q11 -+ add r2, r2, #8 -+ vsub.i64 q2, q2, q13 -+ add r4, r4, #8 -+ vshr.s64 q13, q14, #25 -+ vsub.i64 q7, q8, q7 -+ vshr.s64 q8, q15, #26 -+ vadd.i64 q14, q13, q13 -+ vadd.i64 q12, q12, q8 -+ vtrn.32 d12, d14 -+ vshl.i64 q8, q8, #26 -+ vtrn.32 d13, d15 -+ vadd.i64 q3, q12, q3 -+ vadd.i64 q0, q0, q14 -+ vst1.8 d12, [r2, : 64]! -+ vshl.i64 q7, q13, #4 -+ vst1.8 d13, [r4, : 64]! 
-+ vsub.i64 q1, q1, q8 -+ vshr.s64 q3, q3, #25 -+ vadd.i64 q0, q0, q7 -+ vadd.i64 q5, q5, q3 -+ vshl.i64 q3, q3, #25 -+ vadd.i64 q6, q5, q11 -+ vadd.i64 q0, q0, q13 -+ vshl.i64 q7, q13, #25 -+ vadd.i64 q8, q0, q11 -+ vsub.i64 q3, q12, q3 -+ vshr.s64 q6, q6, #26 -+ vsub.i64 q7, q10, q7 -+ vtrn.32 d2, d6 -+ vshr.s64 q8, q8, #26 -+ vtrn.32 d3, d7 -+ vadd.i64 q3, q9, q6 -+ vst1.8 d2, [r2, : 64] -+ vshl.i64 q6, q6, #26 -+ vst1.8 d3, [r4, : 64] -+ vadd.i64 q1, q4, q8 -+ vtrn.32 d4, d14 -+ vshl.i64 q4, q8, #26 -+ vtrn.32 d5, d15 -+ vsub.i64 q5, q5, q6 -+ add r2, r2, #16 -+ vsub.i64 q0, q0, q4 -+ vst1.8 d4, [r2, : 64] -+ add r4, r4, #16 -+ vst1.8 d5, [r4, : 64] -+ vtrn.32 d10, d6 -+ vtrn.32 d11, d7 -+ sub r2, r2, #8 -+ sub r4, r4, #8 -+ vtrn.32 d0, d2 -+ vtrn.32 d1, d3 -+ vst1.8 d10, [r2, : 64] -+ vst1.8 d11, [r4, : 64] -+ sub r2, r2, #24 -+ sub r4, r4, #24 -+ vst1.8 d0, [r2, : 64] -+ vst1.8 d1, [r4, : 64] -+ ldr r2, [sp, #488] -+ ldr r4, [sp, #492] -+ subs r5, r2, #1 -+ bge ._mainloop -+ add r1, r3, #144 -+ add r2, r3, #336 -+ vld1.8 {d0-d1}, [r1, : 128]! -+ vld1.8 {d2-d3}, [r1, : 128]! -+ vld1.8 {d4}, [r1, : 64] -+ vst1.8 {d0-d1}, [r2, : 128]! -+ vst1.8 {d2-d3}, [r2, : 128]! -+ vst1.8 d4, [r2, : 64] -+ ldr r1, =0 -+._invertloop: -+ add r2, r3, #144 -+ ldr r4, =0 -+ ldr r5, =2 -+ cmp r1, #1 -+ ldreq r5, =1 -+ addeq r2, r3, #336 -+ addeq r4, r3, #48 -+ cmp r1, #2 -+ ldreq r5, =1 -+ addeq r2, r3, #48 -+ cmp r1, #3 -+ ldreq r5, =5 -+ addeq r4, r3, #336 -+ cmp r1, #4 -+ ldreq r5, =10 -+ cmp r1, #5 -+ ldreq r5, =20 -+ cmp r1, #6 -+ ldreq r5, =10 -+ addeq r2, r3, #336 -+ addeq r4, r3, #336 -+ cmp r1, #7 -+ ldreq r5, =50 -+ cmp r1, #8 -+ ldreq r5, =100 -+ cmp r1, #9 -+ ldreq r5, =50 -+ addeq r2, r3, #336 -+ cmp r1, #10 -+ ldreq r5, =5 -+ addeq r2, r3, #48 -+ cmp r1, #11 -+ ldreq r5, =0 -+ addeq r2, r3, #96 -+ add r6, r3, #144 -+ add r7, r3, #288 -+ vld1.8 {d0-d1}, [r6, : 128]! -+ vld1.8 {d2-d3}, [r6, : 128]! -+ vld1.8 {d4}, [r6, : 64] -+ vst1.8 {d0-d1}, [r7, : 128]! 
-+ vst1.8 {d2-d3}, [r7, : 128]! -+ vst1.8 d4, [r7, : 64] -+ cmp r5, #0 -+ beq ._skipsquaringloop -+._squaringloop: -+ add r6, r3, #288 -+ add r7, r3, #288 -+ add r8, r3, #288 -+ vmov.i32 q0, #19 -+ vmov.i32 q1, #0 -+ vmov.i32 q2, #1 -+ vzip.i32 q1, q2 -+ vld1.8 {d4-d5}, [r7, : 128]! -+ vld1.8 {d6-d7}, [r7, : 128]! -+ vld1.8 {d9}, [r7, : 64] -+ vld1.8 {d10-d11}, [r6, : 128]! -+ add r7, sp, #416 -+ vld1.8 {d12-d13}, [r6, : 128]! -+ vmul.i32 q7, q2, q0 -+ vld1.8 {d8}, [r6, : 64] -+ vext.32 d17, d11, d10, #1 -+ vmul.i32 q9, q3, q0 -+ vext.32 d16, d10, d8, #1 -+ vshl.u32 q10, q5, q1 -+ vext.32 d22, d14, d4, #1 -+ vext.32 d24, d18, d6, #1 -+ vshl.u32 q13, q6, q1 -+ vshl.u32 d28, d8, d2 -+ vrev64.i32 d22, d22 -+ vmul.i32 d1, d9, d1 -+ vrev64.i32 d24, d24 -+ vext.32 d29, d8, d13, #1 -+ vext.32 d0, d1, d9, #1 -+ vrev64.i32 d0, d0 -+ vext.32 d2, d9, d1, #1 -+ vext.32 d23, d15, d5, #1 -+ vmull.s32 q4, d20, d4 -+ vrev64.i32 d23, d23 -+ vmlal.s32 q4, d21, d1 -+ vrev64.i32 d2, d2 -+ vmlal.s32 q4, d26, d19 -+ vext.32 d3, d5, d15, #1 -+ vmlal.s32 q4, d27, d18 -+ vrev64.i32 d3, d3 -+ vmlal.s32 q4, d28, d15 -+ vext.32 d14, d12, d11, #1 -+ vmull.s32 q5, d16, d23 -+ vext.32 d15, d13, d12, #1 -+ vmlal.s32 q5, d17, d4 -+ vst1.8 d8, [r7, : 64]! -+ vmlal.s32 q5, d14, d1 -+ vext.32 d12, d9, d8, #0 -+ vmlal.s32 q5, d15, d19 -+ vmov.i64 d13, #0 -+ vmlal.s32 q5, d29, d18 -+ vext.32 d25, d19, d7, #1 -+ vmlal.s32 q6, d20, d5 -+ vrev64.i32 d25, d25 -+ vmlal.s32 q6, d21, d4 -+ vst1.8 d11, [r7, : 64]! -+ vmlal.s32 q6, d26, d1 -+ vext.32 d9, d10, d10, #0 -+ vmlal.s32 q6, d27, d19 -+ vmov.i64 d8, #0 -+ vmlal.s32 q6, d28, d18 -+ vmlal.s32 q4, d16, d24 -+ vmlal.s32 q4, d17, d5 -+ vmlal.s32 q4, d14, d4 -+ vst1.8 d12, [r7, : 64]! 
-+ vmlal.s32 q4, d15, d1 -+ vext.32 d10, d13, d12, #0 -+ vmlal.s32 q4, d29, d19 -+ vmov.i64 d11, #0 -+ vmlal.s32 q5, d20, d6 -+ vmlal.s32 q5, d21, d5 -+ vmlal.s32 q5, d26, d4 -+ vext.32 d13, d8, d8, #0 -+ vmlal.s32 q5, d27, d1 -+ vmov.i64 d12, #0 -+ vmlal.s32 q5, d28, d19 -+ vst1.8 d9, [r7, : 64]! -+ vmlal.s32 q6, d16, d25 -+ vmlal.s32 q6, d17, d6 -+ vst1.8 d10, [r7, : 64] -+ vmlal.s32 q6, d14, d5 -+ vext.32 d8, d11, d10, #0 -+ vmlal.s32 q6, d15, d4 -+ vmov.i64 d9, #0 -+ vmlal.s32 q6, d29, d1 -+ vmlal.s32 q4, d20, d7 -+ vmlal.s32 q4, d21, d6 -+ vmlal.s32 q4, d26, d5 -+ vext.32 d11, d12, d12, #0 -+ vmlal.s32 q4, d27, d4 -+ vmov.i64 d10, #0 -+ vmlal.s32 q4, d28, d1 -+ vmlal.s32 q5, d16, d0 -+ sub r6, r7, #32 -+ vmlal.s32 q5, d17, d7 -+ vmlal.s32 q5, d14, d6 -+ vext.32 d30, d9, d8, #0 -+ vmlal.s32 q5, d15, d5 -+ vld1.8 {d31}, [r6, : 64]! -+ vmlal.s32 q5, d29, d4 -+ vmlal.s32 q15, d20, d0 -+ vext.32 d0, d6, d18, #1 -+ vmlal.s32 q15, d21, d25 -+ vrev64.i32 d0, d0 -+ vmlal.s32 q15, d26, d24 -+ vext.32 d1, d7, d19, #1 -+ vext.32 d7, d10, d10, #0 -+ vmlal.s32 q15, d27, d23 -+ vrev64.i32 d1, d1 -+ vld1.8 {d6}, [r6, : 64] -+ vmlal.s32 q15, d28, d22 -+ vmlal.s32 q3, d16, d4 -+ add r6, r6, #24 -+ vmlal.s32 q3, d17, d2 -+ vext.32 d4, d31, d30, #0 -+ vmov d17, d11 -+ vmlal.s32 q3, d14, d1 -+ vext.32 d11, d13, d13, #0 -+ vext.32 d13, d30, d30, #0 -+ vmlal.s32 q3, d15, d0 -+ vext.32 d1, d8, d8, #0 -+ vmlal.s32 q3, d29, d3 -+ vld1.8 {d5}, [r6, : 64] -+ sub r6, r6, #16 -+ vext.32 d10, d6, d6, #0 -+ vmov.i32 q1, #0xffffffff -+ vshl.i64 q4, q1, #25 -+ add r7, sp, #512 -+ vld1.8 {d14-d15}, [r7, : 128] -+ vadd.i64 q9, q2, q7 -+ vshl.i64 q1, q1, #26 -+ vshr.s64 q10, q9, #26 -+ vld1.8 {d0}, [r6, : 64]! -+ vadd.i64 q5, q5, q10 -+ vand q9, q9, q1 -+ vld1.8 {d16}, [r6, : 64]! 
-+ add r6, sp, #528 -+ vld1.8 {d20-d21}, [r6, : 128] -+ vadd.i64 q11, q5, q10 -+ vsub.i64 q2, q2, q9 -+ vshr.s64 q9, q11, #25 -+ vext.32 d12, d5, d4, #0 -+ vand q11, q11, q4 -+ vadd.i64 q0, q0, q9 -+ vmov d19, d7 -+ vadd.i64 q3, q0, q7 -+ vsub.i64 q5, q5, q11 -+ vshr.s64 q11, q3, #26 -+ vext.32 d18, d11, d10, #0 -+ vand q3, q3, q1 -+ vadd.i64 q8, q8, q11 -+ vadd.i64 q11, q8, q10 -+ vsub.i64 q0, q0, q3 -+ vshr.s64 q3, q11, #25 -+ vand q11, q11, q4 -+ vadd.i64 q3, q6, q3 -+ vadd.i64 q6, q3, q7 -+ vsub.i64 q8, q8, q11 -+ vshr.s64 q11, q6, #26 -+ vand q6, q6, q1 -+ vadd.i64 q9, q9, q11 -+ vadd.i64 d25, d19, d21 -+ vsub.i64 q3, q3, q6 -+ vshr.s64 d23, d25, #25 -+ vand q4, q12, q4 -+ vadd.i64 d21, d23, d23 -+ vshl.i64 d25, d23, #4 -+ vadd.i64 d21, d21, d23 -+ vadd.i64 d25, d25, d21 -+ vadd.i64 d4, d4, d25 -+ vzip.i32 q0, q8 -+ vadd.i64 d12, d4, d14 -+ add r6, r8, #8 -+ vst1.8 d0, [r6, : 64] -+ vsub.i64 d19, d19, d9 -+ add r6, r6, #16 -+ vst1.8 d16, [r6, : 64] -+ vshr.s64 d22, d12, #26 -+ vand q0, q6, q1 -+ vadd.i64 d10, d10, d22 -+ vzip.i32 q3, q9 -+ vsub.i64 d4, d4, d0 -+ sub r6, r6, #8 -+ vst1.8 d6, [r6, : 64] -+ add r6, r6, #16 -+ vst1.8 d18, [r6, : 64] -+ vzip.i32 q2, q5 -+ sub r6, r6, #32 -+ vst1.8 d4, [r6, : 64] -+ subs r5, r5, #1 -+ bhi ._squaringloop -+._skipsquaringloop: -+ mov r2, r2 -+ add r5, r3, #288 -+ add r6, r3, #144 -+ vmov.i32 q0, #19 -+ vmov.i32 q1, #0 -+ vmov.i32 q2, #1 -+ vzip.i32 q1, q2 -+ vld1.8 {d4-d5}, [r5, : 128]! -+ vld1.8 {d6-d7}, [r5, : 128]! -+ vld1.8 {d9}, [r5, : 64] -+ vld1.8 {d10-d11}, [r2, : 128]! -+ add r5, sp, #416 -+ vld1.8 {d12-d13}, [r2, : 128]! 
-+ vmul.i32 q7, q2, q0 -+ vld1.8 {d8}, [r2, : 64] -+ vext.32 d17, d11, d10, #1 -+ vmul.i32 q9, q3, q0 -+ vext.32 d16, d10, d8, #1 -+ vshl.u32 q10, q5, q1 -+ vext.32 d22, d14, d4, #1 -+ vext.32 d24, d18, d6, #1 -+ vshl.u32 q13, q6, q1 -+ vshl.u32 d28, d8, d2 -+ vrev64.i32 d22, d22 -+ vmul.i32 d1, d9, d1 -+ vrev64.i32 d24, d24 -+ vext.32 d29, d8, d13, #1 -+ vext.32 d0, d1, d9, #1 -+ vrev64.i32 d0, d0 -+ vext.32 d2, d9, d1, #1 -+ vext.32 d23, d15, d5, #1 -+ vmull.s32 q4, d20, d4 -+ vrev64.i32 d23, d23 -+ vmlal.s32 q4, d21, d1 -+ vrev64.i32 d2, d2 -+ vmlal.s32 q4, d26, d19 -+ vext.32 d3, d5, d15, #1 -+ vmlal.s32 q4, d27, d18 -+ vrev64.i32 d3, d3 -+ vmlal.s32 q4, d28, d15 -+ vext.32 d14, d12, d11, #1 -+ vmull.s32 q5, d16, d23 -+ vext.32 d15, d13, d12, #1 -+ vmlal.s32 q5, d17, d4 -+ vst1.8 d8, [r5, : 64]! -+ vmlal.s32 q5, d14, d1 -+ vext.32 d12, d9, d8, #0 -+ vmlal.s32 q5, d15, d19 -+ vmov.i64 d13, #0 -+ vmlal.s32 q5, d29, d18 -+ vext.32 d25, d19, d7, #1 -+ vmlal.s32 q6, d20, d5 -+ vrev64.i32 d25, d25 -+ vmlal.s32 q6, d21, d4 -+ vst1.8 d11, [r5, : 64]! -+ vmlal.s32 q6, d26, d1 -+ vext.32 d9, d10, d10, #0 -+ vmlal.s32 q6, d27, d19 -+ vmov.i64 d8, #0 -+ vmlal.s32 q6, d28, d18 -+ vmlal.s32 q4, d16, d24 -+ vmlal.s32 q4, d17, d5 -+ vmlal.s32 q4, d14, d4 -+ vst1.8 d12, [r5, : 64]! -+ vmlal.s32 q4, d15, d1 -+ vext.32 d10, d13, d12, #0 -+ vmlal.s32 q4, d29, d19 -+ vmov.i64 d11, #0 -+ vmlal.s32 q5, d20, d6 -+ vmlal.s32 q5, d21, d5 -+ vmlal.s32 q5, d26, d4 -+ vext.32 d13, d8, d8, #0 -+ vmlal.s32 q5, d27, d1 -+ vmov.i64 d12, #0 -+ vmlal.s32 q5, d28, d19 -+ vst1.8 d9, [r5, : 64]! 
-+ vmlal.s32 q6, d16, d25 -+ vmlal.s32 q6, d17, d6 -+ vst1.8 d10, [r5, : 64] -+ vmlal.s32 q6, d14, d5 -+ vext.32 d8, d11, d10, #0 -+ vmlal.s32 q6, d15, d4 -+ vmov.i64 d9, #0 -+ vmlal.s32 q6, d29, d1 -+ vmlal.s32 q4, d20, d7 -+ vmlal.s32 q4, d21, d6 -+ vmlal.s32 q4, d26, d5 -+ vext.32 d11, d12, d12, #0 -+ vmlal.s32 q4, d27, d4 -+ vmov.i64 d10, #0 -+ vmlal.s32 q4, d28, d1 -+ vmlal.s32 q5, d16, d0 -+ sub r2, r5, #32 -+ vmlal.s32 q5, d17, d7 -+ vmlal.s32 q5, d14, d6 -+ vext.32 d30, d9, d8, #0 -+ vmlal.s32 q5, d15, d5 -+ vld1.8 {d31}, [r2, : 64]! -+ vmlal.s32 q5, d29, d4 -+ vmlal.s32 q15, d20, d0 -+ vext.32 d0, d6, d18, #1 -+ vmlal.s32 q15, d21, d25 -+ vrev64.i32 d0, d0 -+ vmlal.s32 q15, d26, d24 -+ vext.32 d1, d7, d19, #1 -+ vext.32 d7, d10, d10, #0 -+ vmlal.s32 q15, d27, d23 -+ vrev64.i32 d1, d1 -+ vld1.8 {d6}, [r2, : 64] -+ vmlal.s32 q15, d28, d22 -+ vmlal.s32 q3, d16, d4 -+ add r2, r2, #24 -+ vmlal.s32 q3, d17, d2 -+ vext.32 d4, d31, d30, #0 -+ vmov d17, d11 -+ vmlal.s32 q3, d14, d1 -+ vext.32 d11, d13, d13, #0 -+ vext.32 d13, d30, d30, #0 -+ vmlal.s32 q3, d15, d0 -+ vext.32 d1, d8, d8, #0 -+ vmlal.s32 q3, d29, d3 -+ vld1.8 {d5}, [r2, : 64] -+ sub r2, r2, #16 -+ vext.32 d10, d6, d6, #0 -+ vmov.i32 q1, #0xffffffff -+ vshl.i64 q4, q1, #25 -+ add r5, sp, #512 -+ vld1.8 {d14-d15}, [r5, : 128] -+ vadd.i64 q9, q2, q7 -+ vshl.i64 q1, q1, #26 -+ vshr.s64 q10, q9, #26 -+ vld1.8 {d0}, [r2, : 64]! -+ vadd.i64 q5, q5, q10 -+ vand q9, q9, q1 -+ vld1.8 {d16}, [r2, : 64]! 
-+ add r2, sp, #528 -+ vld1.8 {d20-d21}, [r2, : 128] -+ vadd.i64 q11, q5, q10 -+ vsub.i64 q2, q2, q9 -+ vshr.s64 q9, q11, #25 -+ vext.32 d12, d5, d4, #0 -+ vand q11, q11, q4 -+ vadd.i64 q0, q0, q9 -+ vmov d19, d7 -+ vadd.i64 q3, q0, q7 -+ vsub.i64 q5, q5, q11 -+ vshr.s64 q11, q3, #26 -+ vext.32 d18, d11, d10, #0 -+ vand q3, q3, q1 -+ vadd.i64 q8, q8, q11 -+ vadd.i64 q11, q8, q10 -+ vsub.i64 q0, q0, q3 -+ vshr.s64 q3, q11, #25 -+ vand q11, q11, q4 -+ vadd.i64 q3, q6, q3 -+ vadd.i64 q6, q3, q7 -+ vsub.i64 q8, q8, q11 -+ vshr.s64 q11, q6, #26 -+ vand q6, q6, q1 -+ vadd.i64 q9, q9, q11 -+ vadd.i64 d25, d19, d21 -+ vsub.i64 q3, q3, q6 -+ vshr.s64 d23, d25, #25 -+ vand q4, q12, q4 -+ vadd.i64 d21, d23, d23 -+ vshl.i64 d25, d23, #4 -+ vadd.i64 d21, d21, d23 -+ vadd.i64 d25, d25, d21 -+ vadd.i64 d4, d4, d25 -+ vzip.i32 q0, q8 -+ vadd.i64 d12, d4, d14 -+ add r2, r6, #8 -+ vst1.8 d0, [r2, : 64] -+ vsub.i64 d19, d19, d9 -+ add r2, r2, #16 -+ vst1.8 d16, [r2, : 64] -+ vshr.s64 d22, d12, #26 -+ vand q0, q6, q1 -+ vadd.i64 d10, d10, d22 -+ vzip.i32 q3, q9 -+ vsub.i64 d4, d4, d0 -+ sub r2, r2, #8 -+ vst1.8 d6, [r2, : 64] -+ add r2, r2, #16 -+ vst1.8 d18, [r2, : 64] -+ vzip.i32 q2, q5 -+ sub r2, r2, #32 -+ vst1.8 d4, [r2, : 64] -+ cmp r4, #0 -+ beq ._skippostcopy -+ add r2, r3, #144 -+ mov r4, r4 -+ vld1.8 {d0-d1}, [r2, : 128]! -+ vld1.8 {d2-d3}, [r2, : 128]! -+ vld1.8 {d4}, [r2, : 64] -+ vst1.8 {d0-d1}, [r4, : 128]! -+ vst1.8 {d2-d3}, [r4, : 128]! -+ vst1.8 d4, [r4, : 64] -+._skippostcopy: -+ cmp r1, #1 -+ bne ._skipfinalcopy -+ add r2, r3, #288 -+ add r4, r3, #144 -+ vld1.8 {d0-d1}, [r2, : 128]! -+ vld1.8 {d2-d3}, [r2, : 128]! -+ vld1.8 {d4}, [r2, : 64] -+ vst1.8 {d0-d1}, [r4, : 128]! -+ vst1.8 {d2-d3}, [r4, : 128]! 
-+ vst1.8 d4, [r4, : 64] -+._skipfinalcopy: -+ add r1, r1, #1 -+ cmp r1, #12 -+ blo ._invertloop -+ add r1, r3, #144 -+ ldr r2, [r1], #4 -+ ldr r3, [r1], #4 -+ ldr r4, [r1], #4 -+ ldr r5, [r1], #4 -+ ldr r6, [r1], #4 -+ ldr r7, [r1], #4 -+ ldr r8, [r1], #4 -+ ldr r9, [r1], #4 -+ ldr r10, [r1], #4 -+ ldr r1, [r1] -+ add r11, r1, r1, LSL #4 -+ add r11, r11, r1, LSL #1 -+ add r11, r11, #16777216 -+ mov r11, r11, ASR #25 -+ add r11, r11, r2 -+ mov r11, r11, ASR #26 -+ add r11, r11, r3 -+ mov r11, r11, ASR #25 -+ add r11, r11, r4 -+ mov r11, r11, ASR #26 -+ add r11, r11, r5 -+ mov r11, r11, ASR #25 -+ add r11, r11, r6 -+ mov r11, r11, ASR #26 -+ add r11, r11, r7 -+ mov r11, r11, ASR #25 -+ add r11, r11, r8 -+ mov r11, r11, ASR #26 -+ add r11, r11, r9 -+ mov r11, r11, ASR #25 -+ add r11, r11, r10 -+ mov r11, r11, ASR #26 -+ add r11, r11, r1 -+ mov r11, r11, ASR #25 -+ add r2, r2, r11 -+ add r2, r2, r11, LSL #1 -+ add r2, r2, r11, LSL #4 -+ mov r11, r2, ASR #26 -+ add r3, r3, r11 -+ sub r2, r2, r11, LSL #26 -+ mov r11, r3, ASR #25 -+ add r4, r4, r11 -+ sub r3, r3, r11, LSL #25 -+ mov r11, r4, ASR #26 -+ add r5, r5, r11 -+ sub r4, r4, r11, LSL #26 -+ mov r11, r5, ASR #25 -+ add r6, r6, r11 -+ sub r5, r5, r11, LSL #25 -+ mov r11, r6, ASR #26 -+ add r7, r7, r11 -+ sub r6, r6, r11, LSL #26 -+ mov r11, r7, ASR #25 -+ add r8, r8, r11 -+ sub r7, r7, r11, LSL #25 -+ mov r11, r8, ASR #26 -+ add r9, r9, r11 -+ sub r8, r8, r11, LSL #26 -+ mov r11, r9, ASR #25 -+ add r10, r10, r11 -+ sub r9, r9, r11, LSL #25 -+ mov r11, r10, ASR #26 -+ add r1, r1, r11 -+ sub r10, r10, r11, LSL #26 -+ mov r11, r1, ASR #25 -+ sub r1, r1, r11, LSL #25 -+ add r2, r2, r3, LSL #26 -+ mov r3, r3, LSR #6 -+ add r3, r3, r4, LSL #19 -+ mov r4, r4, LSR #13 -+ add r4, r4, r5, LSL #13 -+ mov r5, r5, LSR #19 -+ add r5, r5, r6, LSL #6 -+ add r6, r7, r8, LSL #25 -+ mov r7, r8, LSR #7 -+ add r7, r7, r9, LSL #19 -+ mov r8, r9, LSR #13 -+ add r8, r8, r10, LSL #12 -+ mov r9, r10, LSR #20 -+ add r1, r9, r1, LSL #6 -+ str 
r2, [r0], #4 -+ str r3, [r0], #4 -+ str r4, [r0], #4 -+ str r5, [r0], #4 -+ str r6, [r0], #4 -+ str r7, [r0], #4 -+ str r8, [r0], #4 -+ str r1, [r0] -+ ldrd r4, [sp, #0] -+ ldrd r6, [sp, #8] -+ ldrd r8, [sp, #16] -+ ldrd r10, [sp, #24] -+ ldr r12, [sp, #480] -+ ldr r14, [sp, #484] -+ ldr r0, =0 -+ mov sp, r12 -+ vpop {q4, q5, q6, q7} -+ bx lr diff --git a/target/linux/generic/backport-5.4/080-wireguard-0031-crypto-arm-curve25519-wire-up-NEON-implementation.patch b/target/linux/generic/backport-5.4/080-wireguard-0031-crypto-arm-curve25519-wire-up-NEON-implementation.patch deleted file mode 100644 index d84726b616..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0031-crypto-arm-curve25519-wire-up-NEON-implementation.patch +++ /dev/null @@ -1,1058 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:38 +0100 -Subject: [PATCH] crypto: arm/curve25519 - wire up NEON implementation - -commit d8f1308a025fc7e00414194ed742d5f05a21e13c upstream. - -This ports the SUPERCOP implementation for usage in kernel space. In -addition to the usual header, macro, and style changes required for -kernel space, it makes a few small changes to the code: - - - The stack alignment is relaxed to 16 bytes. - - Superfluous mov statements have been removed. - - ldr for constants has been replaced with movw. - - ldreq has been replaced with moveq. - - The str epilogue has been made more idiomatic. - - SIMD registers are not pushed and popped at the beginning and end. - - The prologue and epilogue have been made idiomatic. - - A hole has been removed from the stack, saving 32 bytes. - - We write-back the base register whenever possible for vld1.8. - - Some multiplications have been reordered for better A7 performance. - -There are more opportunities for cleanup, since this code is from qhasm, -which doesn't always do the most opportune thing. 
But even prior to -extensive hand optimizations, this code delivers significant performance -improvements (given in get_cycles() per call): - - ----------- ------------- - | generic C | this commit | - ------------ ----------- ------------- - | Cortex-A7 | 49136 | 22395 | - ------------ ----------- ------------- - | Cortex-A17 | 17326 | 4983 | - ------------ ----------- ------------- - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -[ardb: - move to arch/arm/crypto - - wire into lib/crypto framework - - implement crypto API KPP hooks ] -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/Kconfig | 6 + - arch/arm/crypto/Makefile | 2 + - arch/arm/crypto/curve25519-core.S | 347 +++++++++++++----------------- - arch/arm/crypto/curve25519-glue.c | 127 +++++++++++ - 4 files changed, 287 insertions(+), 195 deletions(-) - create mode 100644 arch/arm/crypto/curve25519-glue.c - ---- a/arch/arm/crypto/Kconfig -+++ b/arch/arm/crypto/Kconfig -@@ -141,4 +141,10 @@ config CRYPTO_NHPOLY1305_NEON - depends on KERNEL_MODE_NEON - select CRYPTO_NHPOLY1305 - -+config CRYPTO_CURVE25519_NEON -+ tristate "NEON accelerated Curve25519 scalar multiplication library" -+ depends on KERNEL_MODE_NEON -+ select CRYPTO_LIB_CURVE25519_GENERIC -+ select CRYPTO_ARCH_HAVE_LIB_CURVE25519 -+ - endif ---- a/arch/arm/crypto/Makefile -+++ b/arch/arm/crypto/Makefile -@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha51 - obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o - obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o - obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o -+obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o - - ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o - ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -@@ -58,6 +59,7 @@ chacha-neon-y := chacha-scalar-core.o ch - chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += 
chacha-neon-core.o - poly1305-arm-y := poly1305-core.o poly1305-glue.o - nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o -+curve25519-neon-y := curve25519-core.o curve25519-glue.o - - ifdef REGENERATE_ARM_CRYPTO - quiet_cmd_perl = PERL $@ ---- a/arch/arm/crypto/curve25519-core.S -+++ b/arch/arm/crypto/curve25519-core.S -@@ -1,43 +1,35 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ - /* -- * Public domain code from Daniel J. Bernstein and Peter Schwabe, from -- * SUPERCOP's curve25519/neon2/scalarmult.s. -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This -+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been -+ * manually reworked for use in kernel space. - */ - --.fpu neon -+#include <linux/linkage.h> -+ - .text -+.fpu neon -+.arch armv7-a - .align 4 --.global _crypto_scalarmult_curve25519_neon2 --.global crypto_scalarmult_curve25519_neon2 --.type _crypto_scalarmult_curve25519_neon2 STT_FUNC --.type crypto_scalarmult_curve25519_neon2 STT_FUNC -- _crypto_scalarmult_curve25519_neon2: -- crypto_scalarmult_curve25519_neon2: -- vpush {q4, q5, q6, q7} -- mov r12, sp -- sub sp, sp, #736 -- and sp, sp, #0xffffffe0 -- strd r4, [sp, #0] -- strd r6, [sp, #8] -- strd r8, [sp, #16] -- strd r10, [sp, #24] -- str r12, [sp, #480] -- str r14, [sp, #484] -- mov r0, r0 -- mov r1, r1 -- mov r2, r2 -- add r3, sp, #32 -- ldr r4, =0 -- ldr r5, =254 -+ -+ENTRY(curve25519_neon) -+ push {r4-r11, lr} -+ mov ip, sp -+ sub r3, sp, #704 -+ and r3, r3, #0xfffffff0 -+ mov sp, r3 -+ movw r4, #0 -+ movw r5, #254 - vmov.i32 q0, #1 - vshr.u64 q1, q0, #7 - vshr.u64 q0, q0, #8 - vmov.i32 d4, #19 - vmov.i32 d5, #38 -- add r6, sp, #512 -- vst1.8 {d2-d3}, [r6, : 128] -- add r6, sp, #528 -- vst1.8 {d0-d1}, [r6, : 128] -- add r6, sp, #544 -+ add r6, sp, #480 -+ vst1.8 {d2-d3}, [r6, : 128]! -+ vst1.8 {d0-d1}, [r6, : 128]! 
- vst1.8 {d4-d5}, [r6, : 128] - add r6, r3, #0 - vmov.i32 q2, #0 -@@ -45,12 +37,12 @@ - vst1.8 {d4-d5}, [r6, : 128]! - vst1.8 d4, [r6, : 64] - add r6, r3, #0 -- ldr r7, =960 -+ movw r7, #960 - sub r7, r7, #2 - neg r7, r7 - sub r7, r7, r7, LSL #7 - str r7, [r6] -- add r6, sp, #704 -+ add r6, sp, #672 - vld1.8 {d4-d5}, [r1]! - vld1.8 {d6-d7}, [r1] - vst1.8 {d4-d5}, [r6, : 128]! -@@ -212,15 +204,15 @@ - vst1.8 {d0-d1}, [r6, : 128]! - vst1.8 {d2-d3}, [r6, : 128]! - vst1.8 d4, [r6, : 64] --._mainloop: -+.Lmainloop: - mov r2, r5, LSR #3 - and r6, r5, #7 - ldrb r2, [r1, r2] - mov r2, r2, LSR r6 - and r2, r2, #1 -- str r5, [sp, #488] -+ str r5, [sp, #456] - eor r4, r4, r2 -- str r2, [sp, #492] -+ str r2, [sp, #460] - neg r2, r4 - add r4, r3, #96 - add r5, r3, #192 -@@ -291,7 +283,7 @@ - vsub.i32 q0, q1, q3 - vst1.8 d4, [r4, : 64] - vst1.8 d0, [r6, : 64] -- add r2, sp, #544 -+ add r2, sp, #512 - add r4, r3, #96 - add r5, r3, #144 - vld1.8 {d0-d1}, [r2, : 128] -@@ -361,14 +353,13 @@ - vmlal.s32 q0, d12, d8 - vmlal.s32 q0, d13, d17 - vmlal.s32 q0, d6, d6 -- add r2, sp, #512 -- vld1.8 {d18-d19}, [r2, : 128] -+ add r2, sp, #480 -+ vld1.8 {d18-d19}, [r2, : 128]! - vmull.s32 q3, d16, d7 - vmlal.s32 q3, d10, d15 - vmlal.s32 q3, d11, d14 - vmlal.s32 q3, d12, d9 - vmlal.s32 q3, d13, d8 -- add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vadd.i64 q5, q12, q9 - vadd.i64 q6, q15, q9 -@@ -502,22 +493,19 @@ - vadd.i32 q5, q5, q0 - vtrn.32 q11, q14 - vadd.i32 q6, q6, q3 -- add r2, sp, #560 -+ add r2, sp, #528 - vadd.i32 q10, q10, q2 - vtrn.32 d24, d25 -- vst1.8 {d12-d13}, [r2, : 128] -+ vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q6, q13, #1 -- add r2, sp, #576 -- vst1.8 {d20-d21}, [r2, : 128] -+ vst1.8 {d20-d21}, [r2, : 128]! - vshl.i32 q10, q14, #1 -- add r2, sp, #592 -- vst1.8 {d12-d13}, [r2, : 128] -+ vst1.8 {d12-d13}, [r2, : 128]! 
- vshl.i32 q15, q12, #1 - vadd.i32 q8, q8, q4 - vext.32 d10, d31, d30, #0 - vadd.i32 q7, q7, q1 -- add r2, sp, #608 -- vst1.8 {d16-d17}, [r2, : 128] -+ vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q8, d18, d5 - vmlal.s32 q8, d26, d4 - vmlal.s32 q8, d19, d9 -@@ -528,8 +516,7 @@ - vmlal.s32 q8, d29, d1 - vmlal.s32 q8, d24, d6 - vmlal.s32 q8, d25, d0 -- add r2, sp, #624 -- vst1.8 {d14-d15}, [r2, : 128] -+ vst1.8 {d14-d15}, [r2, : 128]! - vmull.s32 q2, d18, d4 - vmlal.s32 q2, d12, d9 - vmlal.s32 q2, d13, d8 -@@ -537,8 +524,7 @@ - vmlal.s32 q2, d22, d2 - vmlal.s32 q2, d23, d1 - vmlal.s32 q2, d24, d0 -- add r2, sp, #640 -- vst1.8 {d20-d21}, [r2, : 128] -+ vst1.8 {d20-d21}, [r2, : 128]! - vmull.s32 q7, d18, d9 - vmlal.s32 q7, d26, d3 - vmlal.s32 q7, d19, d8 -@@ -547,14 +533,12 @@ - vmlal.s32 q7, d28, d1 - vmlal.s32 q7, d23, d6 - vmlal.s32 q7, d29, d0 -- add r2, sp, #656 -- vst1.8 {d10-d11}, [r2, : 128] -+ vst1.8 {d10-d11}, [r2, : 128]! - vmull.s32 q5, d18, d3 - vmlal.s32 q5, d19, d2 - vmlal.s32 q5, d22, d1 - vmlal.s32 q5, d23, d0 - vmlal.s32 q5, d12, d8 -- add r2, sp, #672 - vst1.8 {d16-d17}, [r2, : 128] - vmull.s32 q4, d18, d8 - vmlal.s32 q4, d26, d2 -@@ -566,7 +550,7 @@ - vmlal.s32 q8, d26, d1 - vmlal.s32 q8, d19, d6 - vmlal.s32 q8, d27, d0 -- add r2, sp, #576 -+ add r2, sp, #544 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q7, d24, d21 - vmlal.s32 q7, d25, d20 -@@ -575,32 +559,30 @@ - vmlal.s32 q8, d22, d21 - vmlal.s32 q8, d28, d20 - vmlal.s32 q5, d24, d20 -- add r2, sp, #576 - vst1.8 {d14-d15}, [r2, : 128] - vmull.s32 q7, d18, d6 - vmlal.s32 q7, d26, d0 -- add r2, sp, #656 -+ add r2, sp, #624 - vld1.8 {d30-d31}, [r2, : 128] - vmlal.s32 q2, d30, d21 - vmlal.s32 q7, d19, d21 - vmlal.s32 q7, d27, d20 -- add r2, sp, #624 -+ add r2, sp, #592 - vld1.8 {d26-d27}, [r2, : 128] - vmlal.s32 q4, d25, d27 - vmlal.s32 q8, d29, d27 - vmlal.s32 q8, d25, d26 - vmlal.s32 q7, d28, d27 - vmlal.s32 q7, d29, d26 -- add r2, sp, #608 -+ add r2, sp, #576 - vld1.8 {d28-d29}, [r2, : 128] - 
vmlal.s32 q4, d24, d29 - vmlal.s32 q8, d23, d29 - vmlal.s32 q8, d24, d28 - vmlal.s32 q7, d22, d29 - vmlal.s32 q7, d23, d28 -- add r2, sp, #608 - vst1.8 {d8-d9}, [r2, : 128] -- add r2, sp, #560 -+ add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vmlal.s32 q7, d24, d9 - vmlal.s32 q7, d25, d31 -@@ -621,36 +603,36 @@ - vmlal.s32 q0, d23, d26 - vmlal.s32 q0, d24, d31 - vmlal.s32 q0, d19, d20 -- add r2, sp, #640 -+ add r2, sp, #608 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q2, d18, d7 -- vmlal.s32 q2, d19, d6 - vmlal.s32 q5, d18, d6 -- vmlal.s32 q5, d19, d21 - vmlal.s32 q1, d18, d21 -- vmlal.s32 q1, d19, d29 - vmlal.s32 q0, d18, d28 -- vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d18, d29 -+ vmlal.s32 q2, d19, d6 -+ vmlal.s32 q5, d19, d21 -+ vmlal.s32 q1, d19, d29 -+ vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d19, d28 -- add r2, sp, #592 -+ add r2, sp, #560 - vld1.8 {d18-d19}, [r2, : 128] -- add r2, sp, #512 -+ add r2, sp, #480 - vld1.8 {d22-d23}, [r2, : 128] - vmlal.s32 q5, d19, d7 - vmlal.s32 q0, d18, d21 - vmlal.s32 q0, d19, d29 - vmlal.s32 q6, d18, d6 -- add r2, sp, #528 -+ add r2, sp, #496 - vld1.8 {d6-d7}, [r2, : 128] - vmlal.s32 q6, d19, d21 -- add r2, sp, #576 -+ add r2, sp, #544 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q0, d30, d8 -- add r2, sp, #672 -+ add r2, sp, #640 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q5, d30, d29 -- add r2, sp, #608 -+ add r2, sp, #576 - vld1.8 {d24-d25}, [r2, : 128] - vmlal.s32 q1, d30, d28 - vadd.i64 q13, q0, q11 -@@ -823,22 +805,19 @@ - vadd.i32 q5, q5, q0 - vtrn.32 q11, q14 - vadd.i32 q6, q6, q3 -- add r2, sp, #560 -+ add r2, sp, #528 - vadd.i32 q10, q10, q2 - vtrn.32 d24, d25 -- vst1.8 {d12-d13}, [r2, : 128] -+ vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q6, q13, #1 -- add r2, sp, #576 -- vst1.8 {d20-d21}, [r2, : 128] -+ vst1.8 {d20-d21}, [r2, : 128]! - vshl.i32 q10, q14, #1 -- add r2, sp, #592 -- vst1.8 {d12-d13}, [r2, : 128] -+ vst1.8 {d12-d13}, [r2, : 128]! 
- vshl.i32 q15, q12, #1 - vadd.i32 q8, q8, q4 - vext.32 d10, d31, d30, #0 - vadd.i32 q7, q7, q1 -- add r2, sp, #608 -- vst1.8 {d16-d17}, [r2, : 128] -+ vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q8, d18, d5 - vmlal.s32 q8, d26, d4 - vmlal.s32 q8, d19, d9 -@@ -849,8 +828,7 @@ - vmlal.s32 q8, d29, d1 - vmlal.s32 q8, d24, d6 - vmlal.s32 q8, d25, d0 -- add r2, sp, #624 -- vst1.8 {d14-d15}, [r2, : 128] -+ vst1.8 {d14-d15}, [r2, : 128]! - vmull.s32 q2, d18, d4 - vmlal.s32 q2, d12, d9 - vmlal.s32 q2, d13, d8 -@@ -858,8 +836,7 @@ - vmlal.s32 q2, d22, d2 - vmlal.s32 q2, d23, d1 - vmlal.s32 q2, d24, d0 -- add r2, sp, #640 -- vst1.8 {d20-d21}, [r2, : 128] -+ vst1.8 {d20-d21}, [r2, : 128]! - vmull.s32 q7, d18, d9 - vmlal.s32 q7, d26, d3 - vmlal.s32 q7, d19, d8 -@@ -868,15 +845,13 @@ - vmlal.s32 q7, d28, d1 - vmlal.s32 q7, d23, d6 - vmlal.s32 q7, d29, d0 -- add r2, sp, #656 -- vst1.8 {d10-d11}, [r2, : 128] -+ vst1.8 {d10-d11}, [r2, : 128]! - vmull.s32 q5, d18, d3 - vmlal.s32 q5, d19, d2 - vmlal.s32 q5, d22, d1 - vmlal.s32 q5, d23, d0 - vmlal.s32 q5, d12, d8 -- add r2, sp, #672 -- vst1.8 {d16-d17}, [r2, : 128] -+ vst1.8 {d16-d17}, [r2, : 128]! 
- vmull.s32 q4, d18, d8 - vmlal.s32 q4, d26, d2 - vmlal.s32 q4, d19, d7 -@@ -887,7 +862,7 @@ - vmlal.s32 q8, d26, d1 - vmlal.s32 q8, d19, d6 - vmlal.s32 q8, d27, d0 -- add r2, sp, #576 -+ add r2, sp, #544 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q7, d24, d21 - vmlal.s32 q7, d25, d20 -@@ -896,32 +871,30 @@ - vmlal.s32 q8, d22, d21 - vmlal.s32 q8, d28, d20 - vmlal.s32 q5, d24, d20 -- add r2, sp, #576 - vst1.8 {d14-d15}, [r2, : 128] - vmull.s32 q7, d18, d6 - vmlal.s32 q7, d26, d0 -- add r2, sp, #656 -+ add r2, sp, #624 - vld1.8 {d30-d31}, [r2, : 128] - vmlal.s32 q2, d30, d21 - vmlal.s32 q7, d19, d21 - vmlal.s32 q7, d27, d20 -- add r2, sp, #624 -+ add r2, sp, #592 - vld1.8 {d26-d27}, [r2, : 128] - vmlal.s32 q4, d25, d27 - vmlal.s32 q8, d29, d27 - vmlal.s32 q8, d25, d26 - vmlal.s32 q7, d28, d27 - vmlal.s32 q7, d29, d26 -- add r2, sp, #608 -+ add r2, sp, #576 - vld1.8 {d28-d29}, [r2, : 128] - vmlal.s32 q4, d24, d29 - vmlal.s32 q8, d23, d29 - vmlal.s32 q8, d24, d28 - vmlal.s32 q7, d22, d29 - vmlal.s32 q7, d23, d28 -- add r2, sp, #608 - vst1.8 {d8-d9}, [r2, : 128] -- add r2, sp, #560 -+ add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vmlal.s32 q7, d24, d9 - vmlal.s32 q7, d25, d31 -@@ -942,36 +915,36 @@ - vmlal.s32 q0, d23, d26 - vmlal.s32 q0, d24, d31 - vmlal.s32 q0, d19, d20 -- add r2, sp, #640 -+ add r2, sp, #608 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q2, d18, d7 -- vmlal.s32 q2, d19, d6 - vmlal.s32 q5, d18, d6 -- vmlal.s32 q5, d19, d21 - vmlal.s32 q1, d18, d21 -- vmlal.s32 q1, d19, d29 - vmlal.s32 q0, d18, d28 -- vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d18, d29 -+ vmlal.s32 q2, d19, d6 -+ vmlal.s32 q5, d19, d21 -+ vmlal.s32 q1, d19, d29 -+ vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d19, d28 -- add r2, sp, #592 -+ add r2, sp, #560 - vld1.8 {d18-d19}, [r2, : 128] -- add r2, sp, #512 -+ add r2, sp, #480 - vld1.8 {d22-d23}, [r2, : 128] - vmlal.s32 q5, d19, d7 - vmlal.s32 q0, d18, d21 - vmlal.s32 q0, d19, d29 - vmlal.s32 q6, d18, d6 -- add r2, sp, #528 -+ add r2, sp, #496 
- vld1.8 {d6-d7}, [r2, : 128] - vmlal.s32 q6, d19, d21 -- add r2, sp, #576 -+ add r2, sp, #544 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q0, d30, d8 -- add r2, sp, #672 -+ add r2, sp, #640 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q5, d30, d29 -- add r2, sp, #608 -+ add r2, sp, #576 - vld1.8 {d24-d25}, [r2, : 128] - vmlal.s32 q1, d30, d28 - vadd.i64 q13, q0, q11 -@@ -1069,7 +1042,7 @@ - sub r4, r4, #24 - vst1.8 d0, [r2, : 64] - vst1.8 d1, [r4, : 64] -- add r2, sp, #544 -+ add r2, sp, #512 - add r4, r3, #144 - add r5, r3, #192 - vld1.8 {d0-d1}, [r2, : 128] -@@ -1139,14 +1112,13 @@ - vmlal.s32 q0, d12, d8 - vmlal.s32 q0, d13, d17 - vmlal.s32 q0, d6, d6 -- add r2, sp, #512 -- vld1.8 {d18-d19}, [r2, : 128] -+ add r2, sp, #480 -+ vld1.8 {d18-d19}, [r2, : 128]! - vmull.s32 q3, d16, d7 - vmlal.s32 q3, d10, d15 - vmlal.s32 q3, d11, d14 - vmlal.s32 q3, d12, d9 - vmlal.s32 q3, d13, d8 -- add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vadd.i64 q5, q12, q9 - vadd.i64 q6, q15, q9 -@@ -1295,22 +1267,19 @@ - vadd.i32 q5, q5, q0 - vtrn.32 q11, q14 - vadd.i32 q6, q6, q3 -- add r2, sp, #560 -+ add r2, sp, #528 - vadd.i32 q10, q10, q2 - vtrn.32 d24, d25 -- vst1.8 {d12-d13}, [r2, : 128] -+ vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q6, q13, #1 -- add r2, sp, #576 -- vst1.8 {d20-d21}, [r2, : 128] -+ vst1.8 {d20-d21}, [r2, : 128]! - vshl.i32 q10, q14, #1 -- add r2, sp, #592 -- vst1.8 {d12-d13}, [r2, : 128] -+ vst1.8 {d12-d13}, [r2, : 128]! - vshl.i32 q15, q12, #1 - vadd.i32 q8, q8, q4 - vext.32 d10, d31, d30, #0 - vadd.i32 q7, q7, q1 -- add r2, sp, #608 -- vst1.8 {d16-d17}, [r2, : 128] -+ vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q8, d18, d5 - vmlal.s32 q8, d26, d4 - vmlal.s32 q8, d19, d9 -@@ -1321,8 +1290,7 @@ - vmlal.s32 q8, d29, d1 - vmlal.s32 q8, d24, d6 - vmlal.s32 q8, d25, d0 -- add r2, sp, #624 -- vst1.8 {d14-d15}, [r2, : 128] -+ vst1.8 {d14-d15}, [r2, : 128]! 
- vmull.s32 q2, d18, d4 - vmlal.s32 q2, d12, d9 - vmlal.s32 q2, d13, d8 -@@ -1330,8 +1298,7 @@ - vmlal.s32 q2, d22, d2 - vmlal.s32 q2, d23, d1 - vmlal.s32 q2, d24, d0 -- add r2, sp, #640 -- vst1.8 {d20-d21}, [r2, : 128] -+ vst1.8 {d20-d21}, [r2, : 128]! - vmull.s32 q7, d18, d9 - vmlal.s32 q7, d26, d3 - vmlal.s32 q7, d19, d8 -@@ -1340,15 +1307,13 @@ - vmlal.s32 q7, d28, d1 - vmlal.s32 q7, d23, d6 - vmlal.s32 q7, d29, d0 -- add r2, sp, #656 -- vst1.8 {d10-d11}, [r2, : 128] -+ vst1.8 {d10-d11}, [r2, : 128]! - vmull.s32 q5, d18, d3 - vmlal.s32 q5, d19, d2 - vmlal.s32 q5, d22, d1 - vmlal.s32 q5, d23, d0 - vmlal.s32 q5, d12, d8 -- add r2, sp, #672 -- vst1.8 {d16-d17}, [r2, : 128] -+ vst1.8 {d16-d17}, [r2, : 128]! - vmull.s32 q4, d18, d8 - vmlal.s32 q4, d26, d2 - vmlal.s32 q4, d19, d7 -@@ -1359,7 +1324,7 @@ - vmlal.s32 q8, d26, d1 - vmlal.s32 q8, d19, d6 - vmlal.s32 q8, d27, d0 -- add r2, sp, #576 -+ add r2, sp, #544 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q7, d24, d21 - vmlal.s32 q7, d25, d20 -@@ -1368,32 +1333,30 @@ - vmlal.s32 q8, d22, d21 - vmlal.s32 q8, d28, d20 - vmlal.s32 q5, d24, d20 -- add r2, sp, #576 - vst1.8 {d14-d15}, [r2, : 128] - vmull.s32 q7, d18, d6 - vmlal.s32 q7, d26, d0 -- add r2, sp, #656 -+ add r2, sp, #624 - vld1.8 {d30-d31}, [r2, : 128] - vmlal.s32 q2, d30, d21 - vmlal.s32 q7, d19, d21 - vmlal.s32 q7, d27, d20 -- add r2, sp, #624 -+ add r2, sp, #592 - vld1.8 {d26-d27}, [r2, : 128] - vmlal.s32 q4, d25, d27 - vmlal.s32 q8, d29, d27 - vmlal.s32 q8, d25, d26 - vmlal.s32 q7, d28, d27 - vmlal.s32 q7, d29, d26 -- add r2, sp, #608 -+ add r2, sp, #576 - vld1.8 {d28-d29}, [r2, : 128] - vmlal.s32 q4, d24, d29 - vmlal.s32 q8, d23, d29 - vmlal.s32 q8, d24, d28 - vmlal.s32 q7, d22, d29 - vmlal.s32 q7, d23, d28 -- add r2, sp, #608 - vst1.8 {d8-d9}, [r2, : 128] -- add r2, sp, #560 -+ add r2, sp, #528 - vld1.8 {d8-d9}, [r2, : 128] - vmlal.s32 q7, d24, d9 - vmlal.s32 q7, d25, d31 -@@ -1414,36 +1377,36 @@ - vmlal.s32 q0, d23, d26 - vmlal.s32 q0, d24, d31 - 
vmlal.s32 q0, d19, d20 -- add r2, sp, #640 -+ add r2, sp, #608 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q2, d18, d7 -- vmlal.s32 q2, d19, d6 - vmlal.s32 q5, d18, d6 -- vmlal.s32 q5, d19, d21 - vmlal.s32 q1, d18, d21 -- vmlal.s32 q1, d19, d29 - vmlal.s32 q0, d18, d28 -- vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d18, d29 -+ vmlal.s32 q2, d19, d6 -+ vmlal.s32 q5, d19, d21 -+ vmlal.s32 q1, d19, d29 -+ vmlal.s32 q0, d19, d9 - vmlal.s32 q6, d19, d28 -- add r2, sp, #592 -+ add r2, sp, #560 - vld1.8 {d18-d19}, [r2, : 128] -- add r2, sp, #512 -+ add r2, sp, #480 - vld1.8 {d22-d23}, [r2, : 128] - vmlal.s32 q5, d19, d7 - vmlal.s32 q0, d18, d21 - vmlal.s32 q0, d19, d29 - vmlal.s32 q6, d18, d6 -- add r2, sp, #528 -+ add r2, sp, #496 - vld1.8 {d6-d7}, [r2, : 128] - vmlal.s32 q6, d19, d21 -- add r2, sp, #576 -+ add r2, sp, #544 - vld1.8 {d18-d19}, [r2, : 128] - vmlal.s32 q0, d30, d8 -- add r2, sp, #672 -+ add r2, sp, #640 - vld1.8 {d20-d21}, [r2, : 128] - vmlal.s32 q5, d30, d29 -- add r2, sp, #608 -+ add r2, sp, #576 - vld1.8 {d24-d25}, [r2, : 128] - vmlal.s32 q1, d30, d28 - vadd.i64 q13, q0, q11 -@@ -1541,10 +1504,10 @@ - sub r4, r4, #24 - vst1.8 d0, [r2, : 64] - vst1.8 d1, [r4, : 64] -- ldr r2, [sp, #488] -- ldr r4, [sp, #492] -+ ldr r2, [sp, #456] -+ ldr r4, [sp, #460] - subs r5, r2, #1 -- bge ._mainloop -+ bge .Lmainloop - add r1, r3, #144 - add r2, r3, #336 - vld1.8 {d0-d1}, [r1, : 128]! -@@ -1553,41 +1516,41 @@ - vst1.8 {d0-d1}, [r2, : 128]! - vst1.8 {d2-d3}, [r2, : 128]! 
- vst1.8 d4, [r2, : 64] -- ldr r1, =0 --._invertloop: -+ movw r1, #0 -+.Linvertloop: - add r2, r3, #144 -- ldr r4, =0 -- ldr r5, =2 -+ movw r4, #0 -+ movw r5, #2 - cmp r1, #1 -- ldreq r5, =1 -+ moveq r5, #1 - addeq r2, r3, #336 - addeq r4, r3, #48 - cmp r1, #2 -- ldreq r5, =1 -+ moveq r5, #1 - addeq r2, r3, #48 - cmp r1, #3 -- ldreq r5, =5 -+ moveq r5, #5 - addeq r4, r3, #336 - cmp r1, #4 -- ldreq r5, =10 -+ moveq r5, #10 - cmp r1, #5 -- ldreq r5, =20 -+ moveq r5, #20 - cmp r1, #6 -- ldreq r5, =10 -+ moveq r5, #10 - addeq r2, r3, #336 - addeq r4, r3, #336 - cmp r1, #7 -- ldreq r5, =50 -+ moveq r5, #50 - cmp r1, #8 -- ldreq r5, =100 -+ moveq r5, #100 - cmp r1, #9 -- ldreq r5, =50 -+ moveq r5, #50 - addeq r2, r3, #336 - cmp r1, #10 -- ldreq r5, =5 -+ moveq r5, #5 - addeq r2, r3, #48 - cmp r1, #11 -- ldreq r5, =0 -+ moveq r5, #0 - addeq r2, r3, #96 - add r6, r3, #144 - add r7, r3, #288 -@@ -1598,8 +1561,8 @@ - vst1.8 {d2-d3}, [r7, : 128]! - vst1.8 d4, [r7, : 64] - cmp r5, #0 -- beq ._skipsquaringloop --._squaringloop: -+ beq .Lskipsquaringloop -+.Lsquaringloop: - add r6, r3, #288 - add r7, r3, #288 - add r8, r3, #288 -@@ -1611,7 +1574,7 @@ - vld1.8 {d6-d7}, [r7, : 128]! - vld1.8 {d9}, [r7, : 64] - vld1.8 {d10-d11}, [r6, : 128]! -- add r7, sp, #416 -+ add r7, sp, #384 - vld1.8 {d12-d13}, [r6, : 128]! - vmul.i32 q7, q2, q0 - vld1.8 {d8}, [r6, : 64] -@@ -1726,7 +1689,7 @@ - vext.32 d10, d6, d6, #0 - vmov.i32 q1, #0xffffffff - vshl.i64 q4, q1, #25 -- add r7, sp, #512 -+ add r7, sp, #480 - vld1.8 {d14-d15}, [r7, : 128] - vadd.i64 q9, q2, q7 - vshl.i64 q1, q1, #26 -@@ -1735,7 +1698,7 @@ - vadd.i64 q5, q5, q10 - vand q9, q9, q1 - vld1.8 {d16}, [r6, : 64]! 
-- add r6, sp, #528 -+ add r6, sp, #496 - vld1.8 {d20-d21}, [r6, : 128] - vadd.i64 q11, q5, q10 - vsub.i64 q2, q2, q9 -@@ -1789,8 +1752,8 @@ - sub r6, r6, #32 - vst1.8 d4, [r6, : 64] - subs r5, r5, #1 -- bhi ._squaringloop --._skipsquaringloop: -+ bhi .Lsquaringloop -+.Lskipsquaringloop: - mov r2, r2 - add r5, r3, #288 - add r6, r3, #144 -@@ -1802,7 +1765,7 @@ - vld1.8 {d6-d7}, [r5, : 128]! - vld1.8 {d9}, [r5, : 64] - vld1.8 {d10-d11}, [r2, : 128]! -- add r5, sp, #416 -+ add r5, sp, #384 - vld1.8 {d12-d13}, [r2, : 128]! - vmul.i32 q7, q2, q0 - vld1.8 {d8}, [r2, : 64] -@@ -1917,7 +1880,7 @@ - vext.32 d10, d6, d6, #0 - vmov.i32 q1, #0xffffffff - vshl.i64 q4, q1, #25 -- add r5, sp, #512 -+ add r5, sp, #480 - vld1.8 {d14-d15}, [r5, : 128] - vadd.i64 q9, q2, q7 - vshl.i64 q1, q1, #26 -@@ -1926,7 +1889,7 @@ - vadd.i64 q5, q5, q10 - vand q9, q9, q1 - vld1.8 {d16}, [r2, : 64]! -- add r2, sp, #528 -+ add r2, sp, #496 - vld1.8 {d20-d21}, [r2, : 128] - vadd.i64 q11, q5, q10 - vsub.i64 q2, q2, q9 -@@ -1980,7 +1943,7 @@ - sub r2, r2, #32 - vst1.8 d4, [r2, : 64] - cmp r4, #0 -- beq ._skippostcopy -+ beq .Lskippostcopy - add r2, r3, #144 - mov r4, r4 - vld1.8 {d0-d1}, [r2, : 128]! -@@ -1989,9 +1952,9 @@ - vst1.8 {d0-d1}, [r4, : 128]! - vst1.8 {d2-d3}, [r4, : 128]! - vst1.8 d4, [r4, : 64] --._skippostcopy: -+.Lskippostcopy: - cmp r1, #1 -- bne ._skipfinalcopy -+ bne .Lskipfinalcopy - add r2, r3, #288 - add r4, r3, #144 - vld1.8 {d0-d1}, [r2, : 128]! -@@ -2000,10 +1963,10 @@ - vst1.8 {d0-d1}, [r4, : 128]! - vst1.8 {d2-d3}, [r4, : 128]! 
- vst1.8 d4, [r4, : 64] --._skipfinalcopy: -+.Lskipfinalcopy: - add r1, r1, #1 - cmp r1, #12 -- blo ._invertloop -+ blo .Linvertloop - add r1, r3, #144 - ldr r2, [r1], #4 - ldr r3, [r1], #4 -@@ -2085,21 +2048,15 @@ - add r8, r8, r10, LSL #12 - mov r9, r10, LSR #20 - add r1, r9, r1, LSL #6 -- str r2, [r0], #4 -- str r3, [r0], #4 -- str r4, [r0], #4 -- str r5, [r0], #4 -- str r6, [r0], #4 -- str r7, [r0], #4 -- str r8, [r0], #4 -- str r1, [r0] -- ldrd r4, [sp, #0] -- ldrd r6, [sp, #8] -- ldrd r8, [sp, #16] -- ldrd r10, [sp, #24] -- ldr r12, [sp, #480] -- ldr r14, [sp, #484] -- ldr r0, =0 -- mov sp, r12 -- vpop {q4, q5, q6, q7} -- bx lr -+ str r2, [r0] -+ str r3, [r0, #4] -+ str r4, [r0, #8] -+ str r5, [r0, #12] -+ str r6, [r0, #16] -+ str r7, [r0, #20] -+ str r8, [r0, #24] -+ str r1, [r0, #28] -+ movw r0, #0 -+ mov sp, ip -+ pop {r4-r11, pc} -+ENDPROC(curve25519_neon) ---- /dev/null -+++ b/arch/arm/crypto/curve25519-glue.c -@@ -0,0 +1,127 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This -+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been -+ * manually reworked for use in kernel space. 
-+ */ -+ -+#include <asm/hwcap.h> -+#include <asm/neon.h> -+#include <asm/simd.h> -+#include <crypto/internal/kpp.h> -+#include <crypto/internal/simd.h> -+#include <linux/types.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/jump_label.h> -+#include <crypto/curve25519.h> -+ -+asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE], -+ const u8 basepoint[CURVE25519_KEY_SIZE]); -+ -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -+ -+void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], -+ const u8 scalar[CURVE25519_KEY_SIZE], -+ const u8 point[CURVE25519_KEY_SIZE]) -+{ -+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) { -+ kernel_neon_begin(); -+ curve25519_neon(out, scalar, point); -+ kernel_neon_end(); -+ } else { -+ curve25519_generic(out, scalar, point); -+ } -+} -+EXPORT_SYMBOL(curve25519_arch); -+ -+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, -+ unsigned int len) -+{ -+ u8 *secret = kpp_tfm_ctx(tfm); -+ -+ if (!len) -+ curve25519_generate_secret(secret); -+ else if (len == CURVE25519_KEY_SIZE && -+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) -+ memcpy(secret, buf, CURVE25519_KEY_SIZE); -+ else -+ return -EINVAL; -+ return 0; -+} -+ -+static int curve25519_compute_value(struct kpp_request *req) -+{ -+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); -+ const u8 *secret = kpp_tfm_ctx(tfm); -+ u8 public_key[CURVE25519_KEY_SIZE]; -+ u8 buf[CURVE25519_KEY_SIZE]; -+ int copied, nbytes; -+ u8 const *bp; -+ -+ if (req->src) { -+ copied = sg_copy_to_buffer(req->src, -+ sg_nents_for_len(req->src, -+ CURVE25519_KEY_SIZE), -+ public_key, CURVE25519_KEY_SIZE); -+ if (copied != CURVE25519_KEY_SIZE) -+ return -EINVAL; -+ bp = public_key; -+ } else { -+ bp = curve25519_base_point; -+ } -+ -+ curve25519_arch(buf, secret, bp); -+ -+ /* might want less than we've got */ -+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); 
-+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, -+ nbytes), -+ buf, nbytes); -+ if (copied != nbytes) -+ return -EINVAL; -+ return 0; -+} -+ -+static unsigned int curve25519_max_size(struct crypto_kpp *tfm) -+{ -+ return CURVE25519_KEY_SIZE; -+} -+ -+static struct kpp_alg curve25519_alg = { -+ .base.cra_name = "curve25519", -+ .base.cra_driver_name = "curve25519-neon", -+ .base.cra_priority = 200, -+ .base.cra_module = THIS_MODULE, -+ .base.cra_ctxsize = CURVE25519_KEY_SIZE, -+ -+ .set_secret = curve25519_set_secret, -+ .generate_public_key = curve25519_compute_value, -+ .compute_shared_secret = curve25519_compute_value, -+ .max_size = curve25519_max_size, -+}; -+ -+static int __init mod_init(void) -+{ -+ if (elf_hwcap & HWCAP_NEON) { -+ static_branch_enable(&have_neon); -+ return crypto_register_kpp(&curve25519_alg); -+ } -+ return 0; -+} -+ -+static void __exit mod_exit(void) -+{ -+ if (elf_hwcap & HWCAP_NEON) -+ crypto_unregister_kpp(&curve25519_alg); -+} -+ -+module_init(mod_init); -+module_exit(mod_exit); -+ -+MODULE_ALIAS_CRYPTO("curve25519"); -+MODULE_ALIAS_CRYPTO("curve25519-neon"); -+MODULE_LICENSE("GPL v2"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0032-crypto-chacha20poly1305-import-construction-and-self.patch b/target/linux/generic/backport-5.4/080-wireguard-0032-crypto-chacha20poly1305-import-construction-and-self.patch deleted file mode 100644 index 2d5601d7ac..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0032-crypto-chacha20poly1305-import-construction-and-self.patch +++ /dev/null @@ -1,7677 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:39 +0100 -Subject: [PATCH] crypto: chacha20poly1305 - import construction and selftest - from Zinc - -commit ed20078b7e3331e82828be357147af6a3282e4ce upstream. 
- -This incorporates the chacha20poly1305 from the Zinc library, retaining -the library interface, but replacing the implementation with calls into -the code that already existed in the kernel's crypto API. - -Note that this library API does not implement RFC7539 fully, given that -it is limited to 64-bit nonces. (The 96-bit nonce version that was part -of the selftest only has been removed, along with the 96-bit nonce test -vectors that only tested the selftest but not the actual library itself) - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - include/crypto/chacha20poly1305.h | 37 + - lib/crypto/Kconfig | 7 + - lib/crypto/Makefile | 4 + - lib/crypto/chacha20poly1305-selftest.c | 7348 ++++++++++++++++++++++++ - lib/crypto/chacha20poly1305.c | 219 + - 5 files changed, 7615 insertions(+) - create mode 100644 include/crypto/chacha20poly1305.h - create mode 100644 lib/crypto/chacha20poly1305-selftest.c - create mode 100644 lib/crypto/chacha20poly1305.c - ---- /dev/null -+++ b/include/crypto/chacha20poly1305.h -@@ -0,0 +1,37 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef __CHACHA20POLY1305_H -+#define __CHACHA20POLY1305_H -+ -+#include <linux/types.h> -+ -+enum chacha20poly1305_lengths { -+ XCHACHA20POLY1305_NONCE_SIZE = 24, -+ CHACHA20POLY1305_KEY_SIZE = 32, -+ CHACHA20POLY1305_AUTHTAG_SIZE = 16 -+}; -+ -+void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]); -+ -+bool __must_check -+chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]); -+ -+void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]); -+ -+bool __must_check xchacha20poly1305_decrypt( -+ u8 *dst, const u8 *src, const size_t src_len, const u8 *ad, -+ const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]); -+ -+#endif /* __CHACHA20POLY1305_H */ ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -119,5 +119,12 @@ config CRYPTO_LIB_POLY1305 - by either the generic implementation or an arch-specific one, if one - is available and enabled. 
- -+config CRYPTO_LIB_CHACHA20POLY1305 -+ tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)" -+ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA -+ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305 -+ select CRYPTO_LIB_CHACHA -+ select CRYPTO_LIB_POLY1305 -+ - config CRYPTO_LIB_SHA256 - tristate ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -16,6 +16,9 @@ libblake2s-generic-y += blake2s-gener - obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o - libblake2s-y += blake2s.o - -+obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o -+libchacha20poly1305-y += chacha20poly1305.o -+ - obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o - libcurve25519-y := curve25519-fiat32.o - libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o -@@ -32,4 +35,5 @@ libsha256-y := sha256.o - - ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) - libblake2s-y += blake2s-selftest.o -+libchacha20poly1305-y += chacha20poly1305-selftest.o - endif ---- /dev/null -+++ b/lib/crypto/chacha20poly1305-selftest.c -@@ -0,0 +1,7348 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include <crypto/chacha20poly1305.h> -+#include <crypto/poly1305.h> -+ -+#include <asm/unaligned.h> -+#include <linux/bug.h> -+#include <linux/init.h> -+#include <linux/mm.h> -+#include <linux/kernel.h> -+#include <linux/slab.h> -+ -+struct chacha20poly1305_testvec { -+ const u8 *input, *output, *assoc, *nonce, *key; -+ size_t ilen, alen, nlen; -+ bool failure; -+}; -+ -+/* The first of these are the ChaCha20-Poly1305 AEAD test vectors from RFC7539 -+ * 2.8.2. After they are generated by reference implementations. And the final -+ * marked ones are taken from wycheproof, but we only do these for the encrypt -+ * side, because mostly we're stressing the primitives rather than the actual -+ * chapoly construction. 
-+ */ -+ -+static const u8 enc_input001[] __initconst = { -+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, -+ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, -+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, -+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, -+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, -+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, -+ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, -+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, -+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, -+ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, -+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, -+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, -+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, -+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, -+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, -+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, -+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, -+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, -+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, -+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, -+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, -+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, -+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, -+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, -+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, -+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, -+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, -+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, -+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, -+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, -+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, -+ 0x9d -+}; -+static const u8 enc_output001[] __initconst = { -+ 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4, -+ 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd, -+ 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89, -+ 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 
0x8c, 0xb2, -+ 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee, -+ 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0, -+ 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00, -+ 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf, -+ 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce, -+ 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81, -+ 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd, -+ 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55, -+ 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61, -+ 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38, -+ 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0, -+ 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4, -+ 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46, -+ 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9, -+ 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e, -+ 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e, -+ 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15, -+ 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a, -+ 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea, -+ 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a, -+ 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99, -+ 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e, -+ 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10, -+ 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10, -+ 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94, -+ 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30, -+ 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf, -+ 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29, -+ 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70, -+ 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb, -+ 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, -+ 0x38 -+}; -+static const u8 enc_assoc001[] __initconst = { -+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x4e, 0x91 -+}; -+static const u8 enc_nonce001[] __initconst = { -+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 -+}; -+static const u8 enc_key001[] __initconst = { -+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, -+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, -+ 0x47, 
0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, -+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 -+}; -+ -+static const u8 enc_input002[] __initconst = { }; -+static const u8 enc_output002[] __initconst = { -+ 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1, -+ 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92 -+}; -+static const u8 enc_assoc002[] __initconst = { }; -+static const u8 enc_nonce002[] __initconst = { -+ 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e -+}; -+static const u8 enc_key002[] __initconst = { -+ 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f, -+ 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86, -+ 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef, -+ 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68 -+}; -+ -+static const u8 enc_input003[] __initconst = { }; -+static const u8 enc_output003[] __initconst = { -+ 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6, -+ 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77 -+}; -+static const u8 enc_assoc003[] __initconst = { -+ 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b -+}; -+static const u8 enc_nonce003[] __initconst = { -+ 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d -+}; -+static const u8 enc_key003[] __initconst = { -+ 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88, -+ 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a, -+ 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08, -+ 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d -+}; -+ -+static const u8 enc_input004[] __initconst = { -+ 0xa4 -+}; -+static const u8 enc_output004[] __initconst = { -+ 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2, -+ 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac, -+ 0x89 -+}; -+static const u8 enc_assoc004[] __initconst = { -+ 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40 -+}; -+static const u8 enc_nonce004[] __initconst = { -+ 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4 -+}; -+static const u8 enc_key004[] __initconst = { -+ 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8, -+ 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1, -+ 0xe2, 0x18, 
0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d, -+ 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e -+}; -+ -+static const u8 enc_input005[] __initconst = { -+ 0x2d -+}; -+static const u8 enc_output005[] __initconst = { -+ 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e, -+ 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c, -+ 0xac -+}; -+static const u8 enc_assoc005[] __initconst = { }; -+static const u8 enc_nonce005[] __initconst = { -+ 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30 -+}; -+static const u8 enc_key005[] __initconst = { -+ 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31, -+ 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87, -+ 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01, -+ 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87 -+}; -+ -+static const u8 enc_input006[] __initconst = { -+ 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a, -+ 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92, -+ 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37, -+ 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50, -+ 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec, -+ 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb, -+ 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66, -+ 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb, -+ 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b, -+ 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e, -+ 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3, -+ 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0, -+ 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb, -+ 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41, -+ 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc, -+ 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde, -+ 0x8f -+}; -+static const u8 enc_output006[] __initconst = { -+ 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1, -+ 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15, -+ 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c, -+ 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda, -+ 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11, -+ 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8, -+ 0xf7, 0x99, 0x2e, 0x4a, 
0xd0, 0xb8, 0x2c, 0xdc, -+ 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3, -+ 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5, -+ 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02, -+ 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93, -+ 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78, -+ 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1, -+ 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66, -+ 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc, -+ 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0, -+ 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d, -+ 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a, -+ 0xeb -+}; -+static const u8 enc_assoc006[] __initconst = { -+ 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b -+}; -+static const u8 enc_nonce006[] __initconst = { -+ 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c -+}; -+static const u8 enc_key006[] __initconst = { -+ 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae, -+ 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78, -+ 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9, -+ 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01 -+}; -+ -+static const u8 enc_input007[] __initconst = { -+ 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5, -+ 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a, -+ 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1, -+ 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17, -+ 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c, -+ 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1, -+ 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51, -+ 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1, -+ 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86, -+ 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a, -+ 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a, -+ 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98, -+ 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36, -+ 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34, -+ 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57, -+ 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84, -+ 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4, -+ 0xc9, 0x2a, 0xdb, 
0x4a, 0x53, 0xc4, 0xbe, 0x80, -+ 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82, -+ 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5, -+ 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d, -+ 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c, -+ 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf, -+ 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc, -+ 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3, -+ 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14, -+ 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81, -+ 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77, -+ 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3, -+ 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2, -+ 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b, -+ 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3 -+}; -+static const u8 enc_output007[] __initconst = { -+ 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c, -+ 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8, -+ 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c, -+ 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb, -+ 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0, -+ 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21, -+ 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70, -+ 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac, -+ 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99, -+ 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9, -+ 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f, -+ 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7, -+ 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53, -+ 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12, -+ 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6, -+ 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0, -+ 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54, -+ 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6, -+ 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e, -+ 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb, -+ 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30, -+ 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f, -+ 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2, -+ 0xea, 0xce, 0x78, 0x00, 
0xd8, 0x8b, 0xd5, 0x2e, -+ 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34, -+ 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39, -+ 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7, -+ 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9, -+ 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82, -+ 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04, -+ 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34, -+ 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef, -+ 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42, -+ 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53 -+}; -+static const u8 enc_assoc007[] __initconst = { }; -+static const u8 enc_nonce007[] __initconst = { -+ 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0 -+}; -+static const u8 enc_key007[] __initconst = { -+ 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd, -+ 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c, -+ 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80, -+ 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01 -+}; -+ -+static const u8 enc_input008[] __initconst = { -+ 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10, -+ 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2, -+ 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c, -+ 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb, -+ 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12, -+ 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa, -+ 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6, -+ 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4, -+ 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91, -+ 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb, -+ 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47, -+ 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15, -+ 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f, -+ 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a, -+ 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3, -+ 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97, -+ 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80, -+ 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e, -+ 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f, -+ 0x60, 0x3a, 0x08, 0x47, 
0xaf, 0xe1, 0xf6, 0x10, -+ 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a, -+ 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0, -+ 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35, -+ 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d, -+ 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d, -+ 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57, -+ 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4, -+ 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f, -+ 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39, -+ 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda, -+ 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17, -+ 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43, -+ 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19, -+ 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09, -+ 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21, -+ 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07, -+ 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f, -+ 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b, -+ 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a, -+ 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed, -+ 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2, -+ 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca, -+ 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff, -+ 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b, -+ 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b, -+ 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b, -+ 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6, -+ 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04, -+ 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48, -+ 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b, -+ 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13, -+ 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8, -+ 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f, -+ 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0, -+ 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92, -+ 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a, -+ 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41, -+ 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17, -+ 0x5c, 0xb5, 0x33, 0x17, 0x68, 
0x2b, 0x08, 0x30, -+ 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20, -+ 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49, -+ 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a, -+ 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b, -+ 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3 -+}; -+static const u8 enc_output008[] __initconst = { -+ 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd, -+ 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1, -+ 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93, -+ 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d, -+ 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c, -+ 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6, -+ 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4, -+ 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5, -+ 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84, -+ 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd, -+ 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed, -+ 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab, -+ 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13, -+ 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49, -+ 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6, -+ 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8, -+ 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2, -+ 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94, -+ 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18, -+ 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60, -+ 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8, -+ 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b, -+ 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f, -+ 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c, -+ 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20, -+ 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff, -+ 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9, -+ 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c, -+ 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9, -+ 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6, -+ 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea, -+ 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e, -+ 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 
0x55, 0x82, -+ 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1, -+ 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70, -+ 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1, -+ 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c, -+ 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7, -+ 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc, -+ 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc, -+ 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3, -+ 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb, -+ 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97, -+ 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f, -+ 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39, -+ 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f, -+ 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d, -+ 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2, -+ 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d, -+ 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96, -+ 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b, -+ 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20, -+ 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95, -+ 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb, -+ 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35, -+ 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62, -+ 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9, -+ 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6, -+ 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8, -+ 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a, -+ 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93, -+ 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14, -+ 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99, -+ 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86, -+ 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f, -+ 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54 -+}; -+static const u8 enc_assoc008[] __initconst = { }; -+static const u8 enc_nonce008[] __initconst = { -+ 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02 -+}; -+static const u8 enc_key008[] __initconst = { -+ 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53, -+ 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 
0xf0, -+ 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86, -+ 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba -+}; -+ -+static const u8 enc_input009[] __initconst = { -+ 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b, -+ 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8, -+ 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca, -+ 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09, -+ 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5, -+ 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85, -+ 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44, -+ 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97, -+ 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77, -+ 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41, -+ 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c, -+ 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00, -+ 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82, -+ 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f, -+ 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e, -+ 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55, -+ 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab, -+ 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17, -+ 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e, -+ 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f, -+ 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82, -+ 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3, -+ 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f, -+ 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0, -+ 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08, -+ 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b, -+ 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85, -+ 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28, -+ 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c, -+ 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62, -+ 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2, -+ 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3, -+ 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62, -+ 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40, -+ 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f, -+ 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b, 
-+ 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91, -+ 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5, -+ 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c, -+ 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4, -+ 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49, -+ 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04, -+ 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03, -+ 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa, -+ 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec, -+ 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6, -+ 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69, -+ 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36, -+ 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8, -+ 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf, -+ 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe, -+ 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82, -+ 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab, -+ 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d, -+ 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3, -+ 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5, -+ 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34, -+ 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49, -+ 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f, -+ 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d, -+ 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42, -+ 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef, -+ 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27, -+ 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52, -+ 0x65 -+}; -+static const u8 enc_output009[] __initconst = { -+ 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf, -+ 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66, -+ 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72, -+ 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd, -+ 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28, -+ 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe, -+ 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06, -+ 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5, -+ 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7, -+ 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 
0x09, -+ 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a, -+ 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00, -+ 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62, -+ 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb, -+ 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2, -+ 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28, -+ 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e, -+ 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a, -+ 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6, -+ 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83, -+ 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9, -+ 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a, -+ 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79, -+ 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a, -+ 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea, -+ 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b, -+ 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52, -+ 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb, -+ 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89, -+ 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad, -+ 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19, -+ 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71, -+ 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d, -+ 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54, -+ 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a, -+ 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d, -+ 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95, -+ 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42, -+ 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16, -+ 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6, -+ 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf, -+ 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d, -+ 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f, -+ 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b, -+ 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e, -+ 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4, -+ 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c, -+ 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4, -+ 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1, -+ 
0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb, -+ 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff, -+ 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2, -+ 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06, -+ 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66, -+ 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90, -+ 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55, -+ 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc, -+ 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8, -+ 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62, -+ 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba, -+ 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2, -+ 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89, -+ 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06, -+ 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90, -+ 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf, -+ 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8, -+ 0xae -+}; -+static const u8 enc_assoc009[] __initconst = { -+ 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e, -+ 0xef -+}; -+static const u8 enc_nonce009[] __initconst = { -+ 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78 -+}; -+static const u8 enc_key009[] __initconst = { -+ 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5, -+ 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86, -+ 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2, -+ 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b -+}; -+ -+static const u8 enc_input010[] __initconst = { -+ 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf, -+ 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c, -+ 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22, -+ 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc, -+ 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16, -+ 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7, -+ 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4, -+ 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d, -+ 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5, -+ 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46, -+ 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82, -+ 0x58, 0x5e, 0x3b, 0x65, 0x2f, 
0x0e, 0xfd, 0x2b, -+ 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a, -+ 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf, -+ 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca, -+ 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95, -+ 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09, -+ 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3, -+ 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3, -+ 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f, -+ 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58, -+ 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad, -+ 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde, -+ 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44, -+ 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a, -+ 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9, -+ 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26, -+ 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc, -+ 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74, -+ 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b, -+ 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93, -+ 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37, -+ 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f, -+ 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d, -+ 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca, -+ 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73, -+ 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f, -+ 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1, -+ 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9, -+ 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76, -+ 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac, -+ 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7, -+ 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce, -+ 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30, -+ 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb, -+ 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa, -+ 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd, -+ 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f, -+ 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb, -+ 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34, -+ 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 
0xbc, 0x5e, -+ 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f, -+ 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53, -+ 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41, -+ 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e, -+ 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d, -+ 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27, -+ 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e, -+ 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8, -+ 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a, -+ 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12, -+ 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3, -+ 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66, -+ 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0, -+ 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c, -+ 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4, -+ 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49, -+ 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90, -+ 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11, -+ 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c, -+ 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b, -+ 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74, -+ 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c, -+ 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27, -+ 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1, -+ 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27, -+ 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88, -+ 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27, -+ 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b, -+ 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39, -+ 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7, -+ 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc, -+ 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe, -+ 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5, -+ 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf, -+ 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05, -+ 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73, -+ 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda, -+ 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe, -+ 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 
0x71, -+ 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed, -+ 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d, -+ 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33, -+ 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f, -+ 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a, -+ 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa, -+ 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e, -+ 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e, -+ 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87, -+ 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5, -+ 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4, -+ 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38, -+ 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34, -+ 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f, -+ 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36, -+ 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69, -+ 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44, -+ 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5, -+ 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce, -+ 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd, -+ 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27, -+ 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f, -+ 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8, -+ 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a, -+ 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5, -+ 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca, -+ 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e, -+ 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92, -+ 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13, -+ 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf, -+ 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6, -+ 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3, -+ 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b, -+ 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d, -+ 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f, -+ 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40, -+ 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c, -+ 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f -+}; -+static const u8 enc_output010[] __initconst = { 
-+ 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b, -+ 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74, -+ 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1, -+ 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd, -+ 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6, -+ 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5, -+ 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96, -+ 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02, -+ 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30, -+ 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57, -+ 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53, -+ 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65, -+ 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71, -+ 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9, -+ 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18, -+ 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce, -+ 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a, -+ 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69, -+ 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2, -+ 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95, -+ 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49, -+ 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e, -+ 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a, -+ 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a, -+ 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e, -+ 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19, -+ 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b, -+ 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75, -+ 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d, -+ 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d, -+ 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f, -+ 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a, -+ 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d, -+ 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5, -+ 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c, -+ 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77, -+ 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46, -+ 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43, -+ 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe, -+ 0x72, 
0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8, -+ 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76, -+ 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47, -+ 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8, -+ 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32, -+ 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59, -+ 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae, -+ 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a, -+ 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3, -+ 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74, -+ 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75, -+ 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2, -+ 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e, -+ 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2, -+ 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9, -+ 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1, -+ 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07, -+ 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79, -+ 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71, -+ 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad, -+ 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a, -+ 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c, -+ 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9, -+ 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79, -+ 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27, -+ 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90, -+ 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe, -+ 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99, -+ 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1, -+ 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9, -+ 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0, -+ 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28, -+ 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e, -+ 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20, -+ 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60, -+ 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47, -+ 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68, -+ 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe, -+ 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33, -+ 0x3a, 0x61, 
0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8, -+ 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38, -+ 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7, -+ 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04, -+ 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c, -+ 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f, -+ 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c, -+ 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77, -+ 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54, -+ 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5, -+ 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4, -+ 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2, -+ 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e, -+ 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27, -+ 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f, -+ 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92, -+ 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55, -+ 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe, -+ 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04, -+ 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4, -+ 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56, -+ 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02, -+ 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2, -+ 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8, -+ 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27, -+ 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47, -+ 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10, -+ 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43, -+ 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0, -+ 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee, -+ 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47, -+ 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6, -+ 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d, -+ 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c, -+ 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3, -+ 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b, -+ 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09, -+ 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d, -+ 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1, -+ 0x2d, 0xb2, 0x66, 
0xc5, 0x1d, 0xed, 0xb7, 0xcd, -+ 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4, -+ 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63, -+ 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87, -+ 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd, -+ 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e, -+ 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a, -+ 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c, -+ 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38, -+ 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a, -+ 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5, -+ 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9, -+ 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0 -+}; -+static const u8 enc_assoc010[] __initconst = { -+ 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27, -+ 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2 -+}; -+static const u8 enc_nonce010[] __initconst = { -+ 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30 -+}; -+static const u8 enc_key010[] __initconst = { -+ 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44, -+ 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf, -+ 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74, -+ 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7 -+}; -+ -+static const u8 enc_input011[] __initconst = { -+ 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b, -+ 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b, -+ 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d, -+ 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee, -+ 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30, -+ 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20, -+ 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f, -+ 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e, -+ 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66, -+ 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46, -+ 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35, -+ 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6, -+ 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0, -+ 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15, -+ 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13, -+ 0x2a, 0x03, 0x27, 
0x09, 0xb4, 0xfb, 0xe7, 0xb7, -+ 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3, -+ 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37, -+ 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc, -+ 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95, -+ 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8, -+ 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac, -+ 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45, -+ 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf, -+ 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d, -+ 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc, -+ 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45, -+ 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a, -+ 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec, -+ 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e, -+ 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10, -+ 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8, -+ 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66, -+ 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0, -+ 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62, -+ 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b, -+ 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4, -+ 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96, -+ 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7, -+ 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74, -+ 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8, -+ 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b, -+ 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70, -+ 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95, -+ 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3, -+ 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9, -+ 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d, -+ 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e, -+ 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32, -+ 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5, -+ 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80, -+ 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3, -+ 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad, -+ 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d, -+ 0xa5, 0x7f, 0x21, 0x10, 
0xf1, 0x1f, 0x13, 0x20, -+ 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17, -+ 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6, -+ 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d, -+ 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82, -+ 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c, -+ 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9, -+ 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb, -+ 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96, -+ 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9, -+ 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f, -+ 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40, -+ 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc, -+ 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce, -+ 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71, -+ 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f, -+ 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35, -+ 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90, -+ 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8, -+ 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01, -+ 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1, -+ 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe, -+ 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4, -+ 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf, -+ 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9, -+ 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f, -+ 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04, -+ 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7, -+ 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15, -+ 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc, -+ 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0, -+ 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae, -+ 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb, -+ 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed, -+ 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51, -+ 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52, -+ 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84, -+ 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5, -+ 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4, -+ 0xef, 0xf4, 0xea, 0xd7, 0x59, 
0xac, 0x2a, 0x9e, -+ 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74, -+ 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f, -+ 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13, -+ 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea, -+ 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b, -+ 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef, -+ 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09, -+ 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe, -+ 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1, -+ 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9, -+ 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15, -+ 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a, -+ 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab, -+ 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36, -+ 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd, -+ 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde, -+ 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd, -+ 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47, -+ 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5, -+ 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69, -+ 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21, -+ 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98, -+ 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07, -+ 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57, -+ 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd, -+ 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03, -+ 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11, -+ 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96, -+ 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91, -+ 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d, -+ 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0, -+ 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9, -+ 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42, -+ 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a, -+ 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18, -+ 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc, -+ 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce, -+ 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc, -+ 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 
0xaa, 0xf0, -+ 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf, -+ 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7, -+ 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80, -+ 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c, -+ 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82, -+ 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9, -+ 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20, -+ 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58, -+ 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6, -+ 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc, -+ 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50, -+ 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86, -+ 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a, -+ 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80, -+ 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec, -+ 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08, -+ 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c, -+ 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde, -+ 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d, -+ 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17, -+ 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f, -+ 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26, -+ 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96, -+ 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97, -+ 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6, -+ 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55, -+ 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e, -+ 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88, -+ 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5, -+ 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b, -+ 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15, -+ 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1, -+ 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4, -+ 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3, -+ 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf, -+ 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e, -+ 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb, -+ 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76, -+ 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 
0xc5, -+ 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c, -+ 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde, -+ 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f, -+ 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51, -+ 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9, -+ 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99, -+ 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6, -+ 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04, -+ 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31, -+ 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a, -+ 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56, -+ 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e, -+ 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78, -+ 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a, -+ 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7, -+ 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb, -+ 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6, -+ 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8, -+ 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc, -+ 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84, -+ 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86, -+ 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76, -+ 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a, -+ 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73, -+ 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8, -+ 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6, -+ 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2, -+ 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56, -+ 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb, -+ 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab, -+ 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76, -+ 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69, -+ 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d, -+ 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc, -+ 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22, -+ 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39, -+ 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6, -+ 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9, -+ 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f, -+ 
0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1, -+ 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83, -+ 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc, -+ 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4, -+ 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59, -+ 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68, -+ 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef, -+ 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1, -+ 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3, -+ 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44, -+ 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09, -+ 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8, -+ 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a, -+ 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d, -+ 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae, -+ 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2, -+ 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10, -+ 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a, -+ 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34, -+ 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f, -+ 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9, -+ 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b, -+ 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d, -+ 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57, -+ 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03, -+ 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87, -+ 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca, -+ 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53, -+ 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f, -+ 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61, -+ 0x10, 0x1e, 0xbf, 0xec, 0xa8 -+}; -+static const u8 enc_output011[] __initconst = { -+ 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8, -+ 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc, -+ 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74, -+ 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73, -+ 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e, -+ 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9, -+ 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e, -+ 0xcb, 0xab, 0x8f, 0x58, 
0x24, 0x9b, 0x01, 0xfd, -+ 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57, -+ 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19, -+ 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f, -+ 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45, -+ 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e, -+ 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39, -+ 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03, -+ 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f, -+ 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0, -+ 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce, -+ 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb, -+ 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52, -+ 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21, -+ 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a, -+ 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35, -+ 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91, -+ 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b, -+ 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e, -+ 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19, -+ 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07, -+ 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18, -+ 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96, -+ 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68, -+ 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4, -+ 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57, -+ 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c, -+ 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23, -+ 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8, -+ 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6, -+ 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40, -+ 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab, -+ 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb, -+ 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea, -+ 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8, -+ 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31, -+ 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0, -+ 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc, -+ 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94, -+ 0x69, 0xa0, 0xef, 0xd1, 0x85, 
0xb3, 0xa6, 0xb1, -+ 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46, -+ 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6, -+ 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7, -+ 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71, -+ 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a, -+ 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33, -+ 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38, -+ 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23, -+ 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb, -+ 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65, -+ 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73, -+ 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8, -+ 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb, -+ 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a, -+ 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca, -+ 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5, -+ 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71, -+ 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8, -+ 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d, -+ 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6, -+ 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d, -+ 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7, -+ 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5, -+ 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8, -+ 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd, -+ 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29, -+ 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22, -+ 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5, -+ 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67, -+ 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11, -+ 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e, -+ 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09, -+ 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4, -+ 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f, -+ 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa, -+ 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec, -+ 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b, -+ 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d, -+ 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 
0xe5, 0x1b, -+ 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48, -+ 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3, -+ 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63, -+ 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd, -+ 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78, -+ 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed, -+ 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82, -+ 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f, -+ 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3, -+ 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9, -+ 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72, -+ 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74, -+ 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40, -+ 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b, -+ 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a, -+ 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5, -+ 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98, -+ 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71, -+ 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e, -+ 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4, -+ 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46, -+ 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e, -+ 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f, -+ 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93, -+ 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0, -+ 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5, -+ 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61, -+ 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64, -+ 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85, -+ 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20, -+ 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6, -+ 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc, -+ 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8, -+ 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50, -+ 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4, -+ 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80, -+ 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0, -+ 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a, -+ 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 
0x35, -+ 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43, -+ 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12, -+ 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7, -+ 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34, -+ 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42, -+ 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0, -+ 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95, -+ 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74, -+ 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5, -+ 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12, -+ 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6, -+ 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86, -+ 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97, -+ 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45, -+ 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19, -+ 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86, -+ 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c, -+ 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba, -+ 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29, -+ 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6, -+ 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6, -+ 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09, -+ 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31, -+ 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99, -+ 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b, -+ 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca, -+ 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00, -+ 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93, -+ 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3, -+ 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07, -+ 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda, -+ 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90, -+ 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b, -+ 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a, -+ 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6, -+ 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c, -+ 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57, -+ 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15, -+ 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e, -+ 
0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51, -+ 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75, -+ 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19, -+ 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08, -+ 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14, -+ 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba, -+ 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff, -+ 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90, -+ 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e, -+ 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93, -+ 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad, -+ 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2, -+ 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac, -+ 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d, -+ 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06, -+ 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c, -+ 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91, -+ 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17, -+ 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20, -+ 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7, -+ 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf, -+ 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c, -+ 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2, -+ 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e, -+ 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a, -+ 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05, -+ 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58, -+ 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8, -+ 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d, -+ 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71, -+ 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3, -+ 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe, -+ 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62, -+ 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16, -+ 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66, -+ 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4, -+ 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2, -+ 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35, -+ 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3, -+ 0x05, 
0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4, -+ 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f, -+ 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe, -+ 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56, -+ 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b, -+ 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37, -+ 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3, -+ 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f, -+ 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f, -+ 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0, -+ 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70, -+ 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd, -+ 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f, -+ 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e, -+ 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67, -+ 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51, -+ 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23, -+ 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3, -+ 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5, -+ 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09, -+ 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7, -+ 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed, -+ 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb, -+ 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6, -+ 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5, -+ 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96, -+ 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe, -+ 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44, -+ 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6, -+ 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e, -+ 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0, -+ 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79, -+ 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f, -+ 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d, -+ 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82, -+ 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47, -+ 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93, -+ 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6, -+ 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69, -+ 0x86, 0xc4, 
0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e, -+ 0x2b, 0xdf, 0xcd, 0xf9, 0x3c -+}; -+static const u8 enc_assoc011[] __initconst = { -+ 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7 -+}; -+static const u8 enc_nonce011[] __initconst = { -+ 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa -+}; -+static const u8 enc_key011[] __initconst = { -+ 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85, -+ 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca, -+ 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52, -+ 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38 -+}; -+ -+static const u8 enc_input012[] __initconst = { -+ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, -+ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, -+ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, -+ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, -+ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, -+ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, -+ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, -+ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, -+ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, -+ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, -+ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, -+ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, -+ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, -+ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, -+ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, -+ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, -+ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, -+ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, -+ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, -+ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, -+ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, -+ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, -+ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, -+ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, -+ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, -+ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, -+ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, -+ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 
0x23, 0x00, -+ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, -+ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, -+ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, -+ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, -+ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, -+ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, -+ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, -+ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, -+ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, -+ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, -+ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, -+ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, -+ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, -+ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, -+ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, -+ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, -+ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, -+ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, -+ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, -+ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, -+ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, -+ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, -+ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, -+ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, -+ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, -+ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, -+ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, -+ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, -+ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, -+ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, -+ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, -+ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, -+ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, -+ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, -+ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, -+ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, -+ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, -+ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, -+ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 
0xe1, -+ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, -+ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, -+ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, -+ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, -+ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, -+ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, -+ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, -+ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, -+ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, -+ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, -+ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, -+ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, -+ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, -+ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, -+ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, -+ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, -+ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, -+ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, -+ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, -+ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, -+ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, -+ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, -+ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, -+ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, -+ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, -+ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, -+ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, -+ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, -+ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, -+ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, -+ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, -+ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, -+ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, -+ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, -+ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, -+ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, -+ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, -+ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, -+ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, -+ 
0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, -+ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, -+ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, -+ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, -+ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, -+ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, -+ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, -+ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, -+ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, -+ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, -+ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, -+ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, -+ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, -+ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, -+ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, -+ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, -+ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, -+ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, -+ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, -+ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, -+ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, -+ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, -+ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, -+ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, -+ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, -+ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, -+ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, -+ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, -+ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, -+ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, -+ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, -+ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, -+ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, -+ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, -+ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, -+ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, -+ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, -+ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, -+ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, -+ 0x01, 
0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, -+ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, -+ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, -+ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, -+ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, -+ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, -+ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, -+ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, -+ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, -+ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, -+ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, -+ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, -+ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, -+ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, -+ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, -+ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, -+ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, -+ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, -+ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, -+ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, -+ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, -+ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, -+ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, -+ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, -+ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, -+ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, -+ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, -+ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, -+ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, -+ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, -+ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, -+ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, -+ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, -+ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, -+ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, -+ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, -+ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, -+ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, -+ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, -+ 0xf0, 0xaa, 
0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, -+ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, -+ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, -+ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, -+ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, -+ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, -+ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, -+ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, -+ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, -+ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, -+ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, -+ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, -+ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, -+ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, -+ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, -+ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, -+ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, -+ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, -+ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, -+ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, -+ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, -+ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, -+ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, -+ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, -+ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, -+ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, -+ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, -+ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, -+ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, -+ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, -+ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, -+ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, -+ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, -+ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, -+ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, -+ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, -+ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, -+ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, -+ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, -+ 0x50, 0xc1, 0x30, 
0x6c, 0xb1, 0xcd, 0xa0, 0xf0, -+ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, -+ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, -+ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, -+ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, -+ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, -+ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, -+ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, -+ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, -+ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, -+ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, -+ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, -+ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, -+ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, -+ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, -+ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, -+ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, -+ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, -+ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, -+ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, -+ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, -+ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, -+ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, -+ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, -+ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, -+ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, -+ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, -+ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, -+ 0x78, 0xec, 0x00 -+}; -+static const u8 enc_output012[] __initconst = { -+ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, -+ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, -+ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, -+ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, -+ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, -+ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, -+ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, -+ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, -+ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, -+ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, -+ 
0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, -+ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, -+ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, -+ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, -+ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, -+ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, -+ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, -+ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, -+ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, -+ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, -+ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, -+ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, -+ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, -+ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, -+ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, -+ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, -+ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, -+ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, -+ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, -+ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, -+ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, -+ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, -+ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, -+ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, -+ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, -+ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, -+ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, -+ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, -+ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, -+ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, -+ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, -+ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, -+ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, -+ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, -+ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, -+ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, -+ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, -+ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, -+ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, -+ 0x37, 
0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, -+ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, -+ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, -+ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, -+ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, -+ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, -+ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, -+ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, -+ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, -+ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, -+ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, -+ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, -+ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, -+ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, -+ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, -+ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, -+ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, -+ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, -+ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, -+ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, -+ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, -+ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, -+ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, -+ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, -+ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, -+ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, -+ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, -+ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, -+ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, -+ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, -+ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, -+ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, -+ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, -+ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, -+ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, -+ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, -+ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, -+ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, -+ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, -+ 0x2d, 0xaf, 
0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, -+ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, -+ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, -+ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, -+ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, -+ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, -+ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, -+ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, -+ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, -+ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, -+ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, -+ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, -+ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, -+ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, -+ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, -+ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, -+ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, -+ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, -+ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, -+ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, -+ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, -+ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, -+ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, -+ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, -+ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, -+ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, -+ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, -+ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, -+ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, -+ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, -+ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, -+ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, -+ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, -+ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, -+ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, -+ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, -+ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, -+ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, -+ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, -+ 0xd3, 0x99, 0x17, 
0xaa, 0x71, 0xaa, 0x8f, 0x0f, -+ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, -+ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, -+ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, -+ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, -+ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, -+ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, -+ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, -+ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, -+ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, -+ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, -+ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, -+ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, -+ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, -+ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, -+ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, -+ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, -+ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, -+ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, -+ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, -+ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, -+ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, -+ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, -+ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, -+ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, -+ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, -+ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, -+ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, -+ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, -+ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, -+ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, -+ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, -+ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, -+ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, -+ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, -+ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, -+ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, -+ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, -+ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, -+ 0xa7, 0xfa, 0xfa, 0x17, 
0xf7, 0x78, 0xd6, 0xb5, -+ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, -+ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, -+ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, -+ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, -+ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, -+ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, -+ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, -+ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, -+ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, -+ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, -+ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, -+ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, -+ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, -+ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, -+ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, -+ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, -+ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, -+ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, -+ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, -+ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, -+ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, -+ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, -+ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, -+ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, -+ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, -+ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, -+ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, -+ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, -+ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, -+ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, -+ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, -+ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, -+ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, -+ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, -+ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, -+ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, -+ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, -+ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, -+ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 
0x0d, 0xe1, 0xdb, -+ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, -+ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, -+ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, -+ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, -+ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, -+ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, -+ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, -+ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, -+ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, -+ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, -+ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, -+ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, -+ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, -+ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, -+ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, -+ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, -+ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, -+ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, -+ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, -+ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, -+ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, -+ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, -+ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, -+ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, -+ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, -+ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, -+ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, -+ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, -+ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, -+ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, -+ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, -+ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, -+ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, -+ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, -+ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, -+ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, -+ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, -+ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, -+ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 
0xce, 0x28, -+ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, -+ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, -+ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, -+ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, -+ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, -+ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, -+ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, -+ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, -+ 0x70, 0xcf, 0xd6 -+}; -+static const u8 enc_assoc012[] __initconst = { -+ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, -+ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, -+ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, -+ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, -+ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, -+ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, -+ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, -+ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 -+}; -+static const u8 enc_nonce012[] __initconst = { -+ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 -+}; -+static const u8 enc_key012[] __initconst = { -+ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, -+ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, -+ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, -+ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input053[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, -+ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, -+ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, -+ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe -+}; -+static const u8 enc_output053[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0xe6, 0xd3, 0xd7, 0x32, 0x4a, 0x1c, 0xbb, 0xa7, -+ 0x77, 0xbb, 0xb0, 0xec, 0xdd, 0xa3, 0x78, 0x07 -+}; -+static const u8 enc_assoc053[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_nonce053[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key053[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input054[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, -+ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, -+ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, -+ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe, -+ 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe, -+ 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b, -+ 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5, -+ 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd -+}; -+static const u8 enc_output054[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x06, 0x2d, 0xe6, 0x79, 0x5f, 0x27, 0x4f, 0xd2, -+ 0xa3, 0x05, 0xd7, 0x69, 0x80, 0xbc, 0x9c, 0xce -+}; -+static const u8 enc_assoc054[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_nonce054[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key054[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* 
wycheproof - misc */ -+static const u8 enc_input055[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, -+ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8, -+ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b, -+ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe, -+ 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe, -+ 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b, -+ 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5, -+ 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd, -+ 0x7a, 0xda, 0x44, 0x42, 0x42, 0x69, 0xbf, 0xfa, -+ 0x55, 0x27, 0xf2, 0x70, 0xac, 0xf6, 0x85, 0x02, -+ 0xb7, 0x4c, 0x5a, 0xe2, 0xe6, 0x0c, 0x05, 0x80, -+ 0x98, 0x1a, 0x49, 0x38, 0x45, 0x93, 0x92, 0xc4, -+ 0x9b, 0xb2, 0xf2, 0x84, 0xb6, 0x46, 0xef, 0xc7, -+ 0xf3, 0xf0, 0xb1, 0x36, 0x1d, 0xc3, 0x48, 0xed, -+ 0x77, 0xd3, 0x0b, 0xc5, 0x76, 0x92, 0xed, 0x38, -+ 0xfb, 0xac, 0x01, 0x88, 0x38, 0x04, 0x88, 0xc7 -+}; -+static const u8 enc_output055[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0xd8, 0xb4, 0x79, 0x02, 0xba, 0xae, 0xaf, 0xb3, -+ 0x42, 0x03, 0x05, 0x15, 0x29, 0xaf, 0x28, 0x2e -+}; -+static const u8 enc_assoc055[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00 -+}; -+static const u8 enc_nonce055[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key055[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input056[] __initconst = { -+ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, -+ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, -+ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, -+ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41 -+}; -+static const u8 enc_output056[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xb3, 0x89, 0x1c, 0x84, 0x9c, 0xb5, 0x2c, 0x27, -+ 0x74, 0x7e, 0xdf, 0xcf, 0x31, 0x21, 0x3b, 0xb6 -+}; -+static const u8 enc_assoc056[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce056[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key056[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input057[] __initconst = { -+ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, -+ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, -+ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, -+ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41, -+ 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01, -+ 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4, -+ 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a, -+ 0xac, 0x48, 0x4d, 
0xd8, 0x01, 0x78, 0x02, 0x42 -+}; -+static const u8 enc_output057[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xf0, 0xc1, 0x2d, 0x26, 0xef, 0x03, 0x02, 0x9b, -+ 0x62, 0xc0, 0x08, 0xda, 0x27, 0xc5, 0xdc, 0x68 -+}; -+static const u8 enc_assoc057[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce057[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key057[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input058[] __initconst = { -+ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c, -+ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17, -+ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84, -+ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41, -+ 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01, -+ 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4, -+ 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a, -+ 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42, -+ 0x85, 0x25, 0xbb, 0xbd, 0xbd, 0x96, 0x40, 0x05, -+ 0xaa, 0xd8, 0x0d, 0x8f, 0x53, 0x09, 0x7a, 0xfd, -+ 0x48, 0xb3, 0xa5, 0x1d, 0x19, 0xf3, 0xfa, 0x7f, -+ 0x67, 0xe5, 0xb6, 0xc7, 0xba, 0x6c, 0x6d, 0x3b, -+ 0x64, 0x4d, 0x0d, 0x7b, 0x49, 0xb9, 0x10, 0x38, -+ 0x0c, 0x0f, 0x4e, 0xc9, 0xe2, 0x3c, 0xb7, 0x12, -+ 0x88, 0x2c, 0xf4, 0x3a, 0x89, 0x6d, 0x12, 0xc7, -+ 0x04, 0x53, 0xfe, 0x77, 0xc7, 0xfb, 0x77, 
0x38 -+}; -+static const u8 enc_output058[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xee, 0x65, 0x78, 0x30, 0x01, 0xc2, 0x56, 0x91, -+ 0xfa, 0x28, 0xd0, 0xf5, 0xf1, 0xc1, 0xd7, 0x62 -+}; -+static const u8 enc_assoc058[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce058[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key058[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input059[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, -+ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, -+ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, -+ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e -+}; -+static const u8 enc_output059[] __initconst = { -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 
0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x79, 0xba, 0x7a, 0x29, 0xf5, 0xa7, 0xbb, 0x75, -+ 0x79, 0x7a, 0xf8, 0x7a, 0x61, 0x01, 0x29, 0xa4 -+}; -+static const u8 enc_assoc059[] __initconst = { -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 -+}; -+static const u8 enc_nonce059[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key059[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input060[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, -+ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, -+ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, -+ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e, -+ 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e, -+ 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab, -+ 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65, -+ 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d -+}; -+static const u8 enc_output060[] __initconst = { -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x36, 0xb1, 0x74, 0x38, 0x19, 0xe1, 0xb9, 0xba, -+ 0x15, 0x51, 0xe8, 0xed, 0x92, 0x2a, 0x95, 0x9a -+}; -+static const u8 enc_assoc060[] __initconst = { -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 -+}; -+static const u8 enc_nonce060[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key060[] 
__initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input061[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03, -+ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68, -+ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb, -+ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e, -+ 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e, -+ 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab, -+ 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65, -+ 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d, -+ 0x7a, 0xda, 0x44, 0xc2, 0x42, 0x69, 0xbf, 0x7a, -+ 0x55, 0x27, 0xf2, 0xf0, 0xac, 0xf6, 0x85, 0x82, -+ 0xb7, 0x4c, 0x5a, 0x62, 0xe6, 0x0c, 0x05, 0x00, -+ 0x98, 0x1a, 0x49, 0xb8, 0x45, 0x93, 0x92, 0x44, -+ 0x9b, 0xb2, 0xf2, 0x04, 0xb6, 0x46, 0xef, 0x47, -+ 0xf3, 0xf0, 0xb1, 0xb6, 0x1d, 0xc3, 0x48, 0x6d, -+ 0x77, 0xd3, 0x0b, 0x45, 0x76, 0x92, 0xed, 0xb8, -+ 0xfb, 0xac, 0x01, 0x08, 0x38, 0x04, 0x88, 0x47 -+}; -+static const u8 enc_output061[] __initconst = { -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0xfe, 
0xac, 0x49, 0x55, 0x55, 0x4e, 0x80, 0x6f, -+ 0x3a, 0x19, 0x02, 0xe2, 0x44, 0x32, 0xc0, 0x8a -+}; -+static const u8 enc_assoc061[] __initconst = { -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 -+}; -+static const u8 enc_nonce061[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key061[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input062[] __initconst = { -+ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, -+ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, -+ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, -+ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1 -+}; -+static const u8 enc_output062[] __initconst = { -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0x20, 0xa3, 0x79, 0x8d, 0xf1, 0x29, 0x2c, 0x59, -+ 0x72, 0xbf, 0x97, 0x41, 0xae, 0xc3, 0x8a, 0x19 -+}; -+static const u8 enc_assoc062[] __initconst = { -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f -+}; -+static const u8 enc_nonce062[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key062[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input063[] __initconst = { -+ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, -+ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, -+ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 
0x04, -+ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1, -+ 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81, -+ 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54, -+ 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a, -+ 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2 -+}; -+static const u8 enc_output063[] __initconst = { -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xc0, 0x3d, 0x9f, 0x67, 0x35, 0x4a, 0x97, 0xb2, -+ 0xf0, 0x74, 0xf7, 0x55, 0x15, 0x57, 0xe4, 0x9c -+}; -+static const u8 enc_assoc063[] __initconst = { -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f -+}; -+static const u8 enc_nonce063[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key063[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input064[] __initconst = { -+ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc, -+ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97, -+ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04, -+ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1, -+ 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81, -+ 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54, -+ 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a, -+ 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2, -+ 0x85, 0x25, 0xbb, 0x3d, 0xbd, 0x96, 0x40, 0x85, -+ 0xaa, 0xd8, 0x0d, 0x0f, 0x53, 0x09, 0x7a, 0x7d, -+ 0x48, 0xb3, 0xa5, 0x9d, 0x19, 0xf3, 0xfa, 0xff, -+ 0x67, 0xe5, 
0xb6, 0x47, 0xba, 0x6c, 0x6d, 0xbb, -+ 0x64, 0x4d, 0x0d, 0xfb, 0x49, 0xb9, 0x10, 0xb8, -+ 0x0c, 0x0f, 0x4e, 0x49, 0xe2, 0x3c, 0xb7, 0x92, -+ 0x88, 0x2c, 0xf4, 0xba, 0x89, 0x6d, 0x12, 0x47, -+ 0x04, 0x53, 0xfe, 0xf7, 0xc7, 0xfb, 0x77, 0xb8 -+}; -+static const u8 enc_output064[] __initconst = { -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xc8, 0x6d, 0xa8, 0xdd, 0x65, 0x22, 0x86, 0xd5, -+ 0x02, 0x13, 0xd3, 0x28, 0xd6, 0x3e, 0x40, 0x06 -+}; -+static const u8 enc_assoc064[] __initconst = { -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f -+}; -+static const u8 enc_nonce064[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key064[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input065[] __initconst = { -+ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, -+ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, -+ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, -+ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 
0xd6, 0x41 -+}; -+static const u8 enc_output065[] __initconst = { -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0xbe, 0xde, 0x90, 0x83, 0xce, 0xb3, 0x6d, 0xdf, -+ 0xe5, 0xfa, 0x81, 0x1f, 0x95, 0x47, 0x1c, 0x67 -+}; -+static const u8 enc_assoc065[] __initconst = { -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce065[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key065[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input066[] __initconst = { -+ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, -+ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, -+ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, -+ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41, -+ 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01, -+ 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4, -+ 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a, -+ 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42 -+}; -+static const u8 enc_output066[] __initconst = { -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x30, 0x08, 0x74, 0xbb, 0x06, 0x92, 0xb6, 0x89, -+ 0xde, 0xad, 0x9a, 0xe1, 0x5b, 0x06, 0x73, 0x90 -+}; -+static const u8 enc_assoc066[] __initconst = { -+ 
0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce066[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key066[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input067[] __initconst = { -+ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c, -+ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17, -+ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84, -+ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41, -+ 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01, -+ 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4, -+ 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a, -+ 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42, -+ 0x05, 0x25, 0xbb, 0xbd, 0x3d, 0x96, 0x40, 0x05, -+ 0x2a, 0xd8, 0x0d, 0x8f, 0xd3, 0x09, 0x7a, 0xfd, -+ 0xc8, 0xb3, 0xa5, 0x1d, 0x99, 0xf3, 0xfa, 0x7f, -+ 0xe7, 0xe5, 0xb6, 0xc7, 0x3a, 0x6c, 0x6d, 0x3b, -+ 0xe4, 0x4d, 0x0d, 0x7b, 0xc9, 0xb9, 0x10, 0x38, -+ 0x8c, 0x0f, 0x4e, 0xc9, 0x62, 0x3c, 0xb7, 0x12, -+ 0x08, 0x2c, 0xf4, 0x3a, 0x09, 0x6d, 0x12, 0xc7, -+ 0x84, 0x53, 0xfe, 0x77, 0x47, 0xfb, 0x77, 0x38 -+}; -+static const u8 enc_output067[] __initconst = { -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 
0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x99, 0xca, 0xd8, 0x5f, 0x45, 0xca, 0x40, 0x94, -+ 0x2d, 0x0d, 0x4d, 0x5e, 0x95, 0x0a, 0xde, 0x22 -+}; -+static const u8 enc_assoc067[] __initconst = { -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, -+ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce067[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key067[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input068[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, -+ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, -+ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, -+ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41 -+}; -+static const u8 enc_output068[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x8b, 0xbe, 0x14, 0x52, 0x72, 0xe7, 0xc2, 0xd9, -+ 0xa1, 0x89, 0x1a, 0x3a, 0xb0, 0x98, 0x3d, 0x9d -+}; -+static const u8 enc_assoc068[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce068[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key068[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 
0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input069[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, -+ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, -+ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, -+ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41, -+ 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01, -+ 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4, -+ 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a, -+ 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42 -+}; -+static const u8 enc_output069[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x3b, 0x41, 0x86, 0x19, 0x13, 0xa8, 0xf6, 0xde, -+ 0x7f, 0x61, 0xe2, 0x25, 0x63, 0x1b, 0xc3, 0x82 -+}; -+static const u8 enc_assoc069[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce069[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key069[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input070[] __initconst = { -+ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c, -+ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17, -+ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84, -+ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41, -+ 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01, -+ 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4, -+ 0x08, 0x69, 0xff, 0xd2, 0x13, 
0xa1, 0xd9, 0x1a, -+ 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42, -+ 0x7a, 0xda, 0x44, 0x42, 0xbd, 0x96, 0x40, 0x05, -+ 0x55, 0x27, 0xf2, 0x70, 0x53, 0x09, 0x7a, 0xfd, -+ 0xb7, 0x4c, 0x5a, 0xe2, 0x19, 0xf3, 0xfa, 0x7f, -+ 0x98, 0x1a, 0x49, 0x38, 0xba, 0x6c, 0x6d, 0x3b, -+ 0x9b, 0xb2, 0xf2, 0x84, 0x49, 0xb9, 0x10, 0x38, -+ 0xf3, 0xf0, 0xb1, 0x36, 0xe2, 0x3c, 0xb7, 0x12, -+ 0x77, 0xd3, 0x0b, 0xc5, 0x89, 0x6d, 0x12, 0xc7, -+ 0xfb, 0xac, 0x01, 0x88, 0xc7, 0xfb, 0x77, 0x38 -+}; -+static const u8 enc_output070[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x84, 0x28, 0xbc, 0xf0, 0x23, 0xec, 0x6b, 0xf3, -+ 0x1f, 0xd9, 0xef, 0xb2, 0x03, 0xff, 0x08, 0x71 -+}; -+static const u8 enc_assoc070[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce070[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key070[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ 
-+static const u8 enc_input071[] __initconst = { -+ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, -+ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, -+ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, -+ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe -+}; -+static const u8 enc_output071[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0x13, 0x9f, 0xdf, 0x64, 0x74, 0xea, 0x24, 0xf5, -+ 0x49, 0xb0, 0x75, 0x82, 0x5f, 0x2c, 0x76, 0x20 -+}; -+static const u8 enc_assoc071[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_nonce071[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key071[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input072[] __initconst = { -+ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, -+ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, -+ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, -+ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe, -+ 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe, -+ 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b, -+ 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5, -+ 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd -+}; -+static const u8 enc_output072[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 
0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xbb, 0xad, 0x8d, 0x86, 0x3b, 0x83, 0x5a, 0x8e, -+ 0x86, 0x64, 0xfd, 0x1d, 0x45, 0x66, 0xb6, 0xb4 -+}; -+static const u8 enc_assoc072[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_nonce072[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key072[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input073[] __initconst = { -+ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83, -+ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8, -+ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b, -+ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe, -+ 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe, -+ 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b, -+ 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5, -+ 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd, -+ 0x85, 0x25, 0xbb, 0xbd, 0x42, 0x69, 0xbf, 0xfa, -+ 0xaa, 0xd8, 0x0d, 0x8f, 0xac, 0xf6, 0x85, 0x02, -+ 0x48, 0xb3, 0xa5, 0x1d, 0xe6, 0x0c, 0x05, 0x80, -+ 0x67, 0xe5, 0xb6, 0xc7, 0x45, 0x93, 0x92, 0xc4, -+ 0x64, 0x4d, 0x0d, 0x7b, 0xb6, 0x46, 0xef, 0xc7, -+ 0x0c, 0x0f, 0x4e, 0xc9, 0x1d, 0xc3, 0x48, 0xed, -+ 0x88, 0x2c, 0xf4, 0x3a, 0x76, 0x92, 0xed, 0x38, -+ 0x04, 0x53, 0xfe, 0x77, 0x38, 0x04, 0x88, 0xc7 -+}; -+static const u8 enc_output073[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 
0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0x42, 0xf2, 0x35, 0x42, 0x97, 0x84, 0x9a, 0x51, -+ 0x1d, 0x53, 0xe5, 0x57, 0x17, 0x72, 0xf7, 0x1f -+}; -+static const u8 enc_assoc073[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_nonce073[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00 -+}; -+static const u8 enc_key073[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input076[] __initconst = { -+ 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85, -+ 0xde, 0x22, 0xff, 0x5b, 0x8a, 0xdd, 0x95, 0x02, -+ 0xce, 0x03, 0xa0, 0xfa, 0xf5, 0x99, 0x2a, 0x09, -+ 0x52, 0x2c, 0xdd, 0x12, 0x06, 0xd2, 0x20, 0xb8, -+ 0xf8, 0xbd, 0x07, 0xd1, 0xf1, 0xf5, 0xa1, 0xbd, -+ 0x9a, 0x71, 0xd1, 0x1c, 0x7f, 0x57, 0x9b, 0x85, -+ 0x58, 0x18, 0xc0, 0x8d, 0x4d, 0xe0, 0x36, 0x39, -+ 0x31, 0x83, 0xb7, 0xf5, 0x90, 0xb3, 0x35, 0xae, -+ 0xd8, 0xde, 0x5b, 0x57, 0xb1, 0x3c, 0x5f, 0xed, -+ 0xe2, 0x44, 0x1c, 0x3e, 0x18, 0x4a, 0xa9, 0xd4, -+ 0x6e, 0x61, 0x59, 0x85, 0x06, 0xb3, 0xe1, 0x1c, -+ 0x43, 0xc6, 0x2c, 0xbc, 0xac, 0xec, 0xed, 0x33, -+ 0x19, 0x08, 0x75, 0xb0, 0x12, 0x21, 0x8b, 0x19, -+ 0x30, 0xfb, 0x7c, 0x38, 0xec, 0x45, 0xac, 0x11, -+ 0xc3, 0x53, 0xd0, 0xcf, 0x93, 0x8d, 0xcc, 0xb9, -+ 0xef, 0xad, 0x8f, 0xed, 0xbe, 0x46, 0xda, 0xa5 
-+}; -+static const u8 enc_output076[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x4b, 0x0b, 0xda, 0x8a, 0xd0, 0x43, 0x83, 0x0d, -+ 0x83, 0x19, 0xab, 0x82, 0xc5, 0x0c, 0x76, 0x63 -+}; -+static const u8 enc_assoc076[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce076[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xb4, 0xf0 -+}; -+static const u8 enc_key076[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input077[] __initconst = { -+ 0x86, 0xcb, 0xac, 0xae, 0x4d, 0x3f, 0x74, 0xae, -+ 0x01, 0x21, 0x3e, 0x05, 0x51, 0xcc, 0x15, 0x16, 
-+ 0x0e, 0xa1, 0xbe, 0x84, 0x08, 0xe3, 0xd5, 0xd7, -+ 0x4f, 0x01, 0x46, 0x49, 0x95, 0xa6, 0x9e, 0x61, -+ 0x76, 0xcb, 0x9e, 0x02, 0xb2, 0x24, 0x7e, 0xd2, -+ 0x99, 0x89, 0x2f, 0x91, 0x82, 0xa4, 0x5c, 0xaf, -+ 0x4c, 0x69, 0x40, 0x56, 0x11, 0x76, 0x6e, 0xdf, -+ 0xaf, 0xdc, 0x28, 0x55, 0x19, 0xea, 0x30, 0x48, -+ 0x0c, 0x44, 0xf0, 0x5e, 0x78, 0x1e, 0xac, 0xf8, -+ 0xfc, 0xec, 0xc7, 0x09, 0x0a, 0xbb, 0x28, 0xfa, -+ 0x5f, 0xd5, 0x85, 0xac, 0x8c, 0xda, 0x7e, 0x87, -+ 0x72, 0xe5, 0x94, 0xe4, 0xce, 0x6c, 0x88, 0x32, -+ 0x81, 0x93, 0x2e, 0x0f, 0x89, 0xf8, 0x77, 0xa1, -+ 0xf0, 0x4d, 0x9c, 0x32, 0xb0, 0x6c, 0xf9, 0x0b, -+ 0x0e, 0x76, 0x2b, 0x43, 0x0c, 0x4d, 0x51, 0x7c, -+ 0x97, 0x10, 0x70, 0x68, 0xf4, 0x98, 0xef, 0x7f -+}; -+static const u8 enc_output077[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x4b, 0xc9, 0x8f, 0x72, 0xc4, 0x94, 0xc2, 0xa4, -+ 0x3c, 0x2b, 0x15, 0xa1, 0x04, 0x3f, 0x1c, 0xfa -+}; -+static const u8 enc_assoc077[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce077[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xfb, 0x66 -+}; -+static const u8 enc_key077[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input078[] __initconst = { -+ 0xfa, 0xb1, 0xcd, 0xdf, 0x4f, 0xe1, 0x98, 0xef, -+ 0x63, 0xad, 0xd8, 0x81, 0xd6, 0xea, 0xd6, 0xc5, -+ 0x76, 0x37, 0xbb, 0xe9, 0x20, 0x18, 0xca, 0x7c, -+ 0x0b, 0x96, 0xfb, 0xa0, 0x87, 0x1e, 0x93, 0x2d, -+ 0xb1, 0xfb, 0xf9, 0x07, 0x61, 0xbe, 0x25, 0xdf, -+ 0x8d, 0xfa, 0xf9, 0x31, 0xce, 0x57, 0x57, 0xe6, -+ 0x17, 0xb3, 0xd7, 0xa9, 0xf0, 0xbf, 0x0f, 0xfe, -+ 0x5d, 0x59, 0x1a, 0x33, 0xc1, 0x43, 0xb8, 0xf5, -+ 0x3f, 0xd0, 0xb5, 0xa1, 0x96, 0x09, 0xfd, 0x62, -+ 0xe5, 0xc2, 0x51, 0xa4, 0x28, 0x1a, 0x20, 0x0c, -+ 0xfd, 0xc3, 0x4f, 0x28, 0x17, 0x10, 0x40, 0x6f, -+ 0x4e, 0x37, 0x62, 0x54, 0x46, 0xff, 0x6e, 0xf2, -+ 0x24, 0x91, 0x3d, 0xeb, 0x0d, 0x89, 0xaf, 0x33, -+ 0x71, 0x28, 0xe3, 0xd1, 0x55, 0xd1, 0x6d, 0x3e, -+ 0xc3, 0x24, 0x60, 0x41, 0x43, 0x21, 0x43, 0xe9, -+ 0xab, 0x3a, 0x6d, 0x2c, 0xcc, 0x2f, 0x4d, 0x62 -+}; -+static const u8 enc_output078[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xf7, 0xe9, 0xe1, 0x51, 0xb0, 0x25, 0x33, 0xc7, -+ 0x46, 0x58, 0xbf, 0xc7, 0x73, 0x7c, 0x68, 0x0d -+}; -+static const u8 enc_assoc078[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce078[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xbb, 0x90 -+}; -+static const u8 enc_key078[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input079[] __initconst = { -+ 0x22, 0x72, 0x02, 0xbe, 0x7f, 0x35, 0x15, 0xe9, -+ 0xd1, 0xc0, 0x2e, 0xea, 0x2f, 0x19, 0x50, 0xb6, -+ 0x48, 0x1b, 0x04, 0x8a, 0x4c, 0x91, 0x50, 0x6c, -+ 0xb4, 0x0d, 0x50, 0x4e, 0x6c, 0x94, 0x9f, 0x82, -+ 0xd1, 0x97, 0xc2, 0x5a, 0xd1, 0x7d, 0xc7, 0x21, -+ 0x65, 0x11, 0x25, 0x78, 0x2a, 0xc7, 0xa7, 0x12, -+ 0x47, 0xfe, 0xae, 0xf3, 0x2f, 0x1f, 0x25, 0x0c, -+ 0xe4, 0xbb, 0x8f, 0x79, 0xac, 0xaa, 0x17, 0x9d, -+ 0x45, 0xa7, 0xb0, 0x54, 0x5f, 0x09, 0x24, 0x32, -+ 0x5e, 0xfa, 0x87, 0xd5, 0xe4, 0x41, 0xd2, 0x84, -+ 0x78, 0xc6, 0x1f, 0x22, 0x23, 0xee, 0x67, 0xc3, -+ 0xb4, 0x1f, 0x43, 0x94, 0x53, 0x5e, 0x2a, 0x24, -+ 0x36, 0x9a, 0x2e, 0x16, 0x61, 0x3c, 0x45, 0x94, -+ 
0x90, 0xc1, 0x4f, 0xb1, 0xd7, 0x55, 0xfe, 0x53, -+ 0xfb, 0xe1, 0xee, 0x45, 0xb1, 0xb2, 0x1f, 0x71, -+ 0x62, 0xe2, 0xfc, 0xaa, 0x74, 0x2a, 0xbe, 0xfd -+}; -+static const u8 enc_output079[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x79, 0x5b, 0xcf, 0xf6, 0x47, 0xc5, 0x53, 0xc2, -+ 0xe4, 0xeb, 0x6e, 0x0e, 0xaf, 0xd9, 0xe0, 0x4e -+}; -+static const u8 enc_assoc079[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce079[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x48, 0x4a -+}; -+static const u8 enc_key079[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ 
-+static const u8 enc_input080[] __initconst = { -+ 0xfa, 0xe5, 0x83, 0x45, 0xc1, 0x6c, 0xb0, 0xf5, -+ 0xcc, 0x53, 0x7f, 0x2b, 0x1b, 0x34, 0x69, 0xc9, -+ 0x69, 0x46, 0x3b, 0x3e, 0xa7, 0x1b, 0xcf, 0x6b, -+ 0x98, 0xd6, 0x69, 0xa8, 0xe6, 0x0e, 0x04, 0xfc, -+ 0x08, 0xd5, 0xfd, 0x06, 0x9c, 0x36, 0x26, 0x38, -+ 0xe3, 0x40, 0x0e, 0xf4, 0xcb, 0x24, 0x2e, 0x27, -+ 0xe2, 0x24, 0x5e, 0x68, 0xcb, 0x9e, 0xc5, 0x83, -+ 0xda, 0x53, 0x40, 0xb1, 0x2e, 0xdf, 0x42, 0x3b, -+ 0x73, 0x26, 0xad, 0x20, 0xfe, 0xeb, 0x57, 0xda, -+ 0xca, 0x2e, 0x04, 0x67, 0xa3, 0x28, 0x99, 0xb4, -+ 0x2d, 0xf8, 0xe5, 0x6d, 0x84, 0xe0, 0x06, 0xbc, -+ 0x8a, 0x7a, 0xcc, 0x73, 0x1e, 0x7c, 0x1f, 0x6b, -+ 0xec, 0xb5, 0x71, 0x9f, 0x70, 0x77, 0xf0, 0xd4, -+ 0xf4, 0xc6, 0x1a, 0xb1, 0x1e, 0xba, 0xc1, 0x00, -+ 0x18, 0x01, 0xce, 0x33, 0xc4, 0xe4, 0xa7, 0x7d, -+ 0x83, 0x1d, 0x3c, 0xe3, 0x4e, 0x84, 0x10, 0xe1 -+}; -+static const u8 enc_output080[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x19, 0x46, 0xd6, 0x53, 0x96, 0x0f, 0x94, 0x7a, -+ 0x74, 0xd3, 0xe8, 0x09, 0x3c, 0xf4, 0x85, 0x02 -+}; -+static const u8 enc_assoc080[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce080[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x93, 0x2f, 0x40 -+}; -+static const u8 enc_key080[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input081[] __initconst = { -+ 0xeb, 0xb2, 0x16, 0xdd, 0xd7, 0xca, 0x70, 0x92, -+ 0x15, 0xf5, 0x03, 0xdf, 0x9c, 0xe6, 0x3c, 0x5c, -+ 0xd2, 0x19, 0x4e, 0x7d, 0x90, 0x99, 0xe8, 0xa9, -+ 0x0b, 0x2a, 0xfa, 0xad, 0x5e, 0xba, 0x35, 0x06, -+ 0x99, 0x25, 0xa6, 0x03, 0xfd, 0xbc, 0x34, 0x1a, -+ 0xae, 0xd4, 0x15, 0x05, 0xb1, 0x09, 0x41, 0xfa, -+ 0x38, 0x56, 0xa7, 0xe2, 0x47, 0xb1, 0x04, 0x07, -+ 0x09, 0x74, 0x6c, 0xfc, 0x20, 0x96, 0xca, 0xa6, -+ 0x31, 0xb2, 0xff, 0xf4, 0x1c, 0x25, 0x05, 0x06, -+ 0xd8, 0x89, 0xc1, 0xc9, 0x06, 0x71, 0xad, 0xe8, -+ 0x53, 0xee, 0x63, 0x94, 0xc1, 0x91, 0x92, 0xa5, -+ 0xcf, 0x37, 0x10, 0xd1, 0x07, 0x30, 0x99, 0xe5, -+ 0xbc, 0x94, 0x65, 0x82, 0xfc, 0x0f, 0xab, 0x9f, -+ 0x54, 0x3c, 0x71, 0x6a, 0xe2, 0x48, 0x6a, 0x86, -+ 0x83, 0xfd, 0xca, 0x39, 0xd2, 0xe1, 0x4f, 0x23, -+ 0xd0, 0x0a, 0x58, 0x26, 0x64, 0xf4, 0xec, 0xb1 -+}; -+static const u8 enc_output081[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x36, 0xc3, 0x00, 0x29, 0x85, 0xdd, 0x21, 0xba, -+ 0xf8, 0x95, 0xd6, 0x33, 0x57, 0x3f, 0x12, 0xc0 -+}; -+static const u8 enc_assoc081[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce081[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x93, 0x35 -+}; -+static const u8 enc_key081[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input082[] __initconst = { -+ 0x40, 0x8a, 0xe6, 0xef, 0x1c, 0x7e, 0xf0, 0xfb, -+ 0x2c, 0x2d, 0x61, 0x08, 0x16, 0xfc, 0x78, 0x49, -+ 0xef, 0xa5, 0x8f, 0x78, 0x27, 0x3f, 0x5f, 0x16, -+ 0x6e, 0xa6, 0x5f, 0x81, 0xb5, 0x75, 0x74, 0x7d, -+ 0x03, 0x5b, 0x30, 0x40, 0xfe, 0xde, 0x1e, 0xb9, -+ 0x45, 0x97, 0x88, 0x66, 0x97, 0x88, 0x40, 0x8e, -+ 0x00, 0x41, 0x3b, 0x3e, 0x37, 0x6d, 0x15, 0x2d, -+ 0x20, 0x4a, 0xa2, 0xb7, 0xa8, 0x35, 0x58, 0xfc, -+ 0xd4, 0x8a, 0x0e, 0xf7, 0xa2, 0x6b, 0x1c, 0xd6, -+ 0xd3, 0x5d, 0x23, 0xb3, 0xf5, 0xdf, 0xe0, 0xca, -+ 
0x77, 0xa4, 0xce, 0x32, 0xb9, 0x4a, 0xbf, 0x83, -+ 0xda, 0x2a, 0xef, 0xca, 0xf0, 0x68, 0x38, 0x08, -+ 0x79, 0xe8, 0x9f, 0xb0, 0xa3, 0x82, 0x95, 0x95, -+ 0xcf, 0x44, 0xc3, 0x85, 0x2a, 0xe2, 0xcc, 0x66, -+ 0x2b, 0x68, 0x9f, 0x93, 0x55, 0xd9, 0xc1, 0x83, -+ 0x80, 0x1f, 0x6a, 0xcc, 0x31, 0x3f, 0x89, 0x07 -+}; -+static const u8 enc_output082[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x65, 0x14, 0x51, 0x8e, 0x0a, 0x26, 0x41, 0x42, -+ 0xe0, 0xb7, 0x35, 0x1f, 0x96, 0x7f, 0xc2, 0xae -+}; -+static const u8 enc_assoc082[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce082[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xf7, 0xd5 -+}; -+static const u8 enc_key082[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input083[] __initconst = { -+ 0x0a, 0x0a, 0x24, 0x49, 0x9b, 0xca, 0xde, 0x58, -+ 0xcf, 0x15, 0x76, 0xc3, 0x12, 0xac, 0xa9, 0x84, -+ 0x71, 0x8c, 0xb4, 0xcc, 0x7e, 0x01, 0x53, 0xf5, -+ 0xa9, 0x01, 0x58, 0x10, 0x85, 0x96, 0x44, 0xdf, -+ 0xc0, 0x21, 0x17, 0x4e, 0x0b, 0x06, 0x0a, 0x39, -+ 0x74, 0x48, 0xde, 0x8b, 0x48, 0x4a, 0x86, 0x03, -+ 0xbe, 0x68, 0x0a, 0x69, 0x34, 0xc0, 0x90, 0x6f, -+ 0x30, 0xdd, 0x17, 0xea, 0xe2, 0xd4, 0xc5, 0xfa, -+ 0xa7, 0x77, 0xf8, 0xca, 0x53, 0x37, 0x0e, 0x08, -+ 0x33, 0x1b, 0x88, 0xc3, 0x42, 0xba, 0xc9, 0x59, -+ 0x78, 0x7b, 0xbb, 0x33, 0x93, 0x0e, 0x3b, 0x56, -+ 0xbe, 0x86, 0xda, 0x7f, 0x2a, 0x6e, 0xb1, 0xf9, -+ 0x40, 0x89, 0xd1, 0xd1, 0x81, 0x07, 0x4d, 0x43, -+ 0x02, 0xf8, 0xe0, 0x55, 0x2d, 0x0d, 0xe1, 0xfa, -+ 0xb3, 0x06, 0xa2, 0x1b, 0x42, 0xd4, 0xc3, 0xba, -+ 0x6e, 0x6f, 0x0c, 0xbc, 0xc8, 0x1e, 0x87, 0x7a -+}; -+static const u8 enc_output083[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x4c, 0x19, 0x4d, 0xa6, 0xa9, 0x9f, 0xd6, 0x5b, -+ 0x40, 0xe9, 0xca, 0xd7, 0x98, 0xf4, 0x4b, 0x19 -+}; 
-+static const u8 enc_assoc083[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce083[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0xfc, 0xe4 -+}; -+static const u8 enc_key083[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input084[] __initconst = { -+ 0x4a, 0x0a, 0xaf, 0xf8, 0x49, 0x47, 0x29, 0x18, -+ 0x86, 0x91, 0x70, 0x13, 0x40, 0xf3, 0xce, 0x2b, -+ 0x8a, 0x78, 0xee, 0xd3, 0xa0, 0xf0, 0x65, 0x99, -+ 0x4b, 0x72, 0x48, 0x4e, 0x79, 0x91, 0xd2, 0x5c, -+ 0x29, 0xaa, 0x07, 0x5e, 0xb1, 0xfc, 0x16, 0xde, -+ 0x93, 0xfe, 0x06, 0x90, 0x58, 0x11, 0x2a, 0xb2, -+ 0x84, 0xa3, 0xed, 0x18, 0x78, 0x03, 0x26, 0xd1, -+ 0x25, 0x8a, 0x47, 0x22, 0x2f, 0xa6, 0x33, 0xd8, -+ 0xb2, 0x9f, 0x3b, 0xd9, 0x15, 0x0b, 0x23, 0x9b, -+ 0x15, 0x46, 0xc2, 0xbb, 0x9b, 0x9f, 0x41, 0x0f, -+ 0xeb, 0xea, 0xd3, 0x96, 0x00, 0x0e, 0xe4, 0x77, -+ 0x70, 0x15, 0x32, 0xc3, 0xd0, 0xf5, 0xfb, 0xf8, -+ 0x95, 0xd2, 0x80, 0x19, 0x6d, 0x2f, 0x73, 0x7c, -+ 0x5e, 0x9f, 0xec, 0x50, 0xd9, 0x2b, 0xb0, 0xdf, -+ 0x5d, 0x7e, 0x51, 0x3b, 0xe5, 0xb8, 0xea, 0x97, -+ 0x13, 0x10, 0xd5, 0xbf, 0x16, 0xba, 0x7a, 0xee -+}; -+static const u8 enc_output084[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xc8, 0xae, 0x77, 0x88, 0xcd, 0x28, 0x74, 0xab, -+ 0xc1, 0x38, 0x54, 0x1e, 0x11, 0xfd, 0x05, 0x87 -+}; -+static const u8 enc_assoc084[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce084[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x84, 0x86, 0xa8 -+}; -+static const u8 enc_key084[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input085[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x78, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0x9c, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 
0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0xd4, 0xd2, 0x06, 0x61, 0x6f, 0x92, 0x93, 0xf6, -+ 0x5b, 0x45, 0xdb, 0xbc, 0x74, 0xe7, 0xc2, 0xed, -+ 0xfb, 0xcb, 0xbf, 0x1c, 0xfb, 0x67, 0x9b, 0xb7, -+ 0x39, 0xa5, 0x86, 0x2d, 0xe2, 0xbc, 0xb9, 0x37, -+ 0xf7, 0x4d, 0x5b, 0xf8, 0x67, 0x1c, 0x5a, 0x8a, -+ 0x50, 0x92, 0xf6, 0x1d, 0x54, 0xc9, 0xaa, 0x5b -+}; -+static const u8 enc_output085[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x93, 0x3a, 0x51, 0x63, 0xc7, 0xf6, 0x23, 0x68, -+ 0x32, 0x7b, 0x3f, 0xbc, 0x10, 0x36, 0xc9, 0x43 -+}; -+static const u8 enc_assoc085[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce085[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; 
-+static const u8 enc_key085[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input093[] __initconst = { -+ 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d, -+ 0x3d, 0xb7, 0x66, 0x4a, 0x34, 0xae, 0x6b, 0x44, -+ 0x4d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x5b, 0x8b, 0x94, 0x50, 0x9e, 0x2b, 0x74, 0xa3, -+ 0x6d, 0x34, 0x6e, 0x33, 0xd5, 0x72, 0x65, 0x9b, -+ 0xa9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0x83, 0xdc, 0xe9, 0xf3, 0x07, 0x3e, 0xfa, 0xdb, -+ 0x7d, 0x23, 0xb8, 0x7a, 0xce, 0x35, 0x16, 0x8c -+}; -+static const u8 enc_output093[] __initconst = { -+ 0x00, 0x39, 0xe2, 0xfd, 0x2f, 0xd3, 0x12, 0x14, -+ 0x9e, 0x98, 0x98, 0x80, 0x88, 0x48, 0x13, 0xe7, -+ 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96, -+ 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00, -+ 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96, -+ 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00, -+ 0xa5, 0x19, 0xac, 0x1a, 0x35, 0xb4, 0xa5, 0x77, -+ 0x87, 0x51, 0x0a, 0xf7, 0x8d, 0x8d, 0x20, 0x0a -+}; -+static const u8 enc_assoc093[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce093[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key093[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* 
wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input094[] __initconst = { -+ 0xd3, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xe5, 0xda, 0x78, 0x76, 0x6f, 0xa1, 0x92, 0x90, -+ 0xc0, 0x31, 0xf7, 0x52, 0x08, 0x50, 0x67, 0x45, -+ 0xae, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0x49, 0x6d, 0xde, 0xb0, 0x55, 0x09, 0xc6, 0xef, -+ 0xff, 0xab, 0x75, 0xeb, 0x2d, 0xf4, 0xab, 0x09, -+ 0x76, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x01, 0x49, 0xef, 0x50, 0x4b, 0x71, 0xb1, 0x20, -+ 0xca, 0x4f, 0xf3, 0x95, 0x19, 0xc2, 0xc2, 0x10 -+}; -+static const u8 enc_output094[] __initconst = { -+ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x62, 0x18, 0xb2, 0x7f, 0x83, 0xb8, 0xb4, 0x66, -+ 0x02, 0xf6, 0xe1, 0xd8, 0x34, 0x20, 0x7b, 0x02, -+ 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29, -+ 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02, -+ 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29, -+ 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02, -+ 0x30, 0x2f, 0xe8, 0x2a, 0xb0, 0xa0, 0x9a, 0xf6, -+ 0x44, 0x00, 0xd0, 0x15, 0xae, 0x83, 0xd9, 0xcc -+}; -+static const u8 enc_assoc094[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce094[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key094[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ 
-+static const u8 enc_input095[] __initconst = { -+ 0xe9, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x6d, 0xf1, 0x39, 0x4e, 0xdc, 0x53, 0x9b, 0x5b, -+ 0x3a, 0x09, 0x57, 0xbe, 0x0f, 0xb8, 0x59, 0x46, -+ 0x80, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0xd1, 0x76, 0x9f, 0xe8, 0x06, 0xbb, 0xfe, 0xb6, -+ 0xf5, 0x90, 0x95, 0x0f, 0x2e, 0xac, 0x9e, 0x0a, -+ 0x58, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x99, 0x52, 0xae, 0x08, 0x18, 0xc3, 0x89, 0x79, -+ 0xc0, 0x74, 0x13, 0x71, 0x1a, 0x9a, 0xf7, 0x13 -+}; -+static const u8 enc_output095[] __initconst = { -+ 0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xea, 0x33, 0xf3, 0x47, 0x30, 0x4a, 0xbd, 0xad, -+ 0xf8, 0xce, 0x41, 0x34, 0x33, 0xc8, 0x45, 0x01, -+ 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70, -+ 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01, -+ 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70, -+ 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01, -+ 0x98, 0xa7, 0xe8, 0x36, 0xe0, 0xee, 0x4d, 0x02, -+ 0x35, 0x00, 0xd0, 0x55, 0x7e, 0xc2, 0xcb, 0xe0 -+}; -+static const u8 enc_assoc095[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce095[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key095[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input096[] __initconst = { -+ 0xff, 0x94, 
0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x64, 0xf9, 0x0f, 0x5b, 0x26, 0x92, 0xb8, 0x60, -+ 0xd4, 0x59, 0x6f, 0xf4, 0xb3, 0x40, 0x2c, 0x5c, -+ 0x00, 0xb9, 0xbb, 0x53, 0x70, 0x7a, 0xa6, 0x67, -+ 0xd3, 0x56, 0xfe, 0x50, 0xc7, 0x19, 0x96, 0x94, -+ 0x03, 0x35, 0x61, 0xe7, 0xca, 0xca, 0x6d, 0x94, -+ 0x1d, 0xc3, 0xcd, 0x69, 0x14, 0xad, 0x69, 0x04 -+}; -+static const u8 enc_output096[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xe3, 0x3b, 0xc5, 0x52, 0xca, 0x8b, 0x9e, 0x96, -+ 0x16, 0x9e, 0x79, 0x7e, 0x8f, 0x30, 0x30, 0x1b, -+ 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52, -+ 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f, -+ 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52, -+ 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f, -+ 0x6a, 0xb8, 0xdc, 0xe2, 0xc5, 0x9d, 0xa4, 0x73, -+ 0x71, 0x30, 0xb0, 0x25, 0x2f, 0x68, 0xa8, 0xd8 -+}; -+static const u8 enc_assoc096[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce096[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key096[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input097[] __initconst = { -+ 0x68, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xb0, 0x8f, 0x25, 0x67, 0x5b, 0x9b, 0xcb, 0xf6, -+ 0xe3, 0x84, 0x07, 0xde, 0x2e, 0xc7, 0x5a, 0x47, -+ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0x2d, 0x2a, 0xf7, 0xcd, 0x6b, 0x08, 0x05, 0x01, -+ 0xd3, 0x1b, 0xa5, 0x4f, 0xb2, 0xeb, 0x75, 0x96, -+ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 
0x88, 0x22, 0x27, 0x73, -+ 0x65, 0x0e, 0xc6, 0x2d, 0x75, 0x70, 0x72, 0xce, -+ 0xe6, 0xff, 0x23, 0x31, 0x86, 0xdd, 0x1c, 0x8f -+}; -+static const u8 enc_output097[] __initconst = { -+ 0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x37, 0x4d, 0xef, 0x6e, 0xb7, 0x82, 0xed, 0x00, -+ 0x21, 0x43, 0x11, 0x54, 0x12, 0xb7, 0x46, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7, -+ 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7, -+ 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d, -+ 0x04, 0x4d, 0xea, 0x60, 0x88, 0x80, 0x41, 0x2b, -+ 0xfd, 0xff, 0xcf, 0x35, 0x57, 0x9e, 0x9b, 0x26 -+}; -+static const u8 enc_assoc097[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce097[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key097[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input098[] __initconst = { -+ 0x6d, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xa1, 0x61, 0xb5, 0xab, 0x04, 0x09, 0x00, 0x62, -+ 0x9e, 0xfe, 0xff, 0x78, 0xd7, 0xd8, 0x6b, 0x45, -+ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0xc6, 0xf8, 0x07, 0x8c, 0xc8, 0xef, 0x12, 0xa0, -+ 0xff, 0x65, 0x7d, 0x6d, 0x08, 0xdb, 0x10, 0xb8, -+ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x8e, 0xdc, 0x36, 0x6c, 0xd6, 0x97, 
0x65, 0x6f, -+ 0xca, 0x81, 0xfb, 0x13, 0x3c, 0xed, 0x79, 0xa1 -+}; -+static const u8 enc_output098[] __initconst = { -+ 0x6d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x26, 0xa3, 0x7f, 0xa2, 0xe8, 0x10, 0x26, 0x94, -+ 0x5c, 0x39, 0xe9, 0xf2, 0xeb, 0xa8, 0x77, 0x02, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66, -+ 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66, -+ 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3, -+ 0x1e, 0x6b, 0xea, 0x63, 0x14, 0x54, 0x2e, 0x2e, -+ 0xf9, 0xff, 0xcf, 0x45, 0x0b, 0x2e, 0x98, 0x2b -+}; -+static const u8 enc_assoc098[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce098[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key098[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input099[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xfc, 0x01, 0xb8, 0x91, 0xe5, 0xf0, 0xf9, 0x12, -+ 0x8d, 0x7d, 0x1c, 0x57, 0x91, 0x92, 0xb6, 0x98, -+ 0x63, 0x41, 0x44, 0x15, 0xb6, 0x99, 0x68, 0x95, -+ 0x9a, 0x72, 0x91, 0xb7, 0xa5, 0xaf, 0x13, 0x48, -+ 0x60, 0xcd, 0x9e, 0xa1, 0x0c, 0x29, 0xa3, 0x66, -+ 0x54, 0xe7, 0xa2, 0x8e, 0x76, 0x1b, 0xec, 0xd8 -+}; -+static const u8 enc_output099[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x7b, 0xc3, 0x72, 0x98, 0x09, 0xe9, 0xdf, 
0xe4, -+ 0x4f, 0xba, 0x0a, 0xdd, 0xad, 0xe2, 0xaa, 0xdf, -+ 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0, -+ 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3, -+ 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0, -+ 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3, -+ 0xed, 0x20, 0x17, 0xc8, 0xdb, 0xa4, 0x77, 0x56, -+ 0x29, 0x04, 0x9d, 0x78, 0x6e, 0x3b, 0xce, 0xb1 -+}; -+static const u8 enc_assoc099[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce099[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key099[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input100[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x6b, 0x6d, 0xc9, 0xd2, 0x1a, 0x81, 0x9e, 0x70, -+ 0xb5, 0x77, 0xf4, 0x41, 0x37, 0xd3, 0xd6, 0xbd, -+ 0x13, 0x35, 0xf5, 0xeb, 0x44, 0x49, 0x40, 0x77, -+ 0xb2, 0x64, 0x49, 0xa5, 0x4b, 0x6c, 0x7c, 0x75, -+ 0x10, 0xb9, 0x2f, 0x5f, 0xfe, 0xf9, 0x8b, 0x84, -+ 0x7c, 0xf1, 0x7a, 0x9c, 0x98, 0xd8, 0x83, 0xe5 -+}; -+static const u8 enc_output100[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xec, 0xaf, 0x03, 0xdb, 0xf6, 0x98, 0xb8, 0x86, -+ 0x77, 0xb0, 0xe2, 0xcb, 0x0b, 0xa3, 0xca, 0xfa, -+ 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42, -+ 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee, -+ 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42, -+ 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee, -+ 0x07, 0x3f, 0x17, 0xcb, 0x67, 0x78, 0x64, 0x59, -+ 0x25, 0x04, 0x9d, 0x88, 0x22, 0xcb, 0xca, 0xb6 -+}; -+static const u8 enc_assoc100[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 
enc_nonce100[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key100[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input101[] __initconst = { -+ 0xff, 0xcb, 0x2b, 0x11, 0x06, 0xf8, 0x23, 0x4c, -+ 0x5e, 0x99, 0xd4, 0xdb, 0x4c, 0x70, 0x48, 0xde, -+ 0x32, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x16, 0xe9, 0x88, 0x4a, 0x11, 0x4f, 0x0e, 0x92, -+ 0x66, 0xce, 0xa3, 0x88, 0x5f, 0xe3, 0x6b, 0x9f, -+ 0xd6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0xce, 0xbe, 0xf5, 0xe9, 0x88, 0x5a, 0x80, 0xea, -+ 0x76, 0xd9, 0x75, 0xc1, 0x44, 0xa4, 0x18, 0x88 -+}; -+static const u8 enc_output101[] __initconst = { -+ 0xff, 0xa0, 0xfc, 0x3e, 0x80, 0x32, 0xc3, 0xd5, -+ 0xfd, 0xb6, 0x2a, 0x11, 0xf0, 0x96, 0x30, 0x7d, -+ 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7, -+ 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04, -+ 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7, -+ 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04, -+ 0x8b, 0x9b, 0xb4, 0xb4, 0x86, 0x12, 0x89, 0x65, -+ 0x8c, 0x69, 0x6a, 0x83, 0x40, 0x15, 0x04, 0x05 -+}; -+static const u8 enc_assoc101[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce101[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key101[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 
0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input102[] __initconst = { -+ 0x6f, 0x9e, 0x70, 0xed, 0x3b, 0x8b, 0xac, 0xa0, -+ 0x26, 0xe4, 0x6a, 0x5a, 0x09, 0x43, 0x15, 0x8d, -+ 0x21, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x0c, 0x61, 0x2c, 0x5e, 0x8d, 0x89, 0xa8, 0x73, -+ 0xdb, 0xca, 0xad, 0x5b, 0x73, 0x46, 0x42, 0x9b, -+ 0xc5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0xd4, 0x36, 0x51, 0xfd, 0x14, 0x9c, 0x26, 0x0b, -+ 0xcb, 0xdd, 0x7b, 0x12, 0x68, 0x01, 0x31, 0x8c -+}; -+static const u8 enc_output102[] __initconst = { -+ 0x6f, 0xf5, 0xa7, 0xc2, 0xbd, 0x41, 0x4c, 0x39, -+ 0x85, 0xcb, 0x94, 0x90, 0xb5, 0xa5, 0x6d, 0x2e, -+ 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46, -+ 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00, -+ 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46, -+ 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00, -+ 0x8b, 0x3b, 0xbd, 0x51, 0x64, 0x44, 0x59, 0x56, -+ 0x8d, 0x81, 0xca, 0x1f, 0xa7, 0x2c, 0xe4, 0x04 -+}; -+static const u8 enc_assoc102[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce102[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key102[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input103[] __initconst = { -+ 0x41, 0x2b, 0x08, 0x0a, 0x3e, 0x19, 0xc1, 0x0d, -+ 0x44, 0xa1, 0xaf, 0x1e, 
0xab, 0xde, 0xb4, 0xce, -+ 0x35, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x6b, 0x83, 0x94, 0x33, 0x09, 0x21, 0x48, 0x6c, -+ 0xa1, 0x1d, 0x29, 0x1c, 0x3e, 0x97, 0xee, 0x9a, -+ 0xd1, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0xb3, 0xd4, 0xe9, 0x90, 0x90, 0x34, 0xc6, 0x14, -+ 0xb1, 0x0a, 0xff, 0x55, 0x25, 0xd0, 0x9d, 0x8d -+}; -+static const u8 enc_output103[] __initconst = { -+ 0x41, 0x40, 0xdf, 0x25, 0xb8, 0xd3, 0x21, 0x94, -+ 0xe7, 0x8e, 0x51, 0xd4, 0x17, 0x38, 0xcc, 0x6d, -+ 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59, -+ 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01, -+ 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59, -+ 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01, -+ 0x86, 0xfb, 0xab, 0x2b, 0x4a, 0x94, 0xf4, 0x7a, -+ 0xa5, 0x6f, 0x0a, 0xea, 0x65, 0xd1, 0x10, 0x08 -+}; -+static const u8 enc_assoc103[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce103[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key103[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input104[] __initconst = { -+ 0xb2, 0x47, 0xa7, 0x47, 0x23, 0x49, 0x1a, 0xac, -+ 0xac, 0xaa, 0xd7, 0x09, 0xc9, 0x1e, 0x93, 0x2b, -+ 0x31, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x9a, 0xde, 0x04, 0xe7, 0x5b, 0xb7, 0x01, 0xd9, -+ 0x66, 0x06, 0x01, 0xb3, 0x47, 0x65, 0xde, 0x98, -+ 0xd5, 0xf6, 0x37, 0x81, 0x71, 0xea, 
0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0x42, 0x89, 0x79, 0x44, 0xc2, 0xa2, 0x8f, 0xa1, -+ 0x76, 0x11, 0xd7, 0xfa, 0x5c, 0x22, 0xad, 0x8f -+}; -+static const u8 enc_output104[] __initconst = { -+ 0xb2, 0x2c, 0x70, 0x68, 0xa5, 0x83, 0xfa, 0x35, -+ 0x0f, 0x85, 0x29, 0xc3, 0x75, 0xf8, 0xeb, 0x88, -+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec, -+ 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03, -+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec, -+ 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03, -+ 0xa0, 0x19, 0xac, 0x2e, 0xd6, 0x67, 0xe1, 0x7d, -+ 0xa1, 0x6f, 0x0a, 0xfa, 0x19, 0x61, 0x0d, 0x0d -+}; -+static const u8 enc_assoc104[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce104[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key104[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input105[] __initconst = { -+ 0x74, 0x0f, 0x9e, 0x49, 0xf6, 0x10, 0xef, 0xa5, -+ 0x85, 0xb6, 0x59, 0xca, 0x6e, 0xd8, 0xb4, 0x99, -+ 0x2d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x41, 0x2d, 0x96, 0xaf, 0xbe, 0x80, 0xec, 0x3e, -+ 0x79, 0xd4, 0x51, 0xb0, 0x0a, 0x2d, 0xb2, 0x9a, -+ 0xc9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0x99, 0x7a, 0xeb, 0x0c, 0x27, 0x95, 0x62, 0x46, -+ 0x69, 0xc3, 0x87, 0xf9, 0x11, 0x6a, 0xc1, 0x8d -+}; -+static const u8 enc_output105[] __initconst = { -+ 0x74, 0x64, 0x49, 0x66, 0x70, 0xda, 0x0f, 
0x3c, -+ 0x26, 0x99, 0xa7, 0x00, 0xd2, 0x3e, 0xcc, 0x3a, -+ 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b, -+ 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01, -+ 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b, -+ 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01, -+ 0x73, 0x6e, 0x18, 0x18, 0x16, 0x96, 0xa5, 0x88, -+ 0x9c, 0x31, 0x59, 0xfa, 0xab, 0xab, 0x20, 0xfd -+}; -+static const u8 enc_assoc105[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce105[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key105[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input106[] __initconst = { -+ 0xad, 0xba, 0x5d, 0x10, 0x5b, 0xc8, 0xaa, 0x06, -+ 0x2c, 0x23, 0x36, 0xcb, 0x88, 0x9d, 0xdb, 0xd5, -+ 0x37, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x17, 0x7c, 0x5f, 0xfe, 0x28, 0x75, 0xf4, 0x68, -+ 0xf6, 0xc2, 0x96, 0x57, 0x48, 0xf3, 0x59, 0x9a, -+ 0xd3, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0xcf, 0x2b, 0x22, 0x5d, 0xb1, 0x60, 0x7a, 0x10, -+ 0xe6, 0xd5, 0x40, 0x1e, 0x53, 0xb4, 0x2a, 0x8d -+}; -+static const u8 enc_output106[] __initconst = { -+ 0xad, 0xd1, 0x8a, 0x3f, 0xdd, 0x02, 0x4a, 0x9f, -+ 0x8f, 0x0c, 0xc8, 0x01, 0x34, 0x7b, 0xa3, 0x76, -+ 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d, -+ 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01, -+ 
0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d, -+ 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01, -+ 0xba, 0xd5, 0x8f, 0x10, 0xa9, 0x1e, 0x6a, 0x88, -+ 0x9a, 0xba, 0x32, 0xfd, 0x17, 0xd8, 0x33, 0x1a -+}; -+static const u8 enc_assoc106[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce106[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key106[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input107[] __initconst = { -+ 0xfe, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xc0, 0x01, 0xed, 0xc5, 0xda, 0x44, 0x2e, 0x71, -+ 0x9b, 0xce, 0x9a, 0xbe, 0x27, 0x3a, 0xf1, 0x44, -+ 0xb4, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0x48, 0x02, 0x5f, 0x41, 0xfa, 0x4e, 0x33, 0x6c, -+ 0x78, 0x69, 0x57, 0xa2, 0xa7, 0xc4, 0x93, 0x0a, -+ 0x6c, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x00, 0x26, 0x6e, 0xa1, 0xe4, 0x36, 0x44, 0xa3, -+ 0x4d, 0x8d, 0xd1, 0xdc, 0x93, 0xf2, 0xfa, 0x13 -+}; -+static const u8 enc_output107[] __initconst = { -+ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x47, 0xc3, 0x27, 0xcc, 0x36, 0x5d, 0x08, 0x87, -+ 0x59, 0x09, 0x8c, 0x34, 0x1b, 0x4a, 0xed, 0x03, -+ 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa, -+ 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01, -+ 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa, -+ 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01, -+ 0xd6, 0x8c, 0xe1, 0x74, 0x07, 0x9a, 0xdd, 0x02, -+ 0x8d, 0xd0, 0x5c, 0xf8, 0x14, 0x63, 0x04, 0x88 -+}; -+static const u8 enc_assoc107[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce107[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key107[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input108[] __initconst = { -+ 0xb5, 0x13, 0xb0, 0x6a, 0xb9, 0xac, 0x14, 0x43, -+ 0x5a, 0xcb, 0x8a, 0xa3, 0xa3, 0x7a, 0xfd, 0xb6, -+ 0x54, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x61, 0x95, 0x01, 0x93, 0xb1, 0xbf, 0x03, 0x11, -+ 0xff, 0x11, 0x79, 0x89, 0xae, 0xd9, 0xa9, 0x99, -+ 0xb0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0xb9, 0xc2, 0x7c, 0x30, 0x28, 0xaa, 0x8d, 0x69, -+ 0xef, 0x06, 0xaf, 0xc0, 0xb5, 0x9e, 0xda, 0x8e -+}; -+static const u8 enc_output108[] __initconst = { -+ 0xb5, 0x78, 0x67, 0x45, 0x3f, 0x66, 0xf4, 0xda, -+ 0xf9, 0xe4, 0x74, 0x69, 0x1f, 0x9c, 0x85, 0x15, -+ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24, -+ 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02, -+ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24, -+ 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02, -+ 0xaa, 0x48, 0xa3, 0x88, 0x7d, 0x4b, 0x05, 0x96, -+ 0x99, 0xc2, 0xfd, 0xf9, 0xc6, 0x78, 0x7e, 0x0a -+}; -+static const u8 
enc_assoc108[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce108[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key108[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input109[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xd4, 0xf1, 0x09, 0xe8, 0x14, 0xce, 0xa8, 0x5a, -+ 0x08, 0xc0, 0x11, 0xd8, 0x50, 0xdd, 0x1d, 0xcb, -+ 0xcf, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0x53, 0x40, 0xb8, 0x5a, 0x9a, 0xa0, 0x82, 0x96, -+ 0xb7, 0x7a, 0x5f, 0xc3, 0x96, 0x1f, 0x66, 0x0f, -+ 0x17, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x1b, 0x64, 0x89, 0xba, 0x84, 0xd8, 0xf5, 0x59, -+ 0x82, 0x9e, 0xd9, 0xbd, 0xa2, 0x29, 0x0f, 0x16 -+}; -+static const u8 enc_output109[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x53, 0x33, 0xc3, 0xe1, 0xf8, 0xd7, 0x8e, 0xac, -+ 0xca, 0x07, 0x07, 0x52, 0x6c, 0xad, 0x01, 0x8c, -+ 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50, -+ 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04, -+ 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50, -+ 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04, -+ 0xb9, 0x36, 0xa8, 0x17, 0xf2, 0x21, 0x1a, 0xf1, -+ 0x29, 0xe2, 0xcf, 0x16, 0x0f, 0xd4, 0x2b, 0xcb -+}; -+static const u8 enc_assoc109[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; 
-+static const u8 enc_nonce109[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key109[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input110[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xdf, 0x4c, 0x62, 0x03, 0x2d, 0x41, 0x19, 0xb5, -+ 0x88, 0x47, 0x7e, 0x99, 0x92, 0x5a, 0x56, 0xd9, -+ 0xd6, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0xfa, 0x84, 0xf0, 0x64, 0x55, 0x36, 0x42, 0x1b, -+ 0x2b, 0xb9, 0x24, 0x6e, 0xc2, 0x19, 0xed, 0x0b, -+ 0x0e, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0xb2, 0xa0, 0xc1, 0x84, 0x4b, 0x4e, 0x35, 0xd4, -+ 0x1e, 0x5d, 0xa2, 0x10, 0xf6, 0x2f, 0x84, 0x12 -+}; -+static const u8 enc_output110[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x58, 0x8e, 0xa8, 0x0a, 0xc1, 0x58, 0x3f, 0x43, -+ 0x4a, 0x80, 0x68, 0x13, 0xae, 0x2a, 0x4a, 0x9e, -+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd, -+ 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00, -+ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd, -+ 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00, -+ 0x9f, 0x7a, 0xc4, 0x35, 0x1f, 0x6b, 0x91, 0xe6, -+ 0x30, 0x97, 0xa7, 0x13, 0x11, 0x5d, 0x05, 0xbe -+}; -+static const u8 enc_assoc110[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce110[] __initconst = { -+ 0x00, 0x00, 
0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key110[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input111[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x13, 0xf8, 0x0a, 0x00, 0x6d, 0xc1, 0xbb, 0xda, -+ 0xd6, 0x39, 0xa9, 0x2f, 0xc7, 0xec, 0xa6, 0x55, -+ 0xf7, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0x63, 0x48, 0xb8, 0xfd, 0x29, 0xbf, 0x96, 0xd5, -+ 0x63, 0xa5, 0x17, 0xe2, 0x7d, 0x7b, 0xfc, 0x0f, -+ 0x2f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x2b, 0x6c, 0x89, 0x1d, 0x37, 0xc7, 0xe1, 0x1a, -+ 0x56, 0x41, 0x91, 0x9c, 0x49, 0x4d, 0x95, 0x16 -+}; -+static const u8 enc_output111[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x94, 0x3a, 0xc0, 0x09, 0x81, 0xd8, 0x9d, 0x2c, -+ 0x14, 0xfe, 0xbf, 0xa5, 0xfb, 0x9c, 0xba, 0x12, -+ 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13, -+ 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04, -+ 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13, -+ 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04, -+ 0x9a, 0x18, 0xa8, 0x28, 0x07, 0x02, 0x69, 0xf4, -+ 0x47, 0x00, 0xd0, 0x09, 0xe7, 0x17, 0x1c, 0xc9 -+}; -+static const u8 enc_assoc111[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce111[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 
enc_key111[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input112[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x82, 0xe5, 0x9b, 0x45, 0x82, 0x91, 0x50, 0x38, -+ 0xf9, 0x33, 0x81, 0x1e, 0x65, 0x2d, 0xc6, 0x6a, -+ 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0xb6, 0x71, 0xc8, 0xca, 0xc2, 0x70, 0xc2, 0x65, -+ 0xa0, 0xac, 0x2f, 0x53, 0x57, 0x99, 0x88, 0x0a, -+ 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0xfe, 0x55, 0xf9, 0x2a, 0xdc, 0x08, 0xb5, 0xaa, -+ 0x95, 0x48, 0xa9, 0x2d, 0x63, 0xaf, 0xe1, 0x13 -+}; -+static const u8 enc_output112[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x05, 0x27, 0x51, 0x4c, 0x6e, 0x88, 0x76, 0xce, -+ 0x3b, 0xf4, 0x97, 0x94, 0x59, 0x5d, 0xda, 0x2d, -+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3, -+ 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01, -+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3, -+ 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01, -+ 0xb4, 0x36, 0xa8, 0x2b, 0x93, 0xd5, 0x55, 0xf7, -+ 0x43, 0x00, 0xd0, 0x19, 0x9b, 0xa7, 0x18, 0xce -+}; -+static const u8 enc_assoc112[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce112[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key112[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 
0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input113[] __initconst = { -+ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0xf1, 0xd1, 0x28, 0x87, 0xb7, 0x21, 0x69, 0x86, -+ 0xa1, 0x2d, 0x79, 0x09, 0x8b, 0x6d, 0xe6, 0x0f, -+ 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0xa7, 0xc7, 0x58, 0x99, 0xf3, 0xe6, 0x0a, 0xf1, -+ 0xfc, 0xb6, 0xc7, 0x30, 0x7d, 0x87, 0x59, 0x0f, -+ 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0xef, 0xe3, 0x69, 0x79, 0xed, 0x9e, 0x7d, 0x3e, -+ 0xc9, 0x52, 0x41, 0x4e, 0x49, 0xb1, 0x30, 0x16 -+}; -+static const u8 enc_output113[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x76, 0x13, 0xe2, 0x8e, 0x5b, 0x38, 0x4f, 0x70, -+ 0x63, 0xea, 0x6f, 0x83, 0xb7, 0x1d, 0xfa, 0x48, -+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37, -+ 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04, -+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37, -+ 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04, -+ 0xce, 0x54, 0xa8, 0x2e, 0x1f, 0xa9, 0x42, 0xfa, -+ 0x3f, 0x00, 0xd0, 0x29, 0x4f, 0x37, 0x15, 0xd3 -+}; -+static const u8 enc_assoc113[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce113[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key113[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 
0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input114[] __initconst = { -+ 0xcb, 0xf1, 0xda, 0x9e, 0x0b, 0xa9, 0x37, 0x73, -+ 0x74, 0xe6, 0x9e, 0x1c, 0x0e, 0x60, 0x0c, 0xfc, -+ 0x34, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0xbe, 0x3f, 0xa6, 0x6b, 0x6c, 0xe7, 0x80, 0x8a, -+ 0xa3, 0xe4, 0x59, 0x49, 0xf9, 0x44, 0x64, 0x9f, -+ 0xd0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0x66, 0x68, 0xdb, 0xc8, 0xf5, 0xf2, 0x0e, 0xf2, -+ 0xb3, 0xf3, 0x8f, 0x00, 0xe2, 0x03, 0x17, 0x88 -+}; -+static const u8 enc_output114[] __initconst = { -+ 0xcb, 0x9a, 0x0d, 0xb1, 0x8d, 0x63, 0xd7, 0xea, -+ 0xd7, 0xc9, 0x60, 0xd6, 0xb2, 0x86, 0x74, 0x5f, -+ 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf, -+ 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04, -+ 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf, -+ 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04, -+ 0x23, 0x83, 0xab, 0x0b, 0x79, 0x92, 0x05, 0x69, -+ 0x9b, 0x51, 0x0a, 0xa7, 0x09, 0xbf, 0x31, 0xf1 -+}; -+static const u8 enc_assoc114[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce114[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key114[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input115[] __initconst = { -+ 0x8f, 0x27, 0x86, 0x94, 0xc4, 0xe9, 
0xda, 0xeb, -+ 0xd5, 0x8d, 0x3e, 0x5b, 0x96, 0x6e, 0x8b, 0x68, -+ 0x42, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09, -+ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8, -+ 0x06, 0x53, 0xe7, 0xa3, 0x31, 0x71, 0x88, 0x33, -+ 0xac, 0xc3, 0xb9, 0xad, 0xff, 0x1c, 0x31, 0x98, -+ 0xa6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39, -+ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4, -+ 0xde, 0x04, 0x9a, 0x00, 0xa8, 0x64, 0x06, 0x4b, -+ 0xbc, 0xd4, 0x6f, 0xe4, 0xe4, 0x5b, 0x42, 0x8f -+}; -+static const u8 enc_output115[] __initconst = { -+ 0x8f, 0x4c, 0x51, 0xbb, 0x42, 0x23, 0x3a, 0x72, -+ 0x76, 0xa2, 0xc0, 0x91, 0x2a, 0x88, 0xf3, 0xcb, -+ 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06, -+ 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03, -+ 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06, -+ 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03, -+ 0x8b, 0xfb, 0xab, 0x17, 0xa9, 0xe0, 0xb8, 0x74, -+ 0x8b, 0x51, 0x0a, 0xe7, 0xd9, 0xfd, 0x23, 0x05 -+}; -+static const u8 enc_assoc115[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce115[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key115[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input116[] __initconst = { -+ 0xd5, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x9a, 0x22, 0xd7, 0x0a, 0x48, 0xe2, 0x4f, 0xdd, -+ 0xcd, 0xd4, 0x41, 0x9d, 0xe6, 0x4c, 0x8f, 0x44, -+ 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, 
-+ 0x77, 0xb5, 0xc9, 0x07, 0xd9, 0xc9, 0xe1, 0xea, -+ 0x51, 0x85, 0x1a, 0x20, 0x4a, 0xad, 0x9f, 0x0a, -+ 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x3f, 0x91, 0xf8, 0xe7, 0xc7, 0xb1, 0x96, 0x25, -+ 0x64, 0x61, 0x9c, 0x5e, 0x7e, 0x9b, 0xf6, 0x13 -+}; -+static const u8 enc_output116[] __initconst = { -+ 0xd5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x1d, 0xe0, 0x1d, 0x03, 0xa4, 0xfb, 0x69, 0x2b, -+ 0x0f, 0x13, 0x57, 0x17, 0xda, 0x3c, 0x93, 0x03, -+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c, -+ 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01, -+ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c, -+ 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01, -+ 0x49, 0xbc, 0x6e, 0x9f, 0xc5, 0x1c, 0x4d, 0x50, -+ 0x30, 0x36, 0x64, 0x4d, 0x84, 0x27, 0x73, 0xd2 -+}; -+static const u8 enc_assoc116[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce116[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key116[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input117[] __initconst = { -+ 0xdb, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x75, 0xd5, 0x64, 0x3a, 0xa5, 0xaf, 0x93, 0x4d, -+ 0x8c, 0xce, 0x39, 0x2c, 0xc3, 0xee, 0xdb, 0x47, -+ 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0x60, 0x1b, 0x5a, 0xd2, 0x06, 0x7f, 0x28, 0x06, -+ 0x6a, 
0x8f, 0x32, 0x81, 0x71, 0x5b, 0xa8, 0x08, -+ 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x28, 0x3f, 0x6b, 0x32, 0x18, 0x07, 0x5f, 0xc9, -+ 0x5f, 0x6b, 0xb4, 0xff, 0x45, 0x6d, 0xc1, 0x11 -+}; -+static const u8 enc_output117[] __initconst = { -+ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xf2, 0x17, 0xae, 0x33, 0x49, 0xb6, 0xb5, 0xbb, -+ 0x4e, 0x09, 0x2f, 0xa6, 0xff, 0x9e, 0xc7, 0x00, -+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0, -+ 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03, -+ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0, -+ 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03, -+ 0x63, 0xda, 0x6e, 0xa2, 0x51, 0xf0, 0x39, 0x53, -+ 0x2c, 0x36, 0x64, 0x5d, 0x38, 0xb7, 0x6f, 0xd7 -+}; -+static const u8 enc_assoc117[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce117[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key117[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - edge case intermediate sums in poly1305 */ -+static const u8 enc_input118[] __initconst = { -+ 0x93, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66, -+ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c, -+ 0x62, 0x48, 0x39, 0x60, 0x42, 0x16, 0xe4, 0x03, -+ 0xeb, 0xcc, 0x6a, 0xf5, 0x59, 0xec, 0x8b, 0x43, -+ 0x97, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca, -+ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64, -+ 0xd8, 0xc8, 0xc3, 0xfa, 0x1a, 0x9e, 0x47, 0x4a, -+ 0xbe, 0x52, 0xd0, 0x2c, 0x81, 0x87, 0xe9, 0x0f, -+ 0x4f, 0x2d, 0x90, 
0x96, 0x52, 0x4f, 0xa1, 0xb2, -+ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73, -+ 0x90, 0xec, 0xf2, 0x1a, 0x04, 0xe6, 0x30, 0x85, -+ 0x8b, 0xb6, 0x56, 0x52, 0xb5, 0xb1, 0x80, 0x16 -+}; -+static const u8 enc_output118[] __initconst = { -+ 0x93, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xe5, 0x8a, 0xf3, 0x69, 0xae, 0x0f, 0xc2, 0xf5, -+ 0x29, 0x0b, 0x7c, 0x7f, 0x65, 0x9c, 0x97, 0x04, -+ 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c, -+ 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04, -+ 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c, -+ 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04, -+ 0x73, 0xeb, 0x27, 0x24, 0xb5, 0xc4, 0x05, 0xf0, -+ 0x4d, 0x00, 0xd0, 0xf1, 0x58, 0x40, 0xa1, 0xc1 -+}; -+static const u8 enc_assoc118[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce118[] __initconst = { -+ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52 -+}; -+static const u8 enc_key118[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+static const struct chacha20poly1305_testvec -+chacha20poly1305_enc_vectors[] __initconst = { -+ { enc_input001, enc_output001, enc_assoc001, enc_nonce001, enc_key001, -+ sizeof(enc_input001), sizeof(enc_assoc001), sizeof(enc_nonce001) }, -+ { enc_input002, enc_output002, enc_assoc002, enc_nonce002, enc_key002, -+ sizeof(enc_input002), sizeof(enc_assoc002), sizeof(enc_nonce002) }, -+ { enc_input003, enc_output003, enc_assoc003, enc_nonce003, enc_key003, -+ sizeof(enc_input003), sizeof(enc_assoc003), sizeof(enc_nonce003) }, -+ { enc_input004, enc_output004, enc_assoc004, enc_nonce004, 
enc_key004, -+ sizeof(enc_input004), sizeof(enc_assoc004), sizeof(enc_nonce004) }, -+ { enc_input005, enc_output005, enc_assoc005, enc_nonce005, enc_key005, -+ sizeof(enc_input005), sizeof(enc_assoc005), sizeof(enc_nonce005) }, -+ { enc_input006, enc_output006, enc_assoc006, enc_nonce006, enc_key006, -+ sizeof(enc_input006), sizeof(enc_assoc006), sizeof(enc_nonce006) }, -+ { enc_input007, enc_output007, enc_assoc007, enc_nonce007, enc_key007, -+ sizeof(enc_input007), sizeof(enc_assoc007), sizeof(enc_nonce007) }, -+ { enc_input008, enc_output008, enc_assoc008, enc_nonce008, enc_key008, -+ sizeof(enc_input008), sizeof(enc_assoc008), sizeof(enc_nonce008) }, -+ { enc_input009, enc_output009, enc_assoc009, enc_nonce009, enc_key009, -+ sizeof(enc_input009), sizeof(enc_assoc009), sizeof(enc_nonce009) }, -+ { enc_input010, enc_output010, enc_assoc010, enc_nonce010, enc_key010, -+ sizeof(enc_input010), sizeof(enc_assoc010), sizeof(enc_nonce010) }, -+ { enc_input011, enc_output011, enc_assoc011, enc_nonce011, enc_key011, -+ sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) }, -+ { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012, -+ sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) }, -+ { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053, -+ sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) }, -+ { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054, -+ sizeof(enc_input054), sizeof(enc_assoc054), sizeof(enc_nonce054) }, -+ { enc_input055, enc_output055, enc_assoc055, enc_nonce055, enc_key055, -+ sizeof(enc_input055), sizeof(enc_assoc055), sizeof(enc_nonce055) }, -+ { enc_input056, enc_output056, enc_assoc056, enc_nonce056, enc_key056, -+ sizeof(enc_input056), sizeof(enc_assoc056), sizeof(enc_nonce056) }, -+ { enc_input057, enc_output057, enc_assoc057, enc_nonce057, enc_key057, -+ sizeof(enc_input057), sizeof(enc_assoc057), sizeof(enc_nonce057) }, -+ { enc_input058, 
enc_output058, enc_assoc058, enc_nonce058, enc_key058, -+ sizeof(enc_input058), sizeof(enc_assoc058), sizeof(enc_nonce058) }, -+ { enc_input059, enc_output059, enc_assoc059, enc_nonce059, enc_key059, -+ sizeof(enc_input059), sizeof(enc_assoc059), sizeof(enc_nonce059) }, -+ { enc_input060, enc_output060, enc_assoc060, enc_nonce060, enc_key060, -+ sizeof(enc_input060), sizeof(enc_assoc060), sizeof(enc_nonce060) }, -+ { enc_input061, enc_output061, enc_assoc061, enc_nonce061, enc_key061, -+ sizeof(enc_input061), sizeof(enc_assoc061), sizeof(enc_nonce061) }, -+ { enc_input062, enc_output062, enc_assoc062, enc_nonce062, enc_key062, -+ sizeof(enc_input062), sizeof(enc_assoc062), sizeof(enc_nonce062) }, -+ { enc_input063, enc_output063, enc_assoc063, enc_nonce063, enc_key063, -+ sizeof(enc_input063), sizeof(enc_assoc063), sizeof(enc_nonce063) }, -+ { enc_input064, enc_output064, enc_assoc064, enc_nonce064, enc_key064, -+ sizeof(enc_input064), sizeof(enc_assoc064), sizeof(enc_nonce064) }, -+ { enc_input065, enc_output065, enc_assoc065, enc_nonce065, enc_key065, -+ sizeof(enc_input065), sizeof(enc_assoc065), sizeof(enc_nonce065) }, -+ { enc_input066, enc_output066, enc_assoc066, enc_nonce066, enc_key066, -+ sizeof(enc_input066), sizeof(enc_assoc066), sizeof(enc_nonce066) }, -+ { enc_input067, enc_output067, enc_assoc067, enc_nonce067, enc_key067, -+ sizeof(enc_input067), sizeof(enc_assoc067), sizeof(enc_nonce067) }, -+ { enc_input068, enc_output068, enc_assoc068, enc_nonce068, enc_key068, -+ sizeof(enc_input068), sizeof(enc_assoc068), sizeof(enc_nonce068) }, -+ { enc_input069, enc_output069, enc_assoc069, enc_nonce069, enc_key069, -+ sizeof(enc_input069), sizeof(enc_assoc069), sizeof(enc_nonce069) }, -+ { enc_input070, enc_output070, enc_assoc070, enc_nonce070, enc_key070, -+ sizeof(enc_input070), sizeof(enc_assoc070), sizeof(enc_nonce070) }, -+ { enc_input071, enc_output071, enc_assoc071, enc_nonce071, enc_key071, -+ sizeof(enc_input071), sizeof(enc_assoc071), 
sizeof(enc_nonce071) }, -+ { enc_input072, enc_output072, enc_assoc072, enc_nonce072, enc_key072, -+ sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) }, -+ { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073, -+ sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) }, -+ { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076, -+ sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) }, -+ { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077, -+ sizeof(enc_input077), sizeof(enc_assoc077), sizeof(enc_nonce077) }, -+ { enc_input078, enc_output078, enc_assoc078, enc_nonce078, enc_key078, -+ sizeof(enc_input078), sizeof(enc_assoc078), sizeof(enc_nonce078) }, -+ { enc_input079, enc_output079, enc_assoc079, enc_nonce079, enc_key079, -+ sizeof(enc_input079), sizeof(enc_assoc079), sizeof(enc_nonce079) }, -+ { enc_input080, enc_output080, enc_assoc080, enc_nonce080, enc_key080, -+ sizeof(enc_input080), sizeof(enc_assoc080), sizeof(enc_nonce080) }, -+ { enc_input081, enc_output081, enc_assoc081, enc_nonce081, enc_key081, -+ sizeof(enc_input081), sizeof(enc_assoc081), sizeof(enc_nonce081) }, -+ { enc_input082, enc_output082, enc_assoc082, enc_nonce082, enc_key082, -+ sizeof(enc_input082), sizeof(enc_assoc082), sizeof(enc_nonce082) }, -+ { enc_input083, enc_output083, enc_assoc083, enc_nonce083, enc_key083, -+ sizeof(enc_input083), sizeof(enc_assoc083), sizeof(enc_nonce083) }, -+ { enc_input084, enc_output084, enc_assoc084, enc_nonce084, enc_key084, -+ sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) }, -+ { enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085, -+ sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) }, -+ { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093, -+ sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) }, -+ { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094, -+ 
sizeof(enc_input094), sizeof(enc_assoc094), sizeof(enc_nonce094) }, -+ { enc_input095, enc_output095, enc_assoc095, enc_nonce095, enc_key095, -+ sizeof(enc_input095), sizeof(enc_assoc095), sizeof(enc_nonce095) }, -+ { enc_input096, enc_output096, enc_assoc096, enc_nonce096, enc_key096, -+ sizeof(enc_input096), sizeof(enc_assoc096), sizeof(enc_nonce096) }, -+ { enc_input097, enc_output097, enc_assoc097, enc_nonce097, enc_key097, -+ sizeof(enc_input097), sizeof(enc_assoc097), sizeof(enc_nonce097) }, -+ { enc_input098, enc_output098, enc_assoc098, enc_nonce098, enc_key098, -+ sizeof(enc_input098), sizeof(enc_assoc098), sizeof(enc_nonce098) }, -+ { enc_input099, enc_output099, enc_assoc099, enc_nonce099, enc_key099, -+ sizeof(enc_input099), sizeof(enc_assoc099), sizeof(enc_nonce099) }, -+ { enc_input100, enc_output100, enc_assoc100, enc_nonce100, enc_key100, -+ sizeof(enc_input100), sizeof(enc_assoc100), sizeof(enc_nonce100) }, -+ { enc_input101, enc_output101, enc_assoc101, enc_nonce101, enc_key101, -+ sizeof(enc_input101), sizeof(enc_assoc101), sizeof(enc_nonce101) }, -+ { enc_input102, enc_output102, enc_assoc102, enc_nonce102, enc_key102, -+ sizeof(enc_input102), sizeof(enc_assoc102), sizeof(enc_nonce102) }, -+ { enc_input103, enc_output103, enc_assoc103, enc_nonce103, enc_key103, -+ sizeof(enc_input103), sizeof(enc_assoc103), sizeof(enc_nonce103) }, -+ { enc_input104, enc_output104, enc_assoc104, enc_nonce104, enc_key104, -+ sizeof(enc_input104), sizeof(enc_assoc104), sizeof(enc_nonce104) }, -+ { enc_input105, enc_output105, enc_assoc105, enc_nonce105, enc_key105, -+ sizeof(enc_input105), sizeof(enc_assoc105), sizeof(enc_nonce105) }, -+ { enc_input106, enc_output106, enc_assoc106, enc_nonce106, enc_key106, -+ sizeof(enc_input106), sizeof(enc_assoc106), sizeof(enc_nonce106) }, -+ { enc_input107, enc_output107, enc_assoc107, enc_nonce107, enc_key107, -+ sizeof(enc_input107), sizeof(enc_assoc107), sizeof(enc_nonce107) }, -+ { enc_input108, enc_output108, 
enc_assoc108, enc_nonce108, enc_key108, -+ sizeof(enc_input108), sizeof(enc_assoc108), sizeof(enc_nonce108) }, -+ { enc_input109, enc_output109, enc_assoc109, enc_nonce109, enc_key109, -+ sizeof(enc_input109), sizeof(enc_assoc109), sizeof(enc_nonce109) }, -+ { enc_input110, enc_output110, enc_assoc110, enc_nonce110, enc_key110, -+ sizeof(enc_input110), sizeof(enc_assoc110), sizeof(enc_nonce110) }, -+ { enc_input111, enc_output111, enc_assoc111, enc_nonce111, enc_key111, -+ sizeof(enc_input111), sizeof(enc_assoc111), sizeof(enc_nonce111) }, -+ { enc_input112, enc_output112, enc_assoc112, enc_nonce112, enc_key112, -+ sizeof(enc_input112), sizeof(enc_assoc112), sizeof(enc_nonce112) }, -+ { enc_input113, enc_output113, enc_assoc113, enc_nonce113, enc_key113, -+ sizeof(enc_input113), sizeof(enc_assoc113), sizeof(enc_nonce113) }, -+ { enc_input114, enc_output114, enc_assoc114, enc_nonce114, enc_key114, -+ sizeof(enc_input114), sizeof(enc_assoc114), sizeof(enc_nonce114) }, -+ { enc_input115, enc_output115, enc_assoc115, enc_nonce115, enc_key115, -+ sizeof(enc_input115), sizeof(enc_assoc115), sizeof(enc_nonce115) }, -+ { enc_input116, enc_output116, enc_assoc116, enc_nonce116, enc_key116, -+ sizeof(enc_input116), sizeof(enc_assoc116), sizeof(enc_nonce116) }, -+ { enc_input117, enc_output117, enc_assoc117, enc_nonce117, enc_key117, -+ sizeof(enc_input117), sizeof(enc_assoc117), sizeof(enc_nonce117) }, -+ { enc_input118, enc_output118, enc_assoc118, enc_nonce118, enc_key118, -+ sizeof(enc_input118), sizeof(enc_assoc118), sizeof(enc_nonce118) } -+}; -+ -+static const u8 dec_input001[] __initconst = { -+ 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4, -+ 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd, -+ 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89, -+ 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2, -+ 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee, -+ 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0, -+ 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00, -+ 0xd4, 0xf0, 0x3b, 
0x7f, 0x35, 0x58, 0x94, 0xcf, -+ 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce, -+ 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81, -+ 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd, -+ 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55, -+ 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61, -+ 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38, -+ 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0, -+ 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4, -+ 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46, -+ 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9, -+ 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e, -+ 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e, -+ 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15, -+ 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a, -+ 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea, -+ 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a, -+ 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99, -+ 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e, -+ 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10, -+ 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10, -+ 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94, -+ 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30, -+ 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf, -+ 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29, -+ 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70, -+ 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb, -+ 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, -+ 0x38 -+}; -+static const u8 dec_output001[] __initconst = { -+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, -+ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, -+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, -+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, -+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, -+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, -+ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, -+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, -+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, -+ 0x20, 0x75, 
0x70, 0x64, 0x61, 0x74, 0x65, 0x64, -+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, -+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, -+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, -+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, -+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, -+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, -+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, -+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, -+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, -+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, -+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, -+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, -+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, -+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, -+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, -+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, -+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, -+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, -+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, -+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, -+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, -+ 0x9d -+}; -+static const u8 dec_assoc001[] __initconst = { -+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x4e, 0x91 -+}; -+static const u8 dec_nonce001[] __initconst = { -+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 -+}; -+static const u8 dec_key001[] __initconst = { -+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, -+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, -+ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, -+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 -+}; -+ -+static const u8 dec_input002[] __initconst = { -+ 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1, -+ 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92 -+}; -+static const u8 dec_output002[] __initconst = { }; -+static const u8 dec_assoc002[] __initconst = { }; -+static const u8 dec_nonce002[] __initconst = { -+ 0xca, 0xbf, 0x33, 0x71, 
0x32, 0x45, 0x77, 0x8e -+}; -+static const u8 dec_key002[] __initconst = { -+ 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f, -+ 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86, -+ 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef, -+ 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68 -+}; -+ -+static const u8 dec_input003[] __initconst = { -+ 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6, -+ 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77 -+}; -+static const u8 dec_output003[] __initconst = { }; -+static const u8 dec_assoc003[] __initconst = { -+ 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b -+}; -+static const u8 dec_nonce003[] __initconst = { -+ 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d -+}; -+static const u8 dec_key003[] __initconst = { -+ 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88, -+ 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a, -+ 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08, -+ 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d -+}; -+ -+static const u8 dec_input004[] __initconst = { -+ 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2, -+ 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac, -+ 0x89 -+}; -+static const u8 dec_output004[] __initconst = { -+ 0xa4 -+}; -+static const u8 dec_assoc004[] __initconst = { -+ 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40 -+}; -+static const u8 dec_nonce004[] __initconst = { -+ 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4 -+}; -+static const u8 dec_key004[] __initconst = { -+ 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8, -+ 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1, -+ 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d, -+ 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e -+}; -+ -+static const u8 dec_input005[] __initconst = { -+ 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e, -+ 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c, -+ 0xac -+}; -+static const u8 dec_output005[] __initconst = { -+ 0x2d -+}; -+static const u8 dec_assoc005[] __initconst = { }; -+static const u8 dec_nonce005[] __initconst = { -+ 0x20, 0x1c, 
0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30 -+}; -+static const u8 dec_key005[] __initconst = { -+ 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31, -+ 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87, -+ 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01, -+ 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87 -+}; -+ -+static const u8 dec_input006[] __initconst = { -+ 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1, -+ 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15, -+ 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c, -+ 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda, -+ 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11, -+ 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8, -+ 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc, -+ 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3, -+ 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5, -+ 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02, -+ 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93, -+ 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78, -+ 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1, -+ 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66, -+ 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc, -+ 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0, -+ 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d, -+ 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a, -+ 0xeb -+}; -+static const u8 dec_output006[] __initconst = { -+ 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a, -+ 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92, -+ 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37, -+ 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50, -+ 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec, -+ 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb, -+ 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66, -+ 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb, -+ 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b, -+ 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e, -+ 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3, -+ 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0, -+ 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb, -+ 0x01, 
0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41, -+ 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc, -+ 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde, -+ 0x8f -+}; -+static const u8 dec_assoc006[] __initconst = { -+ 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b -+}; -+static const u8 dec_nonce006[] __initconst = { -+ 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c -+}; -+static const u8 dec_key006[] __initconst = { -+ 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae, -+ 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78, -+ 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9, -+ 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01 -+}; -+ -+static const u8 dec_input007[] __initconst = { -+ 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c, -+ 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8, -+ 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c, -+ 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb, -+ 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0, -+ 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21, -+ 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70, -+ 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac, -+ 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99, -+ 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9, -+ 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f, -+ 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7, -+ 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53, -+ 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12, -+ 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6, -+ 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0, -+ 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54, -+ 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6, -+ 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e, -+ 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb, -+ 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30, -+ 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f, -+ 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2, -+ 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e, -+ 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34, -+ 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39, -+ 
0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7, -+ 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9, -+ 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82, -+ 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04, -+ 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34, -+ 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef, -+ 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42, -+ 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53 -+}; -+static const u8 dec_output007[] __initconst = { -+ 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5, -+ 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a, -+ 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1, -+ 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17, -+ 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c, -+ 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1, -+ 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51, -+ 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1, -+ 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86, -+ 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a, -+ 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a, -+ 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98, -+ 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36, -+ 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34, -+ 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57, -+ 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84, -+ 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4, -+ 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80, -+ 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82, -+ 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5, -+ 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d, -+ 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c, -+ 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf, -+ 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc, -+ 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3, -+ 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14, -+ 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81, -+ 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77, -+ 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3, -+ 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2, -+ 0x40, 
0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b, -+ 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3 -+}; -+static const u8 dec_assoc007[] __initconst = { }; -+static const u8 dec_nonce007[] __initconst = { -+ 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0 -+}; -+static const u8 dec_key007[] __initconst = { -+ 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd, -+ 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c, -+ 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80, -+ 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01 -+}; -+ -+static const u8 dec_input008[] __initconst = { -+ 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd, -+ 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1, -+ 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93, -+ 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d, -+ 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c, -+ 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6, -+ 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4, -+ 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5, -+ 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84, -+ 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd, -+ 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed, -+ 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab, -+ 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13, -+ 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49, -+ 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6, -+ 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8, -+ 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2, -+ 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94, -+ 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18, -+ 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60, -+ 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8, -+ 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b, -+ 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f, -+ 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c, -+ 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20, -+ 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff, -+ 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9, -+ 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c, -+ 0x16, 
0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9, -+ 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6, -+ 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea, -+ 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e, -+ 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82, -+ 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1, -+ 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70, -+ 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1, -+ 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c, -+ 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7, -+ 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc, -+ 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc, -+ 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3, -+ 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb, -+ 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97, -+ 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f, -+ 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39, -+ 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f, -+ 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d, -+ 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2, -+ 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d, -+ 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96, -+ 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b, -+ 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20, -+ 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95, -+ 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb, -+ 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35, -+ 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62, -+ 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9, -+ 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6, -+ 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8, -+ 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a, -+ 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93, -+ 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14, -+ 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99, -+ 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86, -+ 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f, -+ 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54 -+}; -+static const u8 dec_output008[] __initconst = { -+ 0xc3, 0x09, 
0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10, -+ 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2, -+ 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c, -+ 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb, -+ 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12, -+ 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa, -+ 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6, -+ 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4, -+ 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91, -+ 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb, -+ 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47, -+ 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15, -+ 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f, -+ 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a, -+ 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3, -+ 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97, -+ 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80, -+ 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e, -+ 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f, -+ 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10, -+ 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a, -+ 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0, -+ 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35, -+ 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d, -+ 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d, -+ 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57, -+ 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4, -+ 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f, -+ 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39, -+ 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda, -+ 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17, -+ 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43, -+ 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19, -+ 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09, -+ 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21, -+ 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07, -+ 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f, -+ 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b, -+ 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a, -+ 0x41, 0xdd, 0xd1, 
0x85, 0x5e, 0x37, 0x19, 0xed, -+ 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2, -+ 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca, -+ 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff, -+ 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b, -+ 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b, -+ 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b, -+ 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6, -+ 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04, -+ 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48, -+ 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b, -+ 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13, -+ 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8, -+ 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f, -+ 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0, -+ 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92, -+ 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a, -+ 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41, -+ 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17, -+ 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30, -+ 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20, -+ 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49, -+ 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a, -+ 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b, -+ 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3 -+}; -+static const u8 dec_assoc008[] __initconst = { }; -+static const u8 dec_nonce008[] __initconst = { -+ 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02 -+}; -+static const u8 dec_key008[] __initconst = { -+ 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53, -+ 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0, -+ 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86, -+ 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba -+}; -+ -+static const u8 dec_input009[] __initconst = { -+ 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf, -+ 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66, -+ 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72, -+ 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd, -+ 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28, -+ 0x48, 0xc5, 0x3d, 
0x98, 0xa3, 0xbc, 0xe0, 0xbe, -+ 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06, -+ 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5, -+ 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7, -+ 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09, -+ 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a, -+ 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00, -+ 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62, -+ 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb, -+ 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2, -+ 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28, -+ 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e, -+ 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a, -+ 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6, -+ 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83, -+ 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9, -+ 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a, -+ 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79, -+ 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a, -+ 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea, -+ 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b, -+ 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52, -+ 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb, -+ 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89, -+ 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad, -+ 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19, -+ 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71, -+ 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d, -+ 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54, -+ 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a, -+ 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d, -+ 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95, -+ 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42, -+ 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16, -+ 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6, -+ 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf, -+ 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d, -+ 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f, -+ 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b, -+ 0x53, 0xae, 0xaf, 0x1c, 
0x33, 0x5f, 0x55, 0x6e, -+ 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4, -+ 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c, -+ 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4, -+ 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1, -+ 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb, -+ 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff, -+ 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2, -+ 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06, -+ 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66, -+ 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90, -+ 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55, -+ 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc, -+ 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8, -+ 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62, -+ 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba, -+ 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2, -+ 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89, -+ 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06, -+ 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90, -+ 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf, -+ 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8, -+ 0xae -+}; -+static const u8 dec_output009[] __initconst = { -+ 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b, -+ 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8, -+ 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca, -+ 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09, -+ 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5, -+ 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85, -+ 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44, -+ 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97, -+ 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77, -+ 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41, -+ 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c, -+ 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00, -+ 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82, -+ 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f, -+ 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e, -+ 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55, -+ 0x10, 0x9a, 0xdf, 
0x67, 0x22, 0x8b, 0x43, 0xab, -+ 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17, -+ 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e, -+ 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f, -+ 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82, -+ 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3, -+ 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f, -+ 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0, -+ 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08, -+ 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b, -+ 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85, -+ 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28, -+ 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c, -+ 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62, -+ 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2, -+ 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3, -+ 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62, -+ 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40, -+ 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f, -+ 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b, -+ 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91, -+ 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5, -+ 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c, -+ 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4, -+ 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49, -+ 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04, -+ 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03, -+ 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa, -+ 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec, -+ 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6, -+ 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69, -+ 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36, -+ 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8, -+ 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf, -+ 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe, -+ 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82, -+ 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab, -+ 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d, -+ 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3, -+ 0x7b, 0xfc, 0xd1, 0x2b, 
0xc3, 0x21, 0xeb, 0xe5, -+ 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34, -+ 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49, -+ 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f, -+ 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d, -+ 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42, -+ 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef, -+ 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27, -+ 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52, -+ 0x65 -+}; -+static const u8 dec_assoc009[] __initconst = { -+ 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e, -+ 0xef -+}; -+static const u8 dec_nonce009[] __initconst = { -+ 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78 -+}; -+static const u8 dec_key009[] __initconst = { -+ 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5, -+ 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86, -+ 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2, -+ 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b -+}; -+ -+static const u8 dec_input010[] __initconst = { -+ 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b, -+ 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74, -+ 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1, -+ 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd, -+ 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6, -+ 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5, -+ 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96, -+ 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02, -+ 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30, -+ 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57, -+ 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53, -+ 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65, -+ 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71, -+ 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9, -+ 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18, -+ 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce, -+ 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a, -+ 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69, -+ 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2, -+ 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95, -+ 0x06, 
0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49, -+ 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e, -+ 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a, -+ 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a, -+ 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e, -+ 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19, -+ 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b, -+ 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75, -+ 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d, -+ 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d, -+ 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f, -+ 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a, -+ 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d, -+ 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5, -+ 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c, -+ 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77, -+ 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46, -+ 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43, -+ 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe, -+ 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8, -+ 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76, -+ 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47, -+ 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8, -+ 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32, -+ 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59, -+ 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae, -+ 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a, -+ 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3, -+ 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74, -+ 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75, -+ 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2, -+ 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e, -+ 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2, -+ 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9, -+ 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1, -+ 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07, -+ 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79, -+ 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71, -+ 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad, -+ 0xc2, 0xdd, 
0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a, -+ 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c, -+ 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9, -+ 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79, -+ 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27, -+ 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90, -+ 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe, -+ 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99, -+ 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1, -+ 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9, -+ 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0, -+ 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28, -+ 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e, -+ 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20, -+ 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60, -+ 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47, -+ 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68, -+ 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe, -+ 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33, -+ 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8, -+ 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38, -+ 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7, -+ 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04, -+ 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c, -+ 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f, -+ 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c, -+ 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77, -+ 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54, -+ 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5, -+ 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4, -+ 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2, -+ 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e, -+ 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27, -+ 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f, -+ 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92, -+ 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55, -+ 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe, -+ 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04, -+ 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4, -+ 0x4c, 0x31, 0x43, 
0x7a, 0x93, 0xe2, 0x9b, 0x56, -+ 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02, -+ 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2, -+ 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8, -+ 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27, -+ 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47, -+ 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10, -+ 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43, -+ 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0, -+ 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee, -+ 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47, -+ 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6, -+ 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d, -+ 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c, -+ 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3, -+ 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b, -+ 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09, -+ 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d, -+ 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1, -+ 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd, -+ 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4, -+ 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63, -+ 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87, -+ 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd, -+ 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e, -+ 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a, -+ 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c, -+ 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38, -+ 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a, -+ 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5, -+ 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9, -+ 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0 -+}; -+static const u8 dec_output010[] __initconst = { -+ 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf, -+ 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c, -+ 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22, -+ 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc, -+ 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16, -+ 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7, -+ 0xbe, 0xbe, 0x8a, 0x36, 
0x04, 0xc0, 0x10, 0xf4, -+ 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d, -+ 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5, -+ 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46, -+ 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82, -+ 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b, -+ 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a, -+ 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf, -+ 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca, -+ 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95, -+ 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09, -+ 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3, -+ 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3, -+ 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f, -+ 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58, -+ 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad, -+ 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde, -+ 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44, -+ 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a, -+ 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9, -+ 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26, -+ 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc, -+ 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74, -+ 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b, -+ 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93, -+ 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37, -+ 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f, -+ 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d, -+ 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca, -+ 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73, -+ 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f, -+ 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1, -+ 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9, -+ 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76, -+ 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac, -+ 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7, -+ 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce, -+ 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30, -+ 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb, -+ 0x1e, 0x16, 0x42, 0xc4, 0xba, 
0xae, 0x58, 0xaa, -+ 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd, -+ 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f, -+ 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb, -+ 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34, -+ 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e, -+ 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f, -+ 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53, -+ 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41, -+ 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e, -+ 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d, -+ 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27, -+ 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e, -+ 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8, -+ 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a, -+ 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12, -+ 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3, -+ 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66, -+ 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0, -+ 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c, -+ 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4, -+ 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49, -+ 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90, -+ 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11, -+ 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c, -+ 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b, -+ 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74, -+ 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c, -+ 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27, -+ 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1, -+ 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27, -+ 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88, -+ 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27, -+ 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b, -+ 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39, -+ 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7, -+ 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc, -+ 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe, -+ 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5, -+ 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 
0x87, 0xdf, -+ 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05, -+ 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73, -+ 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda, -+ 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe, -+ 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71, -+ 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed, -+ 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d, -+ 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33, -+ 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f, -+ 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a, -+ 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa, -+ 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e, -+ 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e, -+ 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87, -+ 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5, -+ 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4, -+ 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38, -+ 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34, -+ 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f, -+ 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36, -+ 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69, -+ 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44, -+ 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5, -+ 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce, -+ 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd, -+ 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27, -+ 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f, -+ 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8, -+ 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a, -+ 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5, -+ 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca, -+ 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e, -+ 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92, -+ 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13, -+ 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf, -+ 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6, -+ 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3, -+ 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b, -+ 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 
0x9d, -+ 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f, -+ 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40, -+ 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c, -+ 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f -+}; -+static const u8 dec_assoc010[] __initconst = { -+ 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27, -+ 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2 -+}; -+static const u8 dec_nonce010[] __initconst = { -+ 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30 -+}; -+static const u8 dec_key010[] __initconst = { -+ 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44, -+ 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf, -+ 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74, -+ 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7 -+}; -+ -+static const u8 dec_input011[] __initconst = { -+ 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8, -+ 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc, -+ 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74, -+ 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73, -+ 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e, -+ 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9, -+ 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e, -+ 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd, -+ 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57, -+ 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19, -+ 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f, -+ 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45, -+ 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e, -+ 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39, -+ 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03, -+ 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f, -+ 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0, -+ 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce, -+ 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb, -+ 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52, -+ 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21, -+ 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a, -+ 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35, -+ 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 
0x91, -+ 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b, -+ 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e, -+ 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19, -+ 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07, -+ 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18, -+ 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96, -+ 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68, -+ 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4, -+ 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57, -+ 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c, -+ 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23, -+ 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8, -+ 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6, -+ 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40, -+ 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab, -+ 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb, -+ 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea, -+ 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8, -+ 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31, -+ 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0, -+ 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc, -+ 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94, -+ 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1, -+ 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46, -+ 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6, -+ 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7, -+ 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71, -+ 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a, -+ 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33, -+ 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38, -+ 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23, -+ 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb, -+ 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65, -+ 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73, -+ 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8, -+ 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb, -+ 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a, -+ 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca, -+ 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5, -+ 
0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71, -+ 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8, -+ 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d, -+ 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6, -+ 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d, -+ 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7, -+ 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5, -+ 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8, -+ 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd, -+ 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29, -+ 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22, -+ 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5, -+ 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67, -+ 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11, -+ 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e, -+ 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09, -+ 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4, -+ 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f, -+ 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa, -+ 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec, -+ 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b, -+ 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d, -+ 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b, -+ 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48, -+ 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3, -+ 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63, -+ 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd, -+ 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78, -+ 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed, -+ 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82, -+ 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f, -+ 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3, -+ 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9, -+ 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72, -+ 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74, -+ 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40, -+ 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b, -+ 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a, -+ 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5, -+ 0x1d, 
0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98, -+ 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71, -+ 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e, -+ 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4, -+ 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46, -+ 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e, -+ 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f, -+ 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93, -+ 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0, -+ 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5, -+ 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61, -+ 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64, -+ 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85, -+ 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20, -+ 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6, -+ 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc, -+ 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8, -+ 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50, -+ 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4, -+ 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80, -+ 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0, -+ 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a, -+ 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35, -+ 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43, -+ 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12, -+ 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7, -+ 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34, -+ 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42, -+ 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0, -+ 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95, -+ 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74, -+ 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5, -+ 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12, -+ 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6, -+ 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86, -+ 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97, -+ 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45, -+ 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19, -+ 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86, -+ 0x06, 0xe2, 
0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c, -+ 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba, -+ 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29, -+ 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6, -+ 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6, -+ 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09, -+ 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31, -+ 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99, -+ 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b, -+ 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca, -+ 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00, -+ 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93, -+ 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3, -+ 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07, -+ 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda, -+ 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90, -+ 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b, -+ 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a, -+ 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6, -+ 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c, -+ 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57, -+ 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15, -+ 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e, -+ 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51, -+ 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75, -+ 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19, -+ 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08, -+ 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14, -+ 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba, -+ 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff, -+ 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90, -+ 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e, -+ 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93, -+ 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad, -+ 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2, -+ 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac, -+ 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d, -+ 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06, -+ 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c, -+ 0x36, 0x5d, 0x23, 
0xa2, 0xcc, 0x57, 0x40, 0x91, -+ 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17, -+ 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20, -+ 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7, -+ 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf, -+ 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c, -+ 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2, -+ 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e, -+ 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a, -+ 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05, -+ 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58, -+ 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8, -+ 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d, -+ 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71, -+ 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3, -+ 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe, -+ 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62, -+ 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16, -+ 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66, -+ 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4, -+ 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2, -+ 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35, -+ 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3, -+ 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4, -+ 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f, -+ 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe, -+ 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56, -+ 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b, -+ 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37, -+ 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3, -+ 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f, -+ 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f, -+ 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0, -+ 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70, -+ 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd, -+ 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f, -+ 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e, -+ 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67, -+ 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51, -+ 0x79, 0xf1, 0x61, 0x70, 
0x15, 0x74, 0x9d, 0x23, -+ 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3, -+ 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5, -+ 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09, -+ 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7, -+ 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed, -+ 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb, -+ 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6, -+ 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5, -+ 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96, -+ 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe, -+ 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44, -+ 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6, -+ 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e, -+ 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0, -+ 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79, -+ 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f, -+ 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d, -+ 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82, -+ 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47, -+ 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93, -+ 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6, -+ 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69, -+ 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e, -+ 0x2b, 0xdf, 0xcd, 0xf9, 0x3c -+}; -+static const u8 dec_output011[] __initconst = { -+ 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b, -+ 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b, -+ 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d, -+ 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee, -+ 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30, -+ 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20, -+ 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f, -+ 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e, -+ 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66, -+ 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46, -+ 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35, -+ 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6, -+ 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0, -+ 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15, 
-+ 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13, -+ 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7, -+ 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3, -+ 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37, -+ 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc, -+ 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95, -+ 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8, -+ 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac, -+ 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45, -+ 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf, -+ 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d, -+ 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc, -+ 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45, -+ 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a, -+ 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec, -+ 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e, -+ 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10, -+ 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8, -+ 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66, -+ 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0, -+ 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62, -+ 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b, -+ 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4, -+ 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96, -+ 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7, -+ 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74, -+ 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8, -+ 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b, -+ 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70, -+ 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95, -+ 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3, -+ 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9, -+ 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d, -+ 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e, -+ 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32, -+ 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5, -+ 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80, -+ 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3, -+ 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad, -+ 0x1d, 
0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d, -+ 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20, -+ 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17, -+ 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6, -+ 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d, -+ 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82, -+ 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c, -+ 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9, -+ 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb, -+ 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96, -+ 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9, -+ 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f, -+ 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40, -+ 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc, -+ 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce, -+ 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71, -+ 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f, -+ 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35, -+ 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90, -+ 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8, -+ 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01, -+ 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1, -+ 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe, -+ 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4, -+ 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf, -+ 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9, -+ 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f, -+ 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04, -+ 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7, -+ 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15, -+ 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc, -+ 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0, -+ 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae, -+ 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb, -+ 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed, -+ 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51, -+ 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52, -+ 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84, -+ 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5, -+ 0x85, 0x1c, 
0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4, -+ 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e, -+ 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74, -+ 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f, -+ 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13, -+ 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea, -+ 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b, -+ 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef, -+ 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09, -+ 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe, -+ 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1, -+ 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9, -+ 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15, -+ 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a, -+ 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab, -+ 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36, -+ 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd, -+ 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde, -+ 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd, -+ 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47, -+ 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5, -+ 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69, -+ 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21, -+ 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98, -+ 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07, -+ 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57, -+ 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd, -+ 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03, -+ 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11, -+ 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96, -+ 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91, -+ 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d, -+ 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0, -+ 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9, -+ 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42, -+ 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a, -+ 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18, -+ 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc, -+ 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce, -+ 0xdb, 0xa0, 0x0a, 
0x52, 0x2a, 0x4e, 0xdd, 0xcc, -+ 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0, -+ 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf, -+ 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7, -+ 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80, -+ 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c, -+ 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82, -+ 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9, -+ 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20, -+ 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58, -+ 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6, -+ 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc, -+ 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50, -+ 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86, -+ 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a, -+ 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80, -+ 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec, -+ 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08, -+ 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c, -+ 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde, -+ 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d, -+ 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17, -+ 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f, -+ 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26, -+ 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96, -+ 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97, -+ 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6, -+ 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55, -+ 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e, -+ 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88, -+ 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5, -+ 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b, -+ 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15, -+ 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1, -+ 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4, -+ 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3, -+ 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf, -+ 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e, -+ 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb, -+ 0x70, 0xa8, 0x4c, 0x52, 
0x81, 0xa9, 0xff, 0x76, -+ 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5, -+ 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c, -+ 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde, -+ 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f, -+ 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51, -+ 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9, -+ 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99, -+ 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6, -+ 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04, -+ 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31, -+ 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a, -+ 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56, -+ 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e, -+ 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78, -+ 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a, -+ 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7, -+ 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb, -+ 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6, -+ 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8, -+ 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc, -+ 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84, -+ 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86, -+ 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76, -+ 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a, -+ 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73, -+ 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8, -+ 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6, -+ 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2, -+ 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56, -+ 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb, -+ 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab, -+ 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76, -+ 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69, -+ 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d, -+ 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc, -+ 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22, -+ 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39, -+ 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6, -+ 0x02, 0x76, 0x76, 0x93, 0xce, 
0x2f, 0x19, 0xe9, -+ 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f, -+ 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1, -+ 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83, -+ 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc, -+ 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4, -+ 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59, -+ 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68, -+ 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef, -+ 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1, -+ 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3, -+ 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44, -+ 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09, -+ 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8, -+ 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a, -+ 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d, -+ 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae, -+ 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2, -+ 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10, -+ 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a, -+ 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34, -+ 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f, -+ 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9, -+ 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b, -+ 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d, -+ 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57, -+ 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03, -+ 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87, -+ 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca, -+ 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53, -+ 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f, -+ 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61, -+ 0x10, 0x1e, 0xbf, 0xec, 0xa8 -+}; -+static const u8 dec_assoc011[] __initconst = { -+ 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7 -+}; -+static const u8 dec_nonce011[] __initconst = { -+ 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa -+}; -+static const u8 dec_key011[] __initconst = { -+ 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85, -+ 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca, -+ 0xfa, 
0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52, -+ 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38 -+}; -+ -+static const u8 dec_input012[] __initconst = { -+ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, -+ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, -+ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, -+ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, -+ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, -+ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, -+ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, -+ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, -+ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, -+ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, -+ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, -+ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, -+ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, -+ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, -+ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, -+ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, -+ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, -+ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, -+ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, -+ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, -+ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, -+ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, -+ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, -+ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, -+ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, -+ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, -+ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, -+ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, -+ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, -+ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, -+ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, -+ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, -+ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, -+ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, -+ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, -+ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, -+ 0x6a, 0xe8, 
0x61, 0x88, 0xda, 0xed, 0x01, 0x47, -+ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, -+ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, -+ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, -+ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, -+ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, -+ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, -+ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, -+ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, -+ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, -+ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, -+ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, -+ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, -+ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, -+ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, -+ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, -+ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, -+ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, -+ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, -+ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, -+ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, -+ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, -+ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, -+ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, -+ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, -+ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, -+ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, -+ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, -+ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, -+ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, -+ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, -+ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, -+ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, -+ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, -+ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, -+ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, -+ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, -+ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, -+ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, -+ 0xe2, 0x7d, 0x79, 
0x82, 0xd0, 0x65, 0x3b, 0x6b, -+ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, -+ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, -+ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, -+ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, -+ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, -+ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, -+ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, -+ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, -+ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, -+ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, -+ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, -+ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, -+ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, -+ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, -+ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, -+ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, -+ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, -+ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, -+ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, -+ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, -+ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, -+ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, -+ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, -+ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, -+ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, -+ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, -+ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, -+ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, -+ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, -+ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, -+ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, -+ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, -+ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, -+ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, -+ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, -+ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, -+ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, -+ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, -+ 0x73, 0x8b, 0x3b, 0x49, 
0x2f, 0xf0, 0xaf, 0x51, -+ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, -+ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, -+ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, -+ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, -+ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, -+ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, -+ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, -+ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, -+ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, -+ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, -+ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, -+ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, -+ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, -+ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, -+ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, -+ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, -+ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, -+ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, -+ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, -+ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, -+ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, -+ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, -+ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, -+ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, -+ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c, -+ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, -+ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, -+ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, -+ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, -+ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, -+ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, -+ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, -+ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, -+ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, -+ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, -+ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, -+ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, -+ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, -+ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 
0x17, 0x77, 0xd2, -+ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, -+ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, -+ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, -+ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, -+ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, -+ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, -+ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, -+ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, -+ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, -+ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, -+ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, -+ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, -+ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, -+ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, -+ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, -+ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, -+ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, -+ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, -+ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, -+ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, -+ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, -+ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, -+ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, -+ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, -+ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68, -+ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, -+ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, -+ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, -+ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, -+ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, -+ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, -+ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, -+ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, -+ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, -+ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, -+ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, -+ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, -+ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, -+ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 
0x53, 0x14, -+ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, -+ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, -+ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, -+ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, -+ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, -+ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, -+ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, -+ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, -+ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, -+ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, -+ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, -+ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, -+ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, -+ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, -+ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, -+ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, -+ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, -+ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, -+ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, -+ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, -+ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, -+ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, -+ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, -+ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, -+ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11, -+ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, -+ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, -+ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, -+ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, -+ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, -+ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, -+ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, -+ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, -+ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, -+ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, -+ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, -+ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, -+ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, -+ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 
0x3f, -+ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, -+ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, -+ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, -+ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, -+ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, -+ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, -+ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, -+ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, -+ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, -+ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, -+ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, -+ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, -+ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, -+ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, -+ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, -+ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, -+ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, -+ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, -+ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, -+ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, -+ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, -+ 0x70, 0xcf, 0xd6 -+}; -+static const u8 dec_output012[] __initconst = { -+ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, -+ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, -+ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, -+ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, -+ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, -+ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, -+ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, -+ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, -+ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, -+ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, -+ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, -+ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, -+ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, -+ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, -+ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, -+ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, -+ 0xc6, 0x89, 0xf9, 0x52, 
0x09, 0x37, 0x64, 0x14, -+ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, -+ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, -+ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, -+ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, -+ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, -+ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, -+ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, -+ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, -+ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, -+ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, -+ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, -+ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, -+ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, -+ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, -+ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, -+ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, -+ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, -+ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, -+ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, -+ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, -+ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, -+ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, -+ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, -+ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, -+ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6, -+ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, -+ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, -+ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, -+ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, -+ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, -+ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, -+ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, -+ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, -+ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, -+ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, -+ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, -+ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, -+ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, -+ 0x31, 0x95, 0xdd, 0xb1, 0x08, 
0x7e, 0x4e, 0xee, -+ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, -+ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, -+ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, -+ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, -+ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, -+ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, -+ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, -+ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, -+ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, -+ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, -+ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, -+ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, -+ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, -+ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, -+ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, -+ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, -+ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, -+ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, -+ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, -+ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, -+ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, -+ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, -+ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, -+ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, -+ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39, -+ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, -+ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, -+ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, -+ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, -+ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, -+ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, -+ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, -+ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, -+ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, -+ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, -+ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, -+ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, -+ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, -+ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 
0xfd, 0xdd, -+ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, -+ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, -+ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, -+ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, -+ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, -+ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, -+ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, -+ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, -+ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, -+ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, -+ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, -+ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, -+ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, -+ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, -+ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, -+ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, -+ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, -+ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, -+ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, -+ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, -+ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, -+ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, -+ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, -+ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, -+ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f, -+ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, -+ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, -+ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, -+ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, -+ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, -+ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, -+ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, -+ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, -+ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, -+ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, -+ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, -+ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, -+ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, -+ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 
0x74, -+ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, -+ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, -+ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, -+ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, -+ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, -+ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, -+ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, -+ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, -+ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, -+ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, -+ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, -+ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, -+ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, -+ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, -+ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, -+ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, -+ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, -+ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, -+ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, -+ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, -+ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, -+ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, -+ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, -+ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, -+ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63, -+ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, -+ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, -+ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, -+ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, -+ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, -+ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, -+ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, -+ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, -+ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, -+ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, -+ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, -+ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, -+ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, -+ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, -+ 
0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, -+ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, -+ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, -+ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, -+ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, -+ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, -+ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, -+ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, -+ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, -+ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, -+ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, -+ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, -+ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, -+ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, -+ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, -+ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, -+ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, -+ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, -+ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, -+ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, -+ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, -+ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, -+ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, -+ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, -+ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e, -+ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, -+ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, -+ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, -+ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, -+ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, -+ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, -+ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, -+ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, -+ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, -+ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, -+ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, -+ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, -+ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, -+ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, -+ 0xbd, 
0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, -+ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, -+ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, -+ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, -+ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, -+ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, -+ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, -+ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, -+ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, -+ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, -+ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, -+ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, -+ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, -+ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, -+ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, -+ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, -+ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, -+ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, -+ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, -+ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, -+ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, -+ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, -+ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, -+ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, -+ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, -+ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, -+ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, -+ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, -+ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, -+ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, -+ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, -+ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, -+ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, -+ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, -+ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, -+ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, -+ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, -+ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, -+ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, -+ 0x78, 0xec, 0x00 
-+}; -+static const u8 dec_assoc012[] __initconst = { -+ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, -+ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, -+ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, -+ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, -+ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, -+ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, -+ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, -+ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 -+}; -+static const u8 dec_nonce012[] __initconst = { -+ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 -+}; -+static const u8 dec_key012[] __initconst = { -+ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, -+ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, -+ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, -+ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 -+}; -+ -+static const u8 dec_input013[] __initconst = { -+ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3, -+ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf, -+ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1, -+ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f, -+ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e, -+ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5, -+ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b, -+ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b, -+ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2, -+ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1, -+ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74, -+ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e, -+ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae, -+ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd, -+ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04, -+ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55, -+ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef, -+ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b, -+ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74, -+ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26, -+ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f, -+ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64, -+ 
0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd, -+ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad, -+ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b, -+ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e, -+ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e, -+ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0, -+ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f, -+ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50, -+ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97, -+ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03, -+ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a, -+ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15, -+ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb, -+ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34, -+ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47, -+ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86, -+ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24, -+ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c, -+ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9, -+ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7, -+ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48, -+ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b, -+ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e, -+ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61, -+ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75, -+ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26, -+ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74, -+ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43, -+ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1, -+ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79, -+ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3, -+ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5, -+ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9, -+ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d, -+ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8, -+ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26, -+ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5, -+ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d, -+ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29, -+ 0x23, 
0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57, -+ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92, -+ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9, -+ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc, -+ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd, -+ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57, -+ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3, -+ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4, -+ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c, -+ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27, -+ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c, -+ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5, -+ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14, -+ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94, -+ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b, -+ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99, -+ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84, -+ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a, -+ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa, -+ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75, -+ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74, -+ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40, -+ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72, -+ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f, -+ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92, -+ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8, -+ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c, -+ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f, -+ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb, -+ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a, -+ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b, -+ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d, -+ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c, -+ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4, -+ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00, -+ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b, -+ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4, -+ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84, -+ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba, -+ 0x72, 0xaa, 
0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47, -+ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4, -+ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88, -+ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81, -+ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1, -+ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a, -+ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e, -+ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1, -+ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07, -+ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24, -+ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f, -+ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a, -+ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9, -+ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9, -+ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51, -+ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1, -+ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c, -+ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53, -+ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40, -+ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a, -+ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2, -+ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2, -+ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8, -+ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07, -+ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9, -+ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d, -+ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde, -+ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f, -+ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d, -+ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d, -+ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56, -+ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c, -+ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3, -+ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d, -+ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26, -+ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10, -+ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c, -+ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11, -+ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf, -+ 0x0d, 0xc4, 0x8f, 
0x79, 0x6b, 0xca, 0x16, 0x7c, -+ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb, -+ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79, -+ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa, -+ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80, -+ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08, -+ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c, -+ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc, -+ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab, -+ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6, -+ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9, -+ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7, -+ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2, -+ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33, -+ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2, -+ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e, -+ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c, -+ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b, -+ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66, -+ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6, -+ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44, -+ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74, -+ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6, -+ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f, -+ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24, -+ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1, -+ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2, -+ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5, -+ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d, -+ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0, -+ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b, -+ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3, -+ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0, -+ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3, -+ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c, -+ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b, -+ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5, -+ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51, -+ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71, -+ 0xae, 0x9c, 0xb7, 0x7b, 
0x34, 0xd6, 0x4e, 0x68, -+ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb, -+ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e, -+ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b, -+ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8, -+ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb, -+ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54, -+ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7, -+ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff, -+ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd, -+ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde, -+ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c, -+ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1, -+ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8, -+ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14, -+ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c, -+ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4, -+ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06, -+ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52, -+ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d, -+ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c, -+ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6, -+ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5, -+ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f, -+ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e, -+ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98, -+ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8, -+ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb, -+ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b, -+ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79, -+ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11, -+ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d, -+ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10, -+ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23, -+ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23, -+ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90, -+ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4, -+ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1, -+ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7, -+ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 
0xe2, 0x12, 0x11, -+ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50, -+ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8, -+ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97, -+ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38, -+ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f, -+ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33, -+ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f, -+ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75, -+ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21, -+ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90, -+ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8, -+ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91, -+ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1, -+ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f, -+ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3, -+ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc, -+ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a, -+ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62, -+ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55, -+ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23, -+ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6, -+ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac, -+ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12, -+ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a, -+ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7, -+ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec, -+ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28, -+ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88, -+ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4, -+ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17, -+ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2, -+ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33, -+ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a, -+ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28, -+ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62, -+ 0x70, 0xcf, 0xd7 -+}; -+static const u8 dec_output013[] __initconst = { -+ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0, -+ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5, -+ 0x42, 0x34, 
0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57, -+ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff, -+ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5, -+ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b, -+ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46, -+ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b, -+ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71, -+ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0, -+ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b, -+ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d, -+ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f, -+ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24, -+ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23, -+ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e, -+ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14, -+ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d, -+ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb, -+ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4, -+ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf, -+ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e, -+ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6, -+ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33, -+ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb, -+ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0, -+ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe, -+ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00, -+ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d, -+ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b, -+ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50, -+ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e, -+ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4, -+ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28, -+ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8, -+ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b, -+ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86, -+ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67, -+ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff, -+ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59, -+ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe, -+ 0xa0, 0x3b, 0xd8, 
0x3f, 0xd1, 0x69, 0xa6, 0xd6, -+ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e, -+ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b, -+ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50, -+ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39, -+ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02, -+ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9, -+ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a, -+ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38, -+ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9, -+ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65, -+ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb, -+ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2, -+ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae, -+ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee, -+ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00, -+ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c, -+ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8, -+ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31, -+ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68, -+ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4, -+ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0, -+ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11, -+ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7, -+ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39, -+ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1, -+ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1, -+ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2, -+ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66, -+ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49, -+ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2, -+ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5, -+ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3, -+ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c, -+ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa, -+ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00, -+ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54, -+ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87, -+ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03, -+ 0xa5, 0xf3, 0x05, 0x28, 
0xe1, 0xe1, 0xee, 0x39, -+ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40, -+ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6, -+ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22, -+ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5, -+ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e, -+ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32, -+ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53, -+ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42, -+ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c, -+ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68, -+ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48, -+ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c, -+ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce, -+ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd, -+ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa, -+ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69, -+ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8, -+ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58, -+ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0, -+ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45, -+ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb, -+ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33, -+ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c, -+ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23, -+ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80, -+ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1, -+ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff, -+ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24, -+ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9, -+ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46, -+ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8, -+ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20, -+ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35, -+ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63, -+ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb, -+ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36, -+ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a, -+ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c, -+ 0xba, 0x92, 0x93, 0x63, 0xb8, 
0xe2, 0x95, 0x0f, -+ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02, -+ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03, -+ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa, -+ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16, -+ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d, -+ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5, -+ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7, -+ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac, -+ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47, -+ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3, -+ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35, -+ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e, -+ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6, -+ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74, -+ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e, -+ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a, -+ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0, -+ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4, -+ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8, -+ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16, -+ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32, -+ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65, -+ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06, -+ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a, -+ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7, -+ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85, -+ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb, -+ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46, -+ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e, -+ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61, -+ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb, -+ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d, -+ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00, -+ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5, -+ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6, -+ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1, -+ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a, -+ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7, -+ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 
0xe6, 0x63, -+ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38, -+ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3, -+ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed, -+ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49, -+ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42, -+ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0, -+ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f, -+ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1, -+ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd, -+ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d, -+ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88, -+ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1, -+ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25, -+ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22, -+ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28, -+ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f, -+ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53, -+ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28, -+ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8, -+ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc, -+ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8, -+ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb, -+ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3, -+ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3, -+ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac, -+ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2, -+ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a, -+ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad, -+ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e, -+ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd, -+ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf, -+ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba, -+ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41, -+ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91, -+ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d, -+ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6, -+ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf, -+ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92, -+ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 
0x8e, -+ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72, -+ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04, -+ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46, -+ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55, -+ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84, -+ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61, -+ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d, -+ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8, -+ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d, -+ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87, -+ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70, -+ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94, -+ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f, -+ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb, -+ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90, -+ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31, -+ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06, -+ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05, -+ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7, -+ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e, -+ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae, -+ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2, -+ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21, -+ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0, -+ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d, -+ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0, -+ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6, -+ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5, -+ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9, -+ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8, -+ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57, -+ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1, -+ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c, -+ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b, -+ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69, -+ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d, -+ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d, -+ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19, -+ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82, -+ 
0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20, -+ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f, -+ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e, -+ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f, -+ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47, -+ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b, -+ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4, -+ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b, -+ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4, -+ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9, -+ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3, -+ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0, -+ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16, -+ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d, -+ 0x78, 0xec, 0x00 -+}; -+static const u8 dec_assoc013[] __initconst = { -+ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8, -+ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce, -+ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c, -+ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc, -+ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e, -+ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f, -+ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b, -+ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9 -+}; -+static const u8 dec_nonce013[] __initconst = { -+ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06 -+}; -+static const u8 dec_key013[] __initconst = { -+ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e, -+ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d, -+ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e, -+ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 -+}; -+ -+static const struct chacha20poly1305_testvec -+chacha20poly1305_dec_vectors[] __initconst = { -+ { dec_input001, dec_output001, dec_assoc001, dec_nonce001, dec_key001, -+ sizeof(dec_input001), sizeof(dec_assoc001), sizeof(dec_nonce001) }, -+ { dec_input002, dec_output002, dec_assoc002, dec_nonce002, dec_key002, -+ sizeof(dec_input002), sizeof(dec_assoc002), sizeof(dec_nonce002) }, -+ { dec_input003, dec_output003, dec_assoc003, 
dec_nonce003, dec_key003, -+ sizeof(dec_input003), sizeof(dec_assoc003), sizeof(dec_nonce003) }, -+ { dec_input004, dec_output004, dec_assoc004, dec_nonce004, dec_key004, -+ sizeof(dec_input004), sizeof(dec_assoc004), sizeof(dec_nonce004) }, -+ { dec_input005, dec_output005, dec_assoc005, dec_nonce005, dec_key005, -+ sizeof(dec_input005), sizeof(dec_assoc005), sizeof(dec_nonce005) }, -+ { dec_input006, dec_output006, dec_assoc006, dec_nonce006, dec_key006, -+ sizeof(dec_input006), sizeof(dec_assoc006), sizeof(dec_nonce006) }, -+ { dec_input007, dec_output007, dec_assoc007, dec_nonce007, dec_key007, -+ sizeof(dec_input007), sizeof(dec_assoc007), sizeof(dec_nonce007) }, -+ { dec_input008, dec_output008, dec_assoc008, dec_nonce008, dec_key008, -+ sizeof(dec_input008), sizeof(dec_assoc008), sizeof(dec_nonce008) }, -+ { dec_input009, dec_output009, dec_assoc009, dec_nonce009, dec_key009, -+ sizeof(dec_input009), sizeof(dec_assoc009), sizeof(dec_nonce009) }, -+ { dec_input010, dec_output010, dec_assoc010, dec_nonce010, dec_key010, -+ sizeof(dec_input010), sizeof(dec_assoc010), sizeof(dec_nonce010) }, -+ { dec_input011, dec_output011, dec_assoc011, dec_nonce011, dec_key011, -+ sizeof(dec_input011), sizeof(dec_assoc011), sizeof(dec_nonce011) }, -+ { dec_input012, dec_output012, dec_assoc012, dec_nonce012, dec_key012, -+ sizeof(dec_input012), sizeof(dec_assoc012), sizeof(dec_nonce012) }, -+ { dec_input013, dec_output013, dec_assoc013, dec_nonce013, dec_key013, -+ sizeof(dec_input013), sizeof(dec_assoc013), sizeof(dec_nonce013), -+ true } -+}; -+ -+static const u8 xenc_input001[] __initconst = { -+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, -+ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20, -+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, -+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, -+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, -+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, -+ 0x6f, 0x66, 0x20, 0x73, 
0x69, 0x78, 0x20, 0x6d, -+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, -+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, -+ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, -+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, -+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, -+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, -+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, -+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, -+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, -+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, -+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, -+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, -+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, -+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, -+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, -+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, -+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, -+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, -+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, -+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, -+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, -+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, -+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, -+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, -+ 0x9d -+}; -+static const u8 xenc_output001[] __initconst = { -+ 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77, -+ 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92, -+ 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18, -+ 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d, -+ 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e, -+ 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86, -+ 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2, -+ 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85, -+ 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09, -+ 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49, -+ 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd, -+ 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8, -+ 0xa7, 0x63, 0x60, 
0xce, 0x10, 0xcd, 0xc6, 0x7f, -+ 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79, -+ 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8, -+ 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0, -+ 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88, -+ 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71, -+ 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91, -+ 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf, -+ 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89, -+ 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46, -+ 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e, -+ 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90, -+ 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b, -+ 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58, -+ 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54, -+ 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1, -+ 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73, -+ 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69, -+ 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05, -+ 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83, -+ 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13, -+ 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8, -+ 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5, -+ 0x9c -+}; -+static const u8 xenc_assoc001[] __initconst = { -+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x4e, 0x91 -+}; -+static const u8 xenc_nonce001[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, -+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 -+}; -+static const u8 xenc_key001[] __initconst = { -+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, -+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, -+ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, -+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 -+}; -+ -+static const struct chacha20poly1305_testvec -+xchacha20poly1305_enc_vectors[] __initconst = { -+ { xenc_input001, xenc_output001, xenc_assoc001, xenc_nonce001, xenc_key001, -+ sizeof(xenc_input001), sizeof(xenc_assoc001), 
sizeof(xenc_nonce001) } -+}; -+ -+static const u8 xdec_input001[] __initconst = { -+ 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77, -+ 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92, -+ 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18, -+ 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d, -+ 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e, -+ 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86, -+ 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2, -+ 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85, -+ 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09, -+ 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49, -+ 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd, -+ 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8, -+ 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f, -+ 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79, -+ 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8, -+ 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0, -+ 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88, -+ 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71, -+ 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91, -+ 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf, -+ 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89, -+ 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46, -+ 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e, -+ 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90, -+ 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b, -+ 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58, -+ 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54, -+ 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1, -+ 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73, -+ 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69, -+ 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05, -+ 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83, -+ 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13, -+ 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8, -+ 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5, -+ 0x9c -+}; -+static const u8 xdec_output001[] __initconst = { -+ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74, -+ 0x2d, 0x44, 
0x72, 0x61, 0x66, 0x74, 0x73, 0x20, -+ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66, -+ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69, -+ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20, -+ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20, -+ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d, -+ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e, -+ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65, -+ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, -+ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63, -+ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f, -+ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64, -+ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65, -+ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, -+ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61, -+ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e, -+ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69, -+ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72, -+ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20, -+ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65, -+ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61, -+ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72, -+ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, -+ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61, -+ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, -+ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65, -+ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20, -+ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20, -+ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b, -+ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67, -+ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80, -+ 0x9d -+}; -+static const u8 xdec_assoc001[] __initconst = { -+ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x4e, 0x91 -+}; -+static const u8 xdec_nonce001[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, -+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 -+}; -+static const u8 
xdec_key001[] __initconst = { -+ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, -+ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, -+ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, -+ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 -+}; -+ -+static const struct chacha20poly1305_testvec -+xchacha20poly1305_dec_vectors[] __initconst = { -+ { xdec_input001, xdec_output001, xdec_assoc001, xdec_nonce001, xdec_key001, -+ sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) } -+}; -+ -+static void __init -+chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u8 *nonce, const size_t nonce_len, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ if (nonce_len == 8) -+ chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, -+ get_unaligned_le64(nonce), key); -+ else -+ BUG(); -+} -+ -+static bool __init -+decryption_success(bool func_ret, bool expect_failure, int memcmp_result) -+{ -+ if (expect_failure) -+ return !func_ret; -+ return func_ret && !memcmp_result; -+} -+ -+bool __init chacha20poly1305_selftest(void) -+{ -+ enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; -+ size_t i; -+ u8 *computed_output = NULL, *heap_src = NULL; -+ bool success = true, ret; -+ -+ heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); -+ computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); -+ if (!heap_src || !computed_output) { -+ pr_err("chacha20poly1305 self-test malloc: FAIL\n"); -+ success = false; -+ goto out; -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { -+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); -+ chacha20poly1305_selftest_encrypt(computed_output, -+ chacha20poly1305_enc_vectors[i].input, -+ chacha20poly1305_enc_vectors[i].ilen, -+ chacha20poly1305_enc_vectors[i].assoc, -+ chacha20poly1305_enc_vectors[i].alen, -+ chacha20poly1305_enc_vectors[i].nonce, -+ chacha20poly1305_enc_vectors[i].nlen, -+ chacha20poly1305_enc_vectors[i].key); -+ if 
(memcmp(computed_output, -+ chacha20poly1305_enc_vectors[i].output, -+ chacha20poly1305_enc_vectors[i].ilen + -+ POLY1305_DIGEST_SIZE)) { -+ pr_err("chacha20poly1305 encryption self-test %zu: FAIL\n", -+ i + 1); -+ success = false; -+ } -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { -+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); -+ ret = chacha20poly1305_decrypt(computed_output, -+ chacha20poly1305_dec_vectors[i].input, -+ chacha20poly1305_dec_vectors[i].ilen, -+ chacha20poly1305_dec_vectors[i].assoc, -+ chacha20poly1305_dec_vectors[i].alen, -+ get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce), -+ chacha20poly1305_dec_vectors[i].key); -+ if (!decryption_success(ret, -+ chacha20poly1305_dec_vectors[i].failure, -+ memcmp(computed_output, -+ chacha20poly1305_dec_vectors[i].output, -+ chacha20poly1305_dec_vectors[i].ilen - -+ POLY1305_DIGEST_SIZE))) { -+ pr_err("chacha20poly1305 decryption self-test %zu: FAIL\n", -+ i + 1); -+ success = false; -+ } -+ } -+ -+ -+ for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) { -+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); -+ xchacha20poly1305_encrypt(computed_output, -+ xchacha20poly1305_enc_vectors[i].input, -+ xchacha20poly1305_enc_vectors[i].ilen, -+ xchacha20poly1305_enc_vectors[i].assoc, -+ xchacha20poly1305_enc_vectors[i].alen, -+ xchacha20poly1305_enc_vectors[i].nonce, -+ xchacha20poly1305_enc_vectors[i].key); -+ if (memcmp(computed_output, -+ xchacha20poly1305_enc_vectors[i].output, -+ xchacha20poly1305_enc_vectors[i].ilen + -+ POLY1305_DIGEST_SIZE)) { -+ pr_err("xchacha20poly1305 encryption self-test %zu: FAIL\n", -+ i + 1); -+ success = false; -+ } -+ } -+ for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) { -+ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); -+ ret = xchacha20poly1305_decrypt(computed_output, -+ xchacha20poly1305_dec_vectors[i].input, -+ xchacha20poly1305_dec_vectors[i].ilen, -+ xchacha20poly1305_dec_vectors[i].assoc, 
-+ xchacha20poly1305_dec_vectors[i].alen, -+ xchacha20poly1305_dec_vectors[i].nonce, -+ xchacha20poly1305_dec_vectors[i].key); -+ if (!decryption_success(ret, -+ xchacha20poly1305_dec_vectors[i].failure, -+ memcmp(computed_output, -+ xchacha20poly1305_dec_vectors[i].output, -+ xchacha20poly1305_dec_vectors[i].ilen - -+ POLY1305_DIGEST_SIZE))) { -+ pr_err("xchacha20poly1305 decryption self-test %zu: FAIL\n", -+ i + 1); -+ success = false; -+ } -+ } -+ -+out: -+ kfree(heap_src); -+ kfree(computed_output); -+ return success; -+} ---- /dev/null -+++ b/lib/crypto/chacha20poly1305.c -@@ -0,0 +1,219 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is an implementation of the ChaCha20Poly1305 AEAD construction. -+ * -+ * Information: https://tools.ietf.org/html/rfc8439 -+ */ -+ -+#include <crypto/algapi.h> -+#include <crypto/chacha20poly1305.h> -+#include <crypto/chacha.h> -+#include <crypto/poly1305.h> -+ -+#include <asm/unaligned.h> -+#include <linux/kernel.h> -+#include <linux/init.h> -+#include <linux/mm.h> -+#include <linux/module.h> -+ -+#define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32)) -+ -+bool __init chacha20poly1305_selftest(void); -+ -+static void chacha_load_key(u32 *k, const u8 *in) -+{ -+ k[0] = get_unaligned_le32(in); -+ k[1] = get_unaligned_le32(in + 4); -+ k[2] = get_unaligned_le32(in + 8); -+ k[3] = get_unaligned_le32(in + 12); -+ k[4] = get_unaligned_le32(in + 16); -+ k[5] = get_unaligned_le32(in + 20); -+ k[6] = get_unaligned_le32(in + 24); -+ k[7] = get_unaligned_le32(in + 28); -+} -+ -+static void xchacha_init(u32 *chacha_state, const u8 *key, const u8 *nonce) -+{ -+ u32 k[CHACHA_KEY_WORDS]; -+ u8 iv[CHACHA_IV_SIZE]; -+ -+ memset(iv, 0, 8); -+ memcpy(iv + 8, nonce + 16, 8); -+ -+ chacha_load_key(k, key); -+ -+ /* Compute the subkey given the original key and first 128 nonce bits */ -+ chacha_init(chacha_state, k, nonce); -+ 
hchacha_block(chacha_state, k, 20); -+ -+ chacha_init(chacha_state, k, iv); -+ -+ memzero_explicit(k, sizeof(k)); -+ memzero_explicit(iv, sizeof(iv)); -+} -+ -+static void -+__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, u32 *chacha_state) -+{ -+ const u8 *pad0 = page_address(ZERO_PAGE(0)); -+ struct poly1305_desc_ctx poly1305_state; -+ union { -+ u8 block0[POLY1305_KEY_SIZE]; -+ __le64 lens[2]; -+ } b; -+ -+ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); -+ poly1305_init(&poly1305_state, b.block0); -+ -+ poly1305_update(&poly1305_state, ad, ad_len); -+ if (ad_len & 0xf) -+ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); -+ -+ chacha_crypt(chacha_state, dst, src, src_len, 20); -+ -+ poly1305_update(&poly1305_state, dst, src_len); -+ if (src_len & 0xf) -+ poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf)); -+ -+ b.lens[0] = cpu_to_le64(ad_len); -+ b.lens[1] = cpu_to_le64(src_len); -+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); -+ -+ poly1305_final(&poly1305_state, dst + src_len); -+ -+ memzero_explicit(chacha_state, CHACHA_STATE_WORDS * sizeof(u32)); -+ memzero_explicit(&b, sizeof(b)); -+} -+ -+void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ u32 chacha_state[CHACHA_STATE_WORDS]; -+ u32 k[CHACHA_KEY_WORDS]; -+ __le64 iv[2]; -+ -+ chacha_load_key(k, key); -+ -+ iv[0] = 0; -+ iv[1] = cpu_to_le64(nonce); -+ -+ chacha_init(chacha_state, k, (u8 *)iv); -+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state); -+ -+ memzero_explicit(iv, sizeof(iv)); -+ memzero_explicit(k, sizeof(k)); -+} -+EXPORT_SYMBOL(chacha20poly1305_encrypt); -+ -+void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u8 
nonce[XCHACHA20POLY1305_NONCE_SIZE], -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ u32 chacha_state[CHACHA_STATE_WORDS]; -+ -+ xchacha_init(chacha_state, key, nonce); -+ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state); -+} -+EXPORT_SYMBOL(xchacha20poly1305_encrypt); -+ -+static bool -+__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, u32 *chacha_state) -+{ -+ const u8 *pad0 = page_address(ZERO_PAGE(0)); -+ struct poly1305_desc_ctx poly1305_state; -+ size_t dst_len; -+ int ret; -+ union { -+ u8 block0[POLY1305_KEY_SIZE]; -+ u8 mac[POLY1305_DIGEST_SIZE]; -+ __le64 lens[2]; -+ } b; -+ -+ if (unlikely(src_len < POLY1305_DIGEST_SIZE)) -+ return false; -+ -+ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); -+ poly1305_init(&poly1305_state, b.block0); -+ -+ poly1305_update(&poly1305_state, ad, ad_len); -+ if (ad_len & 0xf) -+ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); -+ -+ dst_len = src_len - POLY1305_DIGEST_SIZE; -+ poly1305_update(&poly1305_state, src, dst_len); -+ if (dst_len & 0xf) -+ poly1305_update(&poly1305_state, pad0, 0x10 - (dst_len & 0xf)); -+ -+ b.lens[0] = cpu_to_le64(ad_len); -+ b.lens[1] = cpu_to_le64(dst_len); -+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); -+ -+ poly1305_final(&poly1305_state, b.mac); -+ -+ ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE); -+ if (likely(!ret)) -+ chacha_crypt(chacha_state, dst, src, dst_len, 20); -+ -+ memzero_explicit(&b, sizeof(b)); -+ -+ return !ret; -+} -+ -+bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ u32 chacha_state[CHACHA_STATE_WORDS]; -+ u32 k[CHACHA_KEY_WORDS]; -+ __le64 iv[2]; -+ bool ret; -+ -+ chacha_load_key(k, key); -+ -+ iv[0] = 0; -+ iv[1] = cpu_to_le64(nonce); -+ -+ chacha_init(chacha_state, k, (u8 *)iv); -+ 
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, -+ chacha_state); -+ -+ memzero_explicit(chacha_state, sizeof(chacha_state)); -+ memzero_explicit(iv, sizeof(iv)); -+ memzero_explicit(k, sizeof(k)); -+ return ret; -+} -+EXPORT_SYMBOL(chacha20poly1305_decrypt); -+ -+bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ u32 chacha_state[CHACHA_STATE_WORDS]; -+ -+ xchacha_init(chacha_state, key, nonce); -+ return __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, -+ chacha_state); -+} -+EXPORT_SYMBOL(xchacha20poly1305_decrypt); -+ -+static int __init mod_init(void) -+{ -+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && -+ WARN_ON(!chacha20poly1305_selftest())) -+ return -ENODEV; -+ return 0; -+} -+ -+module_init(mod_init); -+MODULE_LICENSE("GPL v2"); -+MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction"); -+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0033-crypto-lib-chacha20poly1305-reimplement-crypt_from_s.patch b/target/linux/generic/backport-5.4/080-wireguard-0033-crypto-lib-chacha20poly1305-reimplement-crypt_from_s.patch deleted file mode 100644 index e4b2b58b82..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0033-crypto-lib-chacha20poly1305-reimplement-crypt_from_s.patch +++ /dev/null @@ -1,295 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:40 +0100 -Subject: [PATCH] crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() - routine - -commit d95312a3ccc0cd544d374be2fc45aeaa803e5fd9 upstream. 
- -Reimplement the library routines to perform chacha20poly1305 en/decryption -on scatterlists, without [ab]using the [deprecated] blkcipher interface, -which is rather heavyweight and does things we don't really need. - -Instead, we use the sg_miter API in a novel and clever way, to iterate -over the scatterlist in-place (i.e., source == destination, which is the -only way this library is expected to be used). That way, we don't have to -iterate over two scatterlists in parallel. - -Another optimization is that, instead of relying on the blkcipher walker -to present the input in suitable chunks, we recognize that ChaCha is a -streamcipher, and so we can simply deal with partial blocks by keeping a -block of cipherstream on the stack and use crypto_xor() to mix it with -the in/output. - -Finally, we omit the scatterwalk_and_copy() call if the last element of -the scatterlist covers the MAC as well (which is the common case), -avoiding the need to walk the scatterlist and kmap() the page twice. - -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - include/crypto/chacha20poly1305.h | 11 ++ - lib/crypto/chacha20poly1305-selftest.c | 45 ++++++++ - lib/crypto/chacha20poly1305.c | 150 +++++++++++++++++++++++++ - 3 files changed, 206 insertions(+) - ---- a/include/crypto/chacha20poly1305.h -+++ b/include/crypto/chacha20poly1305.h -@@ -7,6 +7,7 @@ - #define __CHACHA20POLY1305_H - - #include <linux/types.h> -+#include <linux/scatterlist.h> - - enum chacha20poly1305_lengths { - XCHACHA20POLY1305_NONCE_SIZE = 24, -@@ -34,4 +35,14 @@ bool __must_check xchacha20poly1305_decr - const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE], - const u8 key[CHACHA20POLY1305_KEY_SIZE]); - -+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]); -+ -+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]); -+ - #endif /* __CHACHA20POLY1305_H */ ---- a/lib/crypto/chacha20poly1305-selftest.c -+++ b/lib/crypto/chacha20poly1305-selftest.c -@@ -7250,6 +7250,7 @@ bool __init chacha20poly1305_selftest(vo - enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; - size_t i; - u8 *computed_output = NULL, *heap_src = NULL; -+ struct scatterlist sg_src; - bool success = true, ret; - - heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); -@@ -7280,6 +7281,29 @@ bool __init chacha20poly1305_selftest(vo - } - } - -+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { -+ if (chacha20poly1305_enc_vectors[i].nlen != 8) -+ continue; -+ memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, -+ chacha20poly1305_enc_vectors[i].ilen); -+ sg_init_one(&sg_src, heap_src, -+ chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE); -+ chacha20poly1305_encrypt_sg_inplace(&sg_src, -+ chacha20poly1305_enc_vectors[i].ilen, -+ 
chacha20poly1305_enc_vectors[i].assoc, -+ chacha20poly1305_enc_vectors[i].alen, -+ get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce), -+ chacha20poly1305_enc_vectors[i].key); -+ if (memcmp(heap_src, -+ chacha20poly1305_enc_vectors[i].output, -+ chacha20poly1305_enc_vectors[i].ilen + -+ POLY1305_DIGEST_SIZE)) { -+ pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n", -+ i + 1); -+ success = false; -+ } -+ } -+ - for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { - memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); - ret = chacha20poly1305_decrypt(computed_output, -@@ -7301,6 +7325,27 @@ bool __init chacha20poly1305_selftest(vo - } - } - -+ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { -+ memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, -+ chacha20poly1305_dec_vectors[i].ilen); -+ sg_init_one(&sg_src, heap_src, -+ chacha20poly1305_dec_vectors[i].ilen); -+ ret = chacha20poly1305_decrypt_sg_inplace(&sg_src, -+ chacha20poly1305_dec_vectors[i].ilen, -+ chacha20poly1305_dec_vectors[i].assoc, -+ chacha20poly1305_dec_vectors[i].alen, -+ get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce), -+ chacha20poly1305_dec_vectors[i].key); -+ if (!decryption_success(ret, -+ chacha20poly1305_dec_vectors[i].failure, -+ memcmp(heap_src, chacha20poly1305_dec_vectors[i].output, -+ chacha20poly1305_dec_vectors[i].ilen - -+ POLY1305_DIGEST_SIZE))) { -+ pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n", -+ i + 1); -+ success = false; -+ } -+ } - - for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) { - memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); ---- a/lib/crypto/chacha20poly1305.c -+++ b/lib/crypto/chacha20poly1305.c -@@ -11,6 +11,7 @@ - #include <crypto/chacha20poly1305.h> - #include <crypto/chacha.h> - #include <crypto/poly1305.h> -+#include <crypto/scatterwalk.h> - - #include <asm/unaligned.h> - #include <linux/kernel.h> -@@ -205,6 +206,155 @@ bool xchacha20poly1305_decrypt(u8 *dst, - } - 
EXPORT_SYMBOL(xchacha20poly1305_decrypt); - -+static -+bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, -+ const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE], -+ int encrypt) -+{ -+ const u8 *pad0 = page_address(ZERO_PAGE(0)); -+ struct poly1305_desc_ctx poly1305_state; -+ u32 chacha_state[CHACHA_STATE_WORDS]; -+ struct sg_mapping_iter miter; -+ size_t partial = 0; -+ unsigned int flags; -+ bool ret = true; -+ int sl; -+ union { -+ struct { -+ u32 k[CHACHA_KEY_WORDS]; -+ __le64 iv[2]; -+ }; -+ u8 block0[POLY1305_KEY_SIZE]; -+ u8 chacha_stream[CHACHA_BLOCK_SIZE]; -+ struct { -+ u8 mac[2][POLY1305_DIGEST_SIZE]; -+ }; -+ __le64 lens[2]; -+ } b __aligned(16); -+ -+ chacha_load_key(b.k, key); -+ -+ b.iv[0] = 0; -+ b.iv[1] = cpu_to_le64(nonce); -+ -+ chacha_init(chacha_state, b.k, (u8 *)b.iv); -+ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); -+ poly1305_init(&poly1305_state, b.block0); -+ -+ if (unlikely(ad_len)) { -+ poly1305_update(&poly1305_state, ad, ad_len); -+ if (ad_len & 0xf) -+ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); -+ } -+ -+ flags = SG_MITER_TO_SG; -+ if (!preemptible()) -+ flags |= SG_MITER_ATOMIC; -+ -+ sg_miter_start(&miter, src, sg_nents(src), flags); -+ -+ for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) { -+ u8 *addr = miter.addr; -+ size_t length = min_t(size_t, sl, miter.length); -+ -+ if (!encrypt) -+ poly1305_update(&poly1305_state, addr, length); -+ -+ if (unlikely(partial)) { -+ size_t l = min(length, CHACHA_BLOCK_SIZE - partial); -+ -+ crypto_xor(addr, b.chacha_stream + partial, l); -+ partial = (partial + l) & (CHACHA_BLOCK_SIZE - 1); -+ -+ addr += l; -+ length -= l; -+ } -+ -+ if (likely(length >= CHACHA_BLOCK_SIZE || length == sl)) { -+ size_t l = length; -+ -+ if (unlikely(length < sl)) -+ l &= ~(CHACHA_BLOCK_SIZE - 1); -+ chacha_crypt(chacha_state, addr, addr, l, 20); -+ addr += l; -+ length 
-= l; -+ } -+ -+ if (unlikely(length > 0)) { -+ chacha_crypt(chacha_state, b.chacha_stream, pad0, -+ CHACHA_BLOCK_SIZE, 20); -+ crypto_xor(addr, b.chacha_stream, length); -+ partial = length; -+ } -+ -+ if (encrypt) -+ poly1305_update(&poly1305_state, miter.addr, -+ min_t(size_t, sl, miter.length)); -+ } -+ -+ if (src_len & 0xf) -+ poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf)); -+ -+ b.lens[0] = cpu_to_le64(ad_len); -+ b.lens[1] = cpu_to_le64(src_len); -+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); -+ -+ if (likely(sl <= -POLY1305_DIGEST_SIZE)) { -+ if (encrypt) { -+ poly1305_final(&poly1305_state, -+ miter.addr + miter.length + sl); -+ ret = true; -+ } else { -+ poly1305_final(&poly1305_state, b.mac[0]); -+ ret = !crypto_memneq(b.mac[0], -+ miter.addr + miter.length + sl, -+ POLY1305_DIGEST_SIZE); -+ } -+ } -+ -+ sg_miter_stop(&miter); -+ -+ if (unlikely(sl > -POLY1305_DIGEST_SIZE)) { -+ poly1305_final(&poly1305_state, b.mac[1]); -+ scatterwalk_map_and_copy(b.mac[encrypt], src, src_len, -+ sizeof(b.mac[1]), encrypt); -+ ret = encrypt || -+ !crypto_memneq(b.mac[0], b.mac[1], POLY1305_DIGEST_SIZE); -+ } -+ -+ memzero_explicit(chacha_state, sizeof(chacha_state)); -+ memzero_explicit(&b, sizeof(b)); -+ -+ return ret; -+} -+ -+bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ return chacha20poly1305_crypt_sg_inplace(src, src_len, ad, ad_len, -+ nonce, key, 1); -+} -+EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace); -+ -+bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u64 nonce, -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ if (unlikely(src_len < POLY1305_DIGEST_SIZE)) -+ return false; -+ -+ return chacha20poly1305_crypt_sg_inplace(src, -+ src_len - POLY1305_DIGEST_SIZE, -+ ad, ad_len, nonce, key, 0); -+} 
-+EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace); -+ - static int __init mod_init(void) - { - if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && diff --git a/target/linux/generic/backport-5.4/080-wireguard-0034-crypto-chacha_generic-remove-unnecessary-setkey-func.patch b/target/linux/generic/backport-5.4/080-wireguard-0034-crypto-chacha_generic-remove-unnecessary-setkey-func.patch deleted file mode 100644 index 709b1fbcf5..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0034-crypto-chacha_generic-remove-unnecessary-setkey-func.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Eric Biggers <ebiggers@google.com> -Date: Sun, 17 Nov 2019 23:21:29 -0800 -Subject: [PATCH] crypto: chacha_generic - remove unnecessary setkey() - functions - -commit 2043323a799a660bc84bbee404cf7a2617ec6157 upstream. - -Use chacha20_setkey() and chacha12_setkey() from -<crypto/internal/chacha.h> instead of defining them again in -chacha_generic.c. - -Signed-off-by: Eric Biggers <ebiggers@google.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - crypto/chacha_generic.c | 18 +++--------------- - 1 file changed, 3 insertions(+), 15 deletions(-) - ---- a/crypto/chacha_generic.c -+++ b/crypto/chacha_generic.c -@@ -37,18 +37,6 @@ static int chacha_stream_xor(struct skci - return err; - } - --static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize) --{ -- return chacha_setkey(tfm, key, keysize, 20); --} -- --static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -- unsigned int keysize) --{ -- return chacha_setkey(tfm, key, keysize, 12); --} -- - static int crypto_chacha_crypt(struct skcipher_request *req) - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -@@ -91,7 +79,7 @@ static struct skcipher_alg algs[] = { - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha20_setkey, -+ .setkey = chacha20_setkey, - .encrypt = crypto_chacha_crypt, - .decrypt = crypto_chacha_crypt, - }, { -@@ -106,7 +94,7 @@ static struct skcipher_alg algs[] = { - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha20_setkey, -+ .setkey = chacha20_setkey, - .encrypt = crypto_xchacha_crypt, - .decrypt = crypto_xchacha_crypt, - }, { -@@ -121,7 +109,7 @@ static struct skcipher_alg algs[] = { - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, -- .setkey = crypto_chacha12_setkey, -+ .setkey = chacha12_setkey, - .encrypt = crypto_xchacha_crypt, - .decrypt = crypto_xchacha_crypt, - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0035-crypto-x86-chacha-only-unregister-algorithms-if-regi.patch b/target/linux/generic/backport-5.4/080-wireguard-0035-crypto-x86-chacha-only-unregister-algorithms-if-regi.patch deleted file mode 100644 index 4554ea898b..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0035-crypto-x86-chacha-only-unregister-algorithms-if-regi.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Eric Biggers <ebiggers@google.com> -Date: Sun, 17 Nov 2019 23:21:58 -0800 -Subject: [PATCH] crypto: x86/chacha - only unregister algorithms if registered - -commit b62755aed3a3f5ca9edd2718339ccea3b6bbbe57 upstream. - -It's not valid to call crypto_unregister_skciphers() without a prior -call to crypto_register_skciphers(). - -Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function") -Signed-off-by: Eric Biggers <ebiggers@google.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/chacha_glue.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -304,7 +304,8 @@ static int __init chacha_simd_mod_init(v - - static void __exit chacha_simd_mod_fini(void) - { -- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -+ if (boot_cpu_has(X86_FEATURE_SSSE3)) -+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); - } - - module_init(chacha_simd_mod_init); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0036-crypto-lib-chacha20poly1305-use-chacha20_crypt.patch b/target/linux/generic/backport-5.4/080-wireguard-0036-crypto-lib-chacha20poly1305-use-chacha20_crypt.patch deleted file mode 100644 index 6ad20b999e..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0036-crypto-lib-chacha20poly1305-use-chacha20_crypt.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Eric Biggers <ebiggers@google.com> -Date: Sun, 17 Nov 2019 23:22:16 -0800 -Subject: [PATCH] crypto: lib/chacha20poly1305 - use chacha20_crypt() - -commit 
413808b71e6204b0cc1eeaa77960f7c3cd381d33 upstream. - -Use chacha20_crypt() instead of chacha_crypt(), since it's not really -appropriate for users of the ChaCha library API to be passing the number -of rounds as an argument. - -Signed-off-by: Eric Biggers <ebiggers@google.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - lib/crypto/chacha20poly1305.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - ---- a/lib/crypto/chacha20poly1305.c -+++ b/lib/crypto/chacha20poly1305.c -@@ -66,14 +66,14 @@ __chacha20poly1305_encrypt(u8 *dst, cons - __le64 lens[2]; - } b; - -- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); -+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0)); - poly1305_init(&poly1305_state, b.block0); - - poly1305_update(&poly1305_state, ad, ad_len); - if (ad_len & 0xf) - poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf)); - -- chacha_crypt(chacha_state, dst, src, src_len, 20); -+ chacha20_crypt(chacha_state, dst, src, src_len); - - poly1305_update(&poly1305_state, dst, src_len); - if (src_len & 0xf) -@@ -140,7 +140,7 @@ __chacha20poly1305_decrypt(u8 *dst, cons - if (unlikely(src_len < POLY1305_DIGEST_SIZE)) - return false; - -- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); -+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0)); - poly1305_init(&poly1305_state, b.block0); - - poly1305_update(&poly1305_state, ad, ad_len); -@@ -160,7 +160,7 @@ __chacha20poly1305_decrypt(u8 *dst, cons - - ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE); - if (likely(!ret)) -- chacha_crypt(chacha_state, dst, src, dst_len, 20); -+ chacha20_crypt(chacha_state, dst, src, dst_len); - - memzero_explicit(&b, sizeof(b)); - -@@ -241,7 +241,7 @@ bool chacha20poly1305_crypt_sg_inplace(s - b.iv[1] = cpu_to_le64(nonce); - - chacha_init(chacha_state, b.k, (u8 *)b.iv); 
-- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20); -+ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0)); - poly1305_init(&poly1305_state, b.block0); - - if (unlikely(ad_len)) { -@@ -278,14 +278,14 @@ bool chacha20poly1305_crypt_sg_inplace(s - - if (unlikely(length < sl)) - l &= ~(CHACHA_BLOCK_SIZE - 1); -- chacha_crypt(chacha_state, addr, addr, l, 20); -+ chacha20_crypt(chacha_state, addr, addr, l); - addr += l; - length -= l; - } - - if (unlikely(length > 0)) { -- chacha_crypt(chacha_state, b.chacha_stream, pad0, -- CHACHA_BLOCK_SIZE, 20); -+ chacha20_crypt(chacha_state, b.chacha_stream, pad0, -+ CHACHA_BLOCK_SIZE); - crypto_xor(addr, b.chacha_stream, length); - partial = length; - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0037-crypto-arch-conditionalize-crypto-api-in-arch-glue-f.patch b/target/linux/generic/backport-5.4/080-wireguard-0037-crypto-arch-conditionalize-crypto-api-in-arch-glue-f.patch deleted file mode 100644 index d510438f1d..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0037-crypto-arch-conditionalize-crypto-api-in-arch-glue-f.patch +++ /dev/null @@ -1,275 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 25 Nov 2019 11:31:12 +0100 -Subject: [PATCH] crypto: arch - conditionalize crypto api in arch glue for lib - code - -commit 8394bfec51e0e565556101bcc4e2fe7551104cd8 upstream. - -For glue code that's used by Zinc, the actual Crypto API functions might -not necessarily exist, and don't need to exist either. Before this -patch, there are valid build configurations that lead to a unbuildable -kernel. This fixes it to conditionalize those symbols on the existence -of the proper config entry. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-glue.c | 26 ++++++++++++++++---------- - arch/arm/crypto/curve25519-glue.c | 5 +++-- - arch/arm/crypto/poly1305-glue.c | 9 ++++++--- - arch/arm64/crypto/chacha-neon-glue.c | 5 +++-- - arch/arm64/crypto/poly1305-glue.c | 5 +++-- - arch/mips/crypto/chacha-glue.c | 6 ++++-- - arch/mips/crypto/poly1305-glue.c | 6 ++++-- - arch/x86/crypto/blake2s-glue.c | 6 ++++-- - arch/x86/crypto/chacha_glue.c | 5 +++-- - arch/x86/crypto/curve25519-x86_64.c | 7 ++++--- - arch/x86/crypto/poly1305_glue.c | 5 +++-- - 11 files changed, 53 insertions(+), 32 deletions(-) - ---- a/arch/arm/crypto/chacha-glue.c -+++ b/arch/arm/crypto/chacha-glue.c -@@ -286,11 +286,13 @@ static struct skcipher_alg neon_algs[] = - - static int __init chacha_simd_mod_init(void) - { -- int err; -+ int err = 0; - -- err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -- if (err) -- return err; -+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { -+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -+ if (err) -+ return err; -+ } - - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { - int i; -@@ -310,18 +312,22 @@ static int __init chacha_simd_mod_init(v - static_branch_enable(&use_neon); - } - -- err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); -- if (err) -- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { -+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); -+ if (err) -+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -+ } - } - return err; - } - - static void __exit chacha_simd_mod_fini(void) - { -- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); -- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) -- crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); -+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) { -+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); 
-+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) -+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); -+ } - } - - module_init(chacha_simd_mod_init); ---- a/arch/arm/crypto/curve25519-glue.c -+++ b/arch/arm/crypto/curve25519-glue.c -@@ -108,14 +108,15 @@ static int __init mod_init(void) - { - if (elf_hwcap & HWCAP_NEON) { - static_branch_enable(&have_neon); -- return crypto_register_kpp(&curve25519_alg); -+ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? -+ crypto_register_kpp(&curve25519_alg) : 0; - } - return 0; - } - - static void __exit mod_exit(void) - { -- if (elf_hwcap & HWCAP_NEON) -+ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) - crypto_unregister_kpp(&curve25519_alg); - } - ---- a/arch/arm/crypto/poly1305-glue.c -+++ b/arch/arm/crypto/poly1305-glue.c -@@ -249,16 +249,19 @@ static int __init arm_poly1305_mod_init( - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - (elf_hwcap & HWCAP_NEON)) - static_branch_enable(&have_neon); -- else -+ else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) - /* register only the first entry */ - return crypto_register_shash(&arm_poly1305_algs[0]); - -- return crypto_register_shashes(arm_poly1305_algs, -- ARRAY_SIZE(arm_poly1305_algs)); -+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? -+ crypto_register_shashes(arm_poly1305_algs, -+ ARRAY_SIZE(arm_poly1305_algs)) : 0; - } - - static void __exit arm_poly1305_mod_exit(void) - { -+ if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) -+ return; - if (!static_branch_likely(&have_neon)) { - crypto_unregister_shash(&arm_poly1305_algs[0]); - return; ---- a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -211,12 +211,13 @@ static int __init chacha_simd_mod_init(v - - static_branch_enable(&have_neon); - -- return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ? 
-+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; - } - - static void __exit chacha_simd_mod_fini(void) - { -- if (cpu_have_named_feature(ASIMD)) -+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && cpu_have_named_feature(ASIMD)) - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); - } - ---- a/arch/arm64/crypto/poly1305-glue.c -+++ b/arch/arm64/crypto/poly1305-glue.c -@@ -220,12 +220,13 @@ static int __init neon_poly1305_mod_init - - static_branch_enable(&have_neon); - -- return crypto_register_shash(&neon_poly1305_alg); -+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? -+ crypto_register_shash(&neon_poly1305_alg) : 0; - } - - static void __exit neon_poly1305_mod_exit(void) - { -- if (cpu_have_named_feature(ASIMD)) -+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD)) - crypto_unregister_shash(&neon_poly1305_alg); - } - ---- a/arch/mips/crypto/chacha-glue.c -+++ b/arch/mips/crypto/chacha-glue.c -@@ -128,12 +128,14 @@ static struct skcipher_alg algs[] = { - - static int __init chacha_simd_mod_init(void) - { -- return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ? -+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; - } - - static void __exit chacha_simd_mod_fini(void) - { -- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) -+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); - } - - module_init(chacha_simd_mod_init); ---- a/arch/mips/crypto/poly1305-glue.c -+++ b/arch/mips/crypto/poly1305-glue.c -@@ -187,12 +187,14 @@ static struct shash_alg mips_poly1305_al - - static int __init mips_poly1305_mod_init(void) - { -- return crypto_register_shash(&mips_poly1305_alg); -+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? 
-+ crypto_register_shash(&mips_poly1305_alg) : 0; - } - - static void __exit mips_poly1305_mod_exit(void) - { -- crypto_unregister_shash(&mips_poly1305_alg); -+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) -+ crypto_unregister_shash(&mips_poly1305_alg); - } - - module_init(mips_poly1305_mod_init); ---- a/arch/x86/crypto/blake2s-glue.c -+++ b/arch/x86/crypto/blake2s-glue.c -@@ -210,12 +210,14 @@ static int __init blake2s_mod_init(void) - XFEATURE_MASK_AVX512, NULL)) - static_branch_enable(&blake2s_use_avx512); - -- return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? -+ crypto_register_shashes(blake2s_algs, -+ ARRAY_SIZE(blake2s_algs)) : 0; - } - - static void __exit blake2s_mod_exit(void) - { -- if (boot_cpu_has(X86_FEATURE_SSSE3)) -+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) - crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); - } - ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -299,12 +299,13 @@ static int __init chacha_simd_mod_init(v - boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ - static_branch_enable(&chacha_use_avx512vl); - } -- return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -+ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ? -+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; - } - - static void __exit chacha_simd_mod_fini(void) - { -- if (boot_cpu_has(X86_FEATURE_SSSE3)) -+ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3)) - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); - } - ---- a/arch/x86/crypto/curve25519-x86_64.c -+++ b/arch/x86/crypto/curve25519-x86_64.c -@@ -2457,13 +2457,14 @@ static int __init curve25519_mod_init(vo - static_branch_enable(&curve25519_use_adx); - else - return 0; -- return crypto_register_kpp(&curve25519_alg); -+ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? 
-+ crypto_register_kpp(&curve25519_alg) : 0; - } - - static void __exit curve25519_mod_exit(void) - { -- if (boot_cpu_has(X86_FEATURE_BMI2) || -- boot_cpu_has(X86_FEATURE_ADX)) -+ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && -+ (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX))) - crypto_unregister_kpp(&curve25519_alg); - } - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -224,12 +224,13 @@ static int __init poly1305_simd_mod_init - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - static_branch_enable(&poly1305_use_avx2); - -- return crypto_register_shash(&alg); -+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0; - } - - static void __exit poly1305_simd_mod_exit(void) - { -- crypto_unregister_shash(&alg); -+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) -+ crypto_unregister_shash(&alg); - } - - module_init(poly1305_simd_mod_init); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0038-crypto-chacha-fix-warning-message-in-header-file.patch b/target/linux/generic/backport-5.4/080-wireguard-0038-crypto-chacha-fix-warning-message-in-header-file.patch deleted file mode 100644 index ccd03e3525..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0038-crypto-chacha-fix-warning-message-in-header-file.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= <valdis.kletnieks@vt.edu> -Date: Thu, 5 Dec 2019 20:58:36 -0500 -Subject: [PATCH] crypto: chacha - fix warning message in header file - -commit 579d705cd64e44f3fcda1a6cfd5f37468a5ddf63 upstream. 
- -Building with W=1 causes a warning: - - CC [M] arch/x86/crypto/chacha_glue.o -In file included from arch/x86/crypto/chacha_glue.c:10: -./include/crypto/internal/chacha.h:37:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration] - 37 | static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, - | ^~~~~~ - -Straighten out the order to match the rest of the header file. - -Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - include/crypto/internal/chacha.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/crypto/internal/chacha.h -+++ b/include/crypto/internal/chacha.h -@@ -34,7 +34,7 @@ static inline int chacha20_setkey(struct - return chacha_setkey(tfm, key, keysize, 20); - } - --static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, -+static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keysize) - { - return chacha_setkey(tfm, key, keysize, 12); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0039-crypto-arm-curve25519-add-arch-specific-key-generati.patch b/target/linux/generic/backport-5.4/080-wireguard-0039-crypto-arm-curve25519-add-arch-specific-key-generati.patch deleted file mode 100644 index 67de22deb6..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0039-crypto-arm-curve25519-add-arch-specific-key-generati.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 11 Dec 2019 10:26:39 +0100 -Subject: [PATCH] crypto: arm/curve25519 - add arch-specific key generation - function - -commit 84faa307249b341f6ad8de3e1869d77a65e26669 upstream. - -Somehow this was forgotten when Zinc was being split into oddly shaped -pieces, resulting in linker errors. 
The x86_64 glue has a specific key -generation implementation, but the Arm one does not. However, it can -still receive the NEON speedups by calling the ordinary DH function -using the base point. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/curve25519-glue.c | 7 +++++++ - 1 file changed, 7 insertions(+) - ---- a/arch/arm/crypto/curve25519-glue.c -+++ b/arch/arm/crypto/curve25519-glue.c -@@ -38,6 +38,13 @@ void curve25519_arch(u8 out[CURVE25519_K - } - EXPORT_SYMBOL(curve25519_arch); - -+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], -+ const u8 secret[CURVE25519_KEY_SIZE]) -+{ -+ return curve25519_arch(pub, secret, curve25519_base_point); -+} -+EXPORT_SYMBOL(curve25519_base_arch); -+ - static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, - unsigned int len) - { diff --git a/target/linux/generic/backport-5.4/080-wireguard-0040-crypto-lib-curve25519-re-add-selftests.patch b/target/linux/generic/backport-5.4/080-wireguard-0040-crypto-lib-curve25519-re-add-selftests.patch deleted file mode 100644 index e43d196a3b..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0040-crypto-lib-curve25519-re-add-selftests.patch +++ /dev/null @@ -1,1387 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 16 Dec 2019 19:53:26 +0100 -Subject: [PATCH] crypto: lib/curve25519 - re-add selftests - -commit aa127963f1cab2b93c74c9b128a84610203fb674 upstream. - -Somehow these were dropped when Zinc was being integrated, which is -problematic, because testing the library interface for Curve25519 is -important.. This commit simply adds them back and wires them in in the -same way that the blake2s selftests are wired in. - -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - lib/crypto/Makefile | 1 + - lib/crypto/curve25519-selftest.c | 1321 ++++++++++++++++++++++++++++++ - lib/crypto/curve25519.c | 17 + - 3 files changed, 1339 insertions(+) - create mode 100644 lib/crypto/curve25519-selftest.c - ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -36,4 +36,5 @@ libsha256-y := sha256.o - ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y) - libblake2s-y += blake2s-selftest.o - libchacha20poly1305-y += chacha20poly1305-selftest.o -+libcurve25519-y += curve25519-selftest.o - endif ---- /dev/null -+++ b/lib/crypto/curve25519-selftest.c -@@ -0,0 +1,1321 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include <crypto/curve25519.h> -+ -+struct curve25519_test_vector { -+ u8 private[CURVE25519_KEY_SIZE]; -+ u8 public[CURVE25519_KEY_SIZE]; -+ u8 result[CURVE25519_KEY_SIZE]; -+ bool valid; -+}; -+static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = { -+ { -+ .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, -+ 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, -+ 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, -+ 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, -+ .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, -+ 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, -+ 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, -+ 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, -+ .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, -+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, -+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, -+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, -+ .valid = true -+ }, -+ { -+ .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, -+ 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, -+ 0x6f, 0x3b, 0xb1, 0x29, 
0x26, 0x18, 0xb6, 0xfd, -+ 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, -+ .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, -+ 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, -+ 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, -+ 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, -+ .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, -+ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, -+ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, -+ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, -+ .valid = true -+ }, -+ { -+ .private = { 1 }, -+ .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, -+ 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, -+ 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, -+ 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, -+ .valid = true -+ }, -+ { -+ .private = { 1 }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, -+ 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, -+ 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, -+ 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, -+ .valid = true -+ }, -+ { -+ .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, -+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, -+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, -+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, -+ .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, -+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, -+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, -+ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, -+ .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 
0xc6, 0x90, -+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, -+ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, -+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, -+ .valid = true -+ }, -+ { -+ .private = { 1, 2, 3, 4 }, -+ .public = { 0 }, -+ .result = { 0 }, -+ .valid = false -+ }, -+ { -+ .private = { 2, 4, 6, 8 }, -+ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, -+ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, -+ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, -+ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, -+ .result = { 0 }, -+ .valid = false -+ }, -+ { -+ .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, -+ .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, -+ 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, -+ 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, -+ 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, -+ .valid = true -+ }, -+ { -+ .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, -+ .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, -+ 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, -+ 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, -+ 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, -+ .valid = true -+ }, -+ /* wycheproof - normal case */ -+ { -+ .private = 
{ 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, -+ 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, -+ 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, -+ 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, -+ .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, -+ 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, -+ 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, -+ 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, -+ .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, -+ 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, -+ 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, -+ 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, -+ .valid = true -+ }, -+ /* wycheproof - public key on twist */ -+ { -+ .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, -+ 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, -+ 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, -+ 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, -+ .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, -+ 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, -+ 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, -+ 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, -+ .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, -+ 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, -+ 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, -+ 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, -+ .valid = true -+ }, -+ /* wycheproof - public key on twist */ -+ { -+ .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, -+ 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, -+ 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, -+ 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, -+ .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, -+ 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, -+ 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, -+ 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, -+ .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, -+ 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, -+ 0x6b, 
0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, -+ 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, -+ .valid = true -+ }, -+ /* wycheproof - public key on twist */ -+ { -+ .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, -+ 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, -+ 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, -+ 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, -+ .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, -+ 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, -+ 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, -+ 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, -+ .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, -+ 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, -+ 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, -+ 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, -+ .valid = true -+ }, -+ /* wycheproof - public key on twist */ -+ { -+ .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, -+ 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, -+ 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, -+ 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, -+ .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, -+ 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, -+ 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, -+ 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, -+ .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, -+ 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, -+ 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, -+ 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, -+ .valid = true -+ }, -+ /* wycheproof - public key on twist */ -+ { -+ .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, -+ 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, -+ 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, -+ 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, -+ .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, -+ 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, -+ 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, 
-+ 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, -+ .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, -+ 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, -+ 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, -+ 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, -+ .valid = true -+ }, -+ /* wycheproof - public key = 0 */ -+ { -+ .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11, -+ 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac, -+ 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b, -+ 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc }, -+ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key = 1 */ -+ { -+ .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61, -+ 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea, -+ 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f, -+ 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab }, -+ .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - edge case on twist */ -+ { -+ .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, -+ 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, -+ 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, -+ 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 
}, -+ .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, -+ 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, -+ 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, -+ 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case on twist */ -+ { -+ .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, -+ 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, -+ 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, -+ 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, -+ .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, -+ 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, -+ 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, -+ 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case on twist */ -+ { -+ .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, -+ 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, -+ 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, -+ 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, -+ .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, -+ 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, -+ 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, -+ .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, -+ 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, -+ 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, -+ 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, -+ .valid = true -+ }, -+ /* wycheproof - edge case on twist */ -+ { -+ .private = { 0x28, 0xf4, 0x10, 0x11, 
0x69, 0x18, 0x51, 0xb3, -+ 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, -+ 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, -+ 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, -+ .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, -+ 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, -+ 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, -+ .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, -+ 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, -+ 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, -+ 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, -+ .valid = true -+ }, -+ /* wycheproof - edge case on twist */ -+ { -+ .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, -+ 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, -+ 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, -+ 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, -+ .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, -+ 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, -+ 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, -+ 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case on twist */ -+ { -+ .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, -+ 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, -+ 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, -+ 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, -+ .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, -+ 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, -+ 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 
0x72, 0x65, -+ 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for public key */ -+ { -+ .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, -+ 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, -+ 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, -+ 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, -+ .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, -+ 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, -+ 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, -+ 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for public key */ -+ { -+ .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, -+ 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, -+ 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, -+ 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, -+ .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, -+ 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, -+ 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, -+ 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for public key */ -+ { -+ .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, -+ 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, -+ 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, -+ 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, -+ .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, -+ 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, -+ 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, -+ 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for public key */ -+ { -+ .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, -+ 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, -+ 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, -+ 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, -+ .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, -+ 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, -+ 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, -+ 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, -+ .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, -+ 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, -+ 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, -+ 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for public key */ -+ { -+ .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, -+ 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, -+ 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, -+ 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, -+ 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, -+ 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, -+ 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for public key */ -+ { -+ .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, -+ 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, -+ 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, -+ 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 
0x23, 0xa2 }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, -+ .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, -+ 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, -+ 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, -+ 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for public key */ -+ { -+ .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, -+ 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, -+ 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, -+ 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, -+ .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, -+ 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, -+ 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, -+ 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, -+ .valid = true -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30, -+ 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69, -+ 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14, -+ 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 }, -+ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, -+ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, -+ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, -+ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private 
= { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3, -+ 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b, -+ 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef, -+ 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 }, -+ .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, -+ 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, -+ 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, -+ 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20, -+ 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf, -+ 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43, -+ 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 }, -+ .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f, -+ 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65, -+ 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06, -+ 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 }, -+ .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe, -+ 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9, -+ 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f, -+ 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f }, -+ .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8, -+ 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85, -+ 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c, -+ 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c }, -+ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8, -+ 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d, -+ 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0, -+ 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 }, -+ .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a, -+ 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b, -+ 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67, -+ 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 }, -+ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, -+ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, -+ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, -+ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46, -+ 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02, -+ 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3, -+ 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 }, -+ .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, -+ 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, -+ 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, -+ 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30, -+ 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1, -+ 0x4c, 0x67, 0x93, 0x11, 0x7a, 
0x03, 0x9c, 0xf6, -+ 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe }, -+ .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f, -+ 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77, -+ 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0, -+ 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c }, -+ .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ /* wycheproof - public key with low order */ -+ { -+ .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e, -+ 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f, -+ 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77, -+ 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b }, -+ .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = false -+ }, -+ 
/* wycheproof - public key >= p */ -+ { -+ .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, -+ 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, -+ 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, -+ 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, -+ .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, -+ 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, -+ 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, -+ 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, -+ 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, -+ 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, -+ 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, -+ .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, -+ 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, -+ 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, -+ 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, -+ 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, -+ 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, -+ 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, -+ .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, -+ 0x04, 0xdc, 
0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, -+ 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, -+ 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, -+ 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, -+ 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, -+ 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, -+ 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, -+ 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, -+ 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, -+ 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, -+ 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, -+ 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, -+ .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, -+ 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, -+ 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, -+ 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, -+ 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, -+ 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, -+ 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, -+ .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, -+ 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, -+ 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, -+ 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, -+ 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, -+ 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, -+ 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, -+ .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, -+ .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, -+ 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, -+ 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, -+ 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, -+ 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, -+ 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, -+ 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, -+ .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, -+ 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, -+ 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, -+ 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, -+ 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, -+ 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, -+ 0x4a, 0x05, 
0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, -+ .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, -+ 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, -+ 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, -+ 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, -+ 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, -+ 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, -+ 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, -+ .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, -+ 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, -+ 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, -+ 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, -+ 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, -+ 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, -+ 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, -+ .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, -+ 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, -+ 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, -+ 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 
0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, -+ 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, -+ 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, -+ 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, -+ .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, -+ 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, -+ 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, -+ 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, -+ 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, -+ 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, -+ 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, -+ .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, -+ 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, -+ 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, -+ 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, -+ 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, -+ 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, -+ 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, -+ .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, -+ 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, -+ 0xc7, 0x54, 0x52, 
0x5d, 0x4c, 0x91, 0xdb, 0x95, -+ 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, -+ 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, -+ 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, -+ 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, -+ .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, -+ 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, -+ 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, -+ 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, -+ .valid = true -+ }, -+ /* wycheproof - public key >= p */ -+ { -+ .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, -+ 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, -+ 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, -+ 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, -+ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -+ .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, -+ 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, -+ 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, -+ 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, -+ .valid = true -+ }, -+ /* wycheproof - RFC 7748 */ -+ { -+ .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, -+ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, -+ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, -+ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, -+ .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, -+ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, -+ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, -+ 0x10, 0xa9, 0x03, 0xa6, 
0xd0, 0xab, 0x1c, 0x4c }, -+ .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, -+ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, -+ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, -+ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, -+ .valid = true -+ }, -+ /* wycheproof - RFC 7748 */ -+ { -+ .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, -+ 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, -+ 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, -+ 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, -+ .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, -+ 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, -+ 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, -+ 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, -+ .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, -+ 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, -+ 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, -+ 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, -+ 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, -+ 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, -+ 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, -+ .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public 
= { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, -+ 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, -+ 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, -+ 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, -+ .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, -+ 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, -+ 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, -+ 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, -+ .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, -+ 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, -+ 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, -+ 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, -+ .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 
0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, -+ 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, -+ 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, -+ 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, -+ .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, -+ 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, -+ 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, -+ 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, -+ .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, -+ 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, -+ 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, -+ 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, -+ .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, -+ 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, -+ 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, -+ 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, -+ .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, -+ 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, -+ 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, -+ 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, -+ .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, -+ 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, -+ 0x92, 0x8e, 0x91, 0x36, 
0xf0, 0x62, 0x0a, 0xeb, -+ 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, -+ .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, -+ 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, -+ 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, -+ 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, -+ .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, -+ 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, -+ 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, -+ 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, -+ .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 
0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, -+ 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, -+ 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, -+ 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, -+ .valid = true -+ }, -+ /* wycheproof - edge case for shared secret */ -+ { -+ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, -+ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, -+ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, -+ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, -+ .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, -+ 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, -+ 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, -+ 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, -+ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, -+ .valid = true -+ }, -+ /* wycheproof - checking for overflow */ -+ { -+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, -+ 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, -+ 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, -+ 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, -+ .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, -+ 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, -+ 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, -+ 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, -+ .valid = true -+ }, -+ /* wycheproof - 
checking for overflow */ -+ { -+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, -+ 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, -+ 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, -+ 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, -+ .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, -+ 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, -+ 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, -+ 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, -+ .valid = true -+ }, -+ /* wycheproof - checking for overflow */ -+ { -+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, -+ 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, -+ 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, -+ 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, -+ .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, -+ 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, -+ 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, -+ 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, -+ .valid = true -+ }, -+ /* wycheproof - checking for overflow */ -+ { -+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, -+ 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, -+ 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, -+ 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, -+ .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, -+ 0x1d, 0x02, 
0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, -+ 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, -+ 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, -+ .valid = true -+ }, -+ /* wycheproof - checking for overflow */ -+ { -+ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, -+ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, -+ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, -+ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, -+ .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, -+ 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, -+ 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, -+ 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, -+ .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, -+ 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, -+ 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, -+ 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, -+ .valid = true -+ }, -+ /* wycheproof - private key == -1 (mod order) */ -+ { -+ .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, -+ 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, -+ .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, -+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, -+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, -+ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, -+ .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, -+ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, -+ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, -+ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, -+ .valid = true -+ }, -+ /* wycheproof - private key == 1 (mod order) on twist */ -+ { -+ .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, -+ 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, -+ .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, -+ 0x2d, 0x37, 0xd2, 0x58, 
0xf8, 0x73, 0xa8, 0xe6, -+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, -+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, -+ .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, -+ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, -+ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, -+ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, -+ .valid = true -+ } -+}; -+ -+bool __init curve25519_selftest(void) -+{ -+ bool success = true, ret, ret2; -+ size_t i = 0, j; -+ u8 in[CURVE25519_KEY_SIZE]; -+ u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE], -+ out3[CURVE25519_KEY_SIZE]; -+ -+ for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { -+ memset(out, 0, CURVE25519_KEY_SIZE); -+ ret = curve25519(out, curve25519_test_vectors[i].private, -+ curve25519_test_vectors[i].public); -+ if (ret != curve25519_test_vectors[i].valid || -+ memcmp(out, curve25519_test_vectors[i].result, -+ CURVE25519_KEY_SIZE)) { -+ pr_err("curve25519 self-test %zu: FAIL\n", i + 1); -+ success = false; -+ } -+ } -+ -+ for (i = 0; i < 5; ++i) { -+ get_random_bytes(in, sizeof(in)); -+ ret = curve25519_generate_public(out, in); -+ ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); -+ curve25519_generic(out3, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); -+ if (ret != ret2 || -+ memcmp(out, out2, CURVE25519_KEY_SIZE) || -+ memcmp(out, out3, CURVE25519_KEY_SIZE)) { -+ pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x", -+ i + 1); -+ for (j = CURVE25519_KEY_SIZE; j-- > 0;) -+ printk(KERN_CONT "%02x", in[j]); -+ printk(KERN_CONT "\n"); -+ success = false; -+ } -+ } -+ -+ return success; -+} ---- a/lib/crypto/curve25519.c -+++ b/lib/crypto/curve25519.c -@@ -13,6 +13,8 @@ - #include <linux/module.h> - #include <linux/init.h> - -+bool curve25519_selftest(void); -+ - const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; - const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; - -@@ -20,6 +22,21 @@ EXPORT_SYMBOL(curve25519_null_point); - 
EXPORT_SYMBOL(curve25519_base_point); - EXPORT_SYMBOL(curve25519_generic); - -+static int __init mod_init(void) -+{ -+ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && -+ WARN_ON(!curve25519_selftest())) -+ return -ENODEV; -+ return 0; -+} -+ -+static void __exit mod_exit(void) -+{ -+} -+ -+module_init(mod_init); -+module_exit(mod_exit); -+ - MODULE_LICENSE("GPL v2"); - MODULE_DESCRIPTION("Curve25519 scalar multiplication"); - MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0041-crypto-poly1305-add-new-32-and-64-bit-generic-versio.patch b/target/linux/generic/backport-5.4/080-wireguard-0041-crypto-poly1305-add-new-32-and-64-bit-generic-versio.patch deleted file mode 100644 index c41ef55b14..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0041-crypto-poly1305-add-new-32-and-64-bit-generic-versio.patch +++ /dev/null @@ -1,1164 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 5 Jan 2020 22:40:46 -0500 -Subject: [PATCH] crypto: poly1305 - add new 32 and 64-bit generic versions - -commit 1c08a104360f3e18f4ee6346c21cc3923efb952e upstream. - -These two C implementations from Zinc -- a 32x32 one and a 64x64 one, -depending on the platform -- come from Andrew Moon's public domain -poly1305-donna portable code, modified for usage in the kernel. The -precomputation in the 32-bit version and the use of 64x64 multiplies in -the 64-bit version make these perform better than the code it replaces. -Moon's code is also very widespread and has received many eyeballs of -scrutiny. - -There's a bit of interference between the x86 implementation, which -relies on internal details of the old scalar implementation. In the next -commit, the x86 implementation will be replaced with a faster one that -doesn't rely on this, so none of this matters much. 
But for now, to keep -this passing the tests, we inline the bits of the old implementation -that the x86 implementation relied on. Also, since we now support a -slightly larger key space, via the union, some offsets had to be fixed -up. - -Nonce calculation was folded in with the emit function, to take -advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no -nonce handling in emit, so this path was conditionalized. We also -introduced a new struct, poly1305_core_key, to represent the precise -amount of space that particular implementation uses. - -Testing with kbench9000, depending on the CPU, the update function for -the 32x32 version has been improved by 4%-7%, and for the 64x64 by -19%-30%. The 32x32 gains are small, but I think there's great value in -having a parallel implementation to the 64x64 one so that the two can be -compared side-by-side as nice stand-alone units. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305-avx2-x86_64.S | 20 +-- - arch/x86/crypto/poly1305_glue.c | 215 +++++++++++++++++++++++-- - crypto/adiantum.c | 4 +- - crypto/nhpoly1305.c | 2 +- - crypto/poly1305_generic.c | 25 ++- - include/crypto/internal/poly1305.h | 45 ++---- - include/crypto/nhpoly1305.h | 4 +- - include/crypto/poly1305.h | 26 ++- - lib/crypto/Makefile | 4 +- - lib/crypto/poly1305-donna32.c | 204 +++++++++++++++++++++++ - lib/crypto/poly1305-donna64.c | 185 +++++++++++++++++++++ - lib/crypto/poly1305.c | 169 +------------------ - 12 files changed, 675 insertions(+), 228 deletions(-) - create mode 100644 lib/crypto/poly1305-donna32.c - create mode 100644 lib/crypto/poly1305-donna64.c - ---- a/arch/x86/crypto/poly1305-avx2-x86_64.S -+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S -@@ -34,16 +34,16 @@ ORMASK: .octa 0x000000000100000000000000 - #define u2 0x08(%r8) - #define u3 0x0c(%r8) - #define u4 0x10(%r8) --#define w0 0x14(%r8) --#define w1 0x18(%r8) --#define w2 0x1c(%r8) --#define w3 0x20(%r8) --#define w4 0x24(%r8) --#define y0 0x28(%r8) --#define y1 0x2c(%r8) --#define y2 0x30(%r8) --#define y3 0x34(%r8) --#define y4 0x38(%r8) -+#define w0 0x18(%r8) -+#define w1 0x1c(%r8) -+#define w2 0x20(%r8) -+#define w3 0x24(%r8) -+#define w4 0x28(%r8) -+#define y0 0x30(%r8) -+#define y1 0x34(%r8) -+#define y2 0x38(%r8) -+#define y3 0x3c(%r8) -+#define y4 0x40(%r8) - #define m %rsi - #define hc0 %ymm0 - #define hc1 %ymm1 ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -25,6 +25,21 @@ asmlinkage void poly1305_4block_avx2(u32 - static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); - static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); - -+static inline u64 mlt(u64 a, u64 b) -+{ -+ return a * b; -+} -+ -+static inline u32 sr(u64 v, u_char n) -+{ -+ return v >> n; -+} -+ -+static inline u32 and(u32 v, u32 mask) -+{ -+ return v & mask; -+} -+ - static void poly1305_simd_mult(u32 *a, const 
u32 *b) - { - u8 m[POLY1305_BLOCK_SIZE]; -@@ -36,6 +51,168 @@ static void poly1305_simd_mult(u32 *a, c - poly1305_block_sse2(a, m, b, 1); - } - -+static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key) -+{ -+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ -+ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; -+ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; -+ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; -+ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; -+ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; -+} -+ -+static void poly1305_integer_blocks(struct poly1305_state *state, -+ const struct poly1305_key *key, -+ const void *src, -+ unsigned int nblocks, u32 hibit) -+{ -+ u32 r0, r1, r2, r3, r4; -+ u32 s1, s2, s3, s4; -+ u32 h0, h1, h2, h3, h4; -+ u64 d0, d1, d2, d3, d4; -+ -+ if (!nblocks) -+ return; -+ -+ r0 = key->r[0]; -+ r1 = key->r[1]; -+ r2 = key->r[2]; -+ r3 = key->r[3]; -+ r4 = key->r[4]; -+ -+ s1 = r1 * 5; -+ s2 = r2 * 5; -+ s3 = r3 * 5; -+ s4 = r4 * 5; -+ -+ h0 = state->h[0]; -+ h1 = state->h[1]; -+ h2 = state->h[2]; -+ h3 = state->h[3]; -+ h4 = state->h[4]; -+ -+ do { -+ /* h += m[i] */ -+ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; -+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; -+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; -+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; -+ h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); -+ -+ /* h *= r */ -+ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + -+ mlt(h3, s2) + mlt(h4, s1); -+ d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + -+ mlt(h3, s3) + mlt(h4, s2); -+ d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + -+ mlt(h3, s4) + mlt(h4, s3); -+ d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + -+ mlt(h3, r0) + mlt(h4, s4); -+ d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + -+ mlt(h3, r1) + mlt(h4, r0); -+ -+ /* (partial) h %= p */ -+ d1 += sr(d0, 26); h0 = and(d0, 
0x3ffffff); -+ d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); -+ d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); -+ d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); -+ h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); -+ h1 += h0 >> 26; h0 = h0 & 0x3ffffff; -+ -+ src += POLY1305_BLOCK_SIZE; -+ } while (--nblocks); -+ -+ state->h[0] = h0; -+ state->h[1] = h1; -+ state->h[2] = h2; -+ state->h[3] = h3; -+ state->h[4] = h4; -+} -+ -+static void poly1305_integer_emit(const struct poly1305_state *state, void *dst) -+{ -+ u32 h0, h1, h2, h3, h4; -+ u32 g0, g1, g2, g3, g4; -+ u32 mask; -+ -+ /* fully carry h */ -+ h0 = state->h[0]; -+ h1 = state->h[1]; -+ h2 = state->h[2]; -+ h3 = state->h[3]; -+ h4 = state->h[4]; -+ -+ h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; -+ h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; -+ h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; -+ h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; -+ h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; -+ -+ /* compute h + -p */ -+ g0 = h0 + 5; -+ g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; -+ g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; -+ g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; -+ g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; -+ -+ /* select h if h < p, or h + -p if h >= p */ -+ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; -+ g0 &= mask; -+ g1 &= mask; -+ g2 &= mask; -+ g3 &= mask; -+ g4 &= mask; -+ mask = ~mask; -+ h0 = (h0 & mask) | g0; -+ h1 = (h1 & mask) | g1; -+ h2 = (h2 & mask) | g2; -+ h3 = (h3 & mask) | g3; -+ h4 = (h4 & mask) | g4; -+ -+ /* h = h % (2^128) */ -+ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); -+ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); -+ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); -+ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); -+} -+ -+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) -+{ -+ poly1305_integer_setkey(desc->opaque_r, key); -+ desc->s[0] = get_unaligned_le32(key + 16); -+ desc->s[1] = get_unaligned_le32(key + 20); -+ desc->s[2] = get_unaligned_le32(key + 24); -+ desc->s[3] = 
get_unaligned_le32(key + 28); -+ poly1305_core_init(&desc->h); -+ desc->buflen = 0; -+ desc->sset = true; -+ desc->rset = 1; -+} -+EXPORT_SYMBOL_GPL(poly1305_init_arch); -+ -+static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, -+ const u8 *src, unsigned int srclen) -+{ -+ if (!dctx->sset) { -+ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { -+ poly1305_integer_setkey(dctx->r, src); -+ src += POLY1305_BLOCK_SIZE; -+ srclen -= POLY1305_BLOCK_SIZE; -+ dctx->rset = 1; -+ } -+ if (srclen >= POLY1305_BLOCK_SIZE) { -+ dctx->s[0] = get_unaligned_le32(src + 0); -+ dctx->s[1] = get_unaligned_le32(src + 4); -+ dctx->s[2] = get_unaligned_le32(src + 8); -+ dctx->s[3] = get_unaligned_le32(src + 12); -+ src += POLY1305_BLOCK_SIZE; -+ srclen -= POLY1305_BLOCK_SIZE; -+ dctx->sset = true; -+ } -+ } -+ return srclen; -+} -+ - static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, - const u8 *src, unsigned int srclen) - { -@@ -47,8 +224,8 @@ static unsigned int poly1305_scalar_bloc - srclen = datalen; - } - if (srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_core_blocks(&dctx->h, dctx->r, src, -- srclen / POLY1305_BLOCK_SIZE, 1); -+ poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src, -+ srclen / POLY1305_BLOCK_SIZE, 1); - srclen %= POLY1305_BLOCK_SIZE; - } - return srclen; -@@ -105,12 +282,6 @@ static unsigned int poly1305_simd_blocks - return srclen; - } - --void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) --{ -- poly1305_init_generic(desc, key); --} --EXPORT_SYMBOL(poly1305_init_arch); -- - void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int srclen) - { -@@ -158,9 +329,31 @@ void poly1305_update_arch(struct poly130 - } - EXPORT_SYMBOL(poly1305_update_arch); - --void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest) -+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst) - { -- poly1305_final_generic(desc, digest); -+ __le32 digest[4]; -+ u64 f = 0; 
-+ -+ if (unlikely(desc->buflen)) { -+ desc->buf[desc->buflen++] = 1; -+ memset(desc->buf + desc->buflen, 0, -+ POLY1305_BLOCK_SIZE - desc->buflen); -+ poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0); -+ } -+ -+ poly1305_integer_emit(&desc->h, digest); -+ -+ /* mac = (h + s) % (2^128) */ -+ f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; -+ put_unaligned_le32(f, dst + 0); -+ f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; -+ put_unaligned_le32(f, dst + 4); -+ f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; -+ put_unaligned_le32(f, dst + 8); -+ f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; -+ put_unaligned_le32(f, dst + 12); -+ -+ *desc = (struct poly1305_desc_ctx){}; - } - EXPORT_SYMBOL(poly1305_final_arch); - -@@ -183,7 +376,7 @@ static int crypto_poly1305_final(struct - if (unlikely(!dctx->sset)) - return -ENOKEY; - -- poly1305_final_generic(dctx, dst); -+ poly1305_final_arch(dctx, dst); - return 0; - } - ---- a/crypto/adiantum.c -+++ b/crypto/adiantum.c -@@ -72,7 +72,7 @@ struct adiantum_tfm_ctx { - struct crypto_skcipher *streamcipher; - struct crypto_cipher *blockcipher; - struct crypto_shash *hash; -- struct poly1305_key header_hash_key; -+ struct poly1305_core_key header_hash_key; - }; - - struct adiantum_request_ctx { -@@ -249,7 +249,7 @@ static void adiantum_hash_header(struct - poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, - TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1); - -- poly1305_core_emit(&state, &rctx->header_hash); -+ poly1305_core_emit(&state, NULL, &rctx->header_hash); - } - - /* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */ ---- a/crypto/nhpoly1305.c -+++ b/crypto/nhpoly1305.c -@@ -210,7 +210,7 @@ int crypto_nhpoly1305_final_helper(struc - if (state->nh_remaining) - process_nh_hash_value(state, key); - -- poly1305_core_emit(&state->poly_state, dst); -+ poly1305_core_emit(&state->poly_state, NULL, dst); - return 0; - } - EXPORT_SYMBOL(crypto_nhpoly1305_final_helper); ---- 
a/crypto/poly1305_generic.c -+++ b/crypto/poly1305_generic.c -@@ -31,6 +31,29 @@ static int crypto_poly1305_init(struct s - return 0; - } - -+static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, -+ const u8 *src, unsigned int srclen) -+{ -+ if (!dctx->sset) { -+ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { -+ poly1305_core_setkey(&dctx->core_r, src); -+ src += POLY1305_BLOCK_SIZE; -+ srclen -= POLY1305_BLOCK_SIZE; -+ dctx->rset = 2; -+ } -+ if (srclen >= POLY1305_BLOCK_SIZE) { -+ dctx->s[0] = get_unaligned_le32(src + 0); -+ dctx->s[1] = get_unaligned_le32(src + 4); -+ dctx->s[2] = get_unaligned_le32(src + 8); -+ dctx->s[3] = get_unaligned_le32(src + 12); -+ src += POLY1305_BLOCK_SIZE; -+ srclen -= POLY1305_BLOCK_SIZE; -+ dctx->sset = true; -+ } -+ } -+ return srclen; -+} -+ - static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int srclen) - { -@@ -42,7 +65,7 @@ static void poly1305_blocks(struct poly1 - srclen = datalen; - } - -- poly1305_core_blocks(&dctx->h, dctx->r, src, -+ poly1305_core_blocks(&dctx->h, &dctx->core_r, src, - srclen / POLY1305_BLOCK_SIZE, 1); - } - ---- a/include/crypto/internal/poly1305.h -+++ b/include/crypto/internal/poly1305.h -@@ -11,48 +11,23 @@ - #include <crypto/poly1305.h> - - /* -- * Poly1305 core functions. These implement the ε-almost-∆-universal hash -- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce -- * ("s key") at the end. They also only support block-aligned inputs. -+ * Poly1305 core functions. These only accept whole blocks; the caller must -+ * handle any needed block buffering and padding. 'hibit' must be 1 for any -+ * full blocks, or 0 for the final block if it had to be padded. If 'nonce' is -+ * non-NULL, then it's added at the end to compute the Poly1305 MAC. Otherwise, -+ * only the ε-almost-∆-universal hash function (not the full MAC) is computed. 
- */ --void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key); -+ -+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); - static inline void poly1305_core_init(struct poly1305_state *state) - { - *state = (struct poly1305_state){}; - } - - void poly1305_core_blocks(struct poly1305_state *state, -- const struct poly1305_key *key, const void *src, -+ const struct poly1305_core_key *key, const void *src, - unsigned int nblocks, u32 hibit); --void poly1305_core_emit(const struct poly1305_state *state, void *dst); -- --/* -- * Poly1305 requires a unique key for each tag, which implies that we can't set -- * it on the tfm that gets accessed by multiple users simultaneously. Instead we -- * expect the key as the first 32 bytes in the update() call. -- */ --static inline --unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, -- const u8 *src, unsigned int srclen) --{ -- if (!dctx->sset) { -- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_core_setkey(dctx->r, src); -- src += POLY1305_BLOCK_SIZE; -- srclen -= POLY1305_BLOCK_SIZE; -- dctx->rset = 1; -- } -- if (srclen >= POLY1305_BLOCK_SIZE) { -- dctx->s[0] = get_unaligned_le32(src + 0); -- dctx->s[1] = get_unaligned_le32(src + 4); -- dctx->s[2] = get_unaligned_le32(src + 8); -- dctx->s[3] = get_unaligned_le32(src + 12); -- src += POLY1305_BLOCK_SIZE; -- srclen -= POLY1305_BLOCK_SIZE; -- dctx->sset = true; -- } -- } -- return srclen; --} -+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], -+ void *dst); - - #endif ---- a/include/crypto/nhpoly1305.h -+++ b/include/crypto/nhpoly1305.h -@@ -7,7 +7,7 @@ - #define _NHPOLY1305_H - - #include <crypto/hash.h> --#include <crypto/poly1305.h> -+#include <crypto/internal/poly1305.h> - - /* NH parameterization: */ - -@@ -33,7 +33,7 @@ - #define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES) - - struct nhpoly1305_key { -- struct poly1305_key poly_key; -+ struct 
poly1305_core_key poly_key; - u32 nh_key[NH_KEY_WORDS]; - }; - ---- a/include/crypto/poly1305.h -+++ b/include/crypto/poly1305.h -@@ -13,12 +13,29 @@ - #define POLY1305_KEY_SIZE 32 - #define POLY1305_DIGEST_SIZE 16 - -+/* The poly1305_key and poly1305_state types are mostly opaque and -+ * implementation-defined. Limbs might be in base 2^64 or base 2^26, or -+ * different yet. The union type provided keeps these 64-bit aligned for the -+ * case in which this is implemented using 64x64 multiplies. -+ */ -+ - struct poly1305_key { -- u32 r[5]; /* key, base 2^26 */ -+ union { -+ u32 r[5]; -+ u64 r64[3]; -+ }; -+}; -+ -+struct poly1305_core_key { -+ struct poly1305_key key; -+ struct poly1305_key precomputed_s; - }; - - struct poly1305_state { -- u32 h[5]; /* accumulator, base 2^26 */ -+ union { -+ u32 h[5]; -+ u64 h64[3]; -+ }; - }; - - struct poly1305_desc_ctx { -@@ -35,7 +52,10 @@ struct poly1305_desc_ctx { - /* accumulator */ - struct poly1305_state h; - /* key */ -- struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; -+ union { -+ struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE]; -+ struct poly1305_core_key core_r; -+ }; - }; - - void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key); ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -28,7 +28,9 @@ obj-$(CONFIG_CRYPTO_LIB_DES) += libdes - libdes-y := des.o - - obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o --libpoly1305-y := poly1305.o -+libpoly1305-y := poly1305-donna32.o -+libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o -+libpoly1305-y += poly1305.o - - obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o - libsha256-y := sha256.o ---- /dev/null -+++ b/lib/crypto/poly1305-donna32.c -@@ -0,0 +1,204 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is based in part on Andrew Moon's poly1305-donna, which is in the -+ * public domain. 
-+ */ -+ -+#include <linux/kernel.h> -+#include <asm/unaligned.h> -+#include <crypto/internal/poly1305.h> -+ -+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) -+{ -+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ -+ key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff; -+ key->key.r[1] = (get_unaligned_le32(&raw_key[3]) >> 2) & 0x3ffff03; -+ key->key.r[2] = (get_unaligned_le32(&raw_key[6]) >> 4) & 0x3ffc0ff; -+ key->key.r[3] = (get_unaligned_le32(&raw_key[9]) >> 6) & 0x3f03fff; -+ key->key.r[4] = (get_unaligned_le32(&raw_key[12]) >> 8) & 0x00fffff; -+ -+ /* s = 5*r */ -+ key->precomputed_s.r[0] = key->key.r[1] * 5; -+ key->precomputed_s.r[1] = key->key.r[2] * 5; -+ key->precomputed_s.r[2] = key->key.r[3] * 5; -+ key->precomputed_s.r[3] = key->key.r[4] * 5; -+} -+EXPORT_SYMBOL(poly1305_core_setkey); -+ -+void poly1305_core_blocks(struct poly1305_state *state, -+ const struct poly1305_core_key *key, const void *src, -+ unsigned int nblocks, u32 hibit) -+{ -+ const u8 *input = src; -+ u32 r0, r1, r2, r3, r4; -+ u32 s1, s2, s3, s4; -+ u32 h0, h1, h2, h3, h4; -+ u64 d0, d1, d2, d3, d4; -+ u32 c; -+ -+ if (!nblocks) -+ return; -+ -+ hibit <<= 24; -+ -+ r0 = key->key.r[0]; -+ r1 = key->key.r[1]; -+ r2 = key->key.r[2]; -+ r3 = key->key.r[3]; -+ r4 = key->key.r[4]; -+ -+ s1 = key->precomputed_s.r[0]; -+ s2 = key->precomputed_s.r[1]; -+ s3 = key->precomputed_s.r[2]; -+ s4 = key->precomputed_s.r[3]; -+ -+ h0 = state->h[0]; -+ h1 = state->h[1]; -+ h2 = state->h[2]; -+ h3 = state->h[3]; -+ h4 = state->h[4]; -+ -+ do { -+ /* h += m[i] */ -+ h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff; -+ h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff; -+ h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff; -+ h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff; -+ h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit; -+ -+ /* h *= r */ -+ d0 = ((u64)h0 * r0) + ((u64)h1 * s4) + -+ ((u64)h2 * s3) + ((u64)h3 * s2) + -+ ((u64)h4 * s1); -+ d1 = 
((u64)h0 * r1) + ((u64)h1 * r0) + -+ ((u64)h2 * s4) + ((u64)h3 * s3) + -+ ((u64)h4 * s2); -+ d2 = ((u64)h0 * r2) + ((u64)h1 * r1) + -+ ((u64)h2 * r0) + ((u64)h3 * s4) + -+ ((u64)h4 * s3); -+ d3 = ((u64)h0 * r3) + ((u64)h1 * r2) + -+ ((u64)h2 * r1) + ((u64)h3 * r0) + -+ ((u64)h4 * s4); -+ d4 = ((u64)h0 * r4) + ((u64)h1 * r3) + -+ ((u64)h2 * r2) + ((u64)h3 * r1) + -+ ((u64)h4 * r0); -+ -+ /* (partial) h %= p */ -+ c = (u32)(d0 >> 26); -+ h0 = (u32)d0 & 0x3ffffff; -+ d1 += c; -+ c = (u32)(d1 >> 26); -+ h1 = (u32)d1 & 0x3ffffff; -+ d2 += c; -+ c = (u32)(d2 >> 26); -+ h2 = (u32)d2 & 0x3ffffff; -+ d3 += c; -+ c = (u32)(d3 >> 26); -+ h3 = (u32)d3 & 0x3ffffff; -+ d4 += c; -+ c = (u32)(d4 >> 26); -+ h4 = (u32)d4 & 0x3ffffff; -+ h0 += c * 5; -+ c = (h0 >> 26); -+ h0 = h0 & 0x3ffffff; -+ h1 += c; -+ -+ input += POLY1305_BLOCK_SIZE; -+ } while (--nblocks); -+ -+ state->h[0] = h0; -+ state->h[1] = h1; -+ state->h[2] = h2; -+ state->h[3] = h3; -+ state->h[4] = h4; -+} -+EXPORT_SYMBOL(poly1305_core_blocks); -+ -+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], -+ void *dst) -+{ -+ u8 *mac = dst; -+ u32 h0, h1, h2, h3, h4, c; -+ u32 g0, g1, g2, g3, g4; -+ u64 f; -+ u32 mask; -+ -+ /* fully carry h */ -+ h0 = state->h[0]; -+ h1 = state->h[1]; -+ h2 = state->h[2]; -+ h3 = state->h[3]; -+ h4 = state->h[4]; -+ -+ c = h1 >> 26; -+ h1 = h1 & 0x3ffffff; -+ h2 += c; -+ c = h2 >> 26; -+ h2 = h2 & 0x3ffffff; -+ h3 += c; -+ c = h3 >> 26; -+ h3 = h3 & 0x3ffffff; -+ h4 += c; -+ c = h4 >> 26; -+ h4 = h4 & 0x3ffffff; -+ h0 += c * 5; -+ c = h0 >> 26; -+ h0 = h0 & 0x3ffffff; -+ h1 += c; -+ -+ /* compute h + -p */ -+ g0 = h0 + 5; -+ c = g0 >> 26; -+ g0 &= 0x3ffffff; -+ g1 = h1 + c; -+ c = g1 >> 26; -+ g1 &= 0x3ffffff; -+ g2 = h2 + c; -+ c = g2 >> 26; -+ g2 &= 0x3ffffff; -+ g3 = h3 + c; -+ c = g3 >> 26; -+ g3 &= 0x3ffffff; -+ g4 = h4 + c - (1UL << 26); -+ -+ /* select h if h < p, or h + -p if h >= p */ -+ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; -+ g0 &= mask; -+ g1 
&= mask; -+ g2 &= mask; -+ g3 &= mask; -+ g4 &= mask; -+ mask = ~mask; -+ -+ h0 = (h0 & mask) | g0; -+ h1 = (h1 & mask) | g1; -+ h2 = (h2 & mask) | g2; -+ h3 = (h3 & mask) | g3; -+ h4 = (h4 & mask) | g4; -+ -+ /* h = h % (2^128) */ -+ h0 = ((h0) | (h1 << 26)) & 0xffffffff; -+ h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; -+ h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; -+ h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; -+ -+ if (likely(nonce)) { -+ /* mac = (h + nonce) % (2^128) */ -+ f = (u64)h0 + nonce[0]; -+ h0 = (u32)f; -+ f = (u64)h1 + nonce[1] + (f >> 32); -+ h1 = (u32)f; -+ f = (u64)h2 + nonce[2] + (f >> 32); -+ h2 = (u32)f; -+ f = (u64)h3 + nonce[3] + (f >> 32); -+ h3 = (u32)f; -+ } -+ -+ put_unaligned_le32(h0, &mac[0]); -+ put_unaligned_le32(h1, &mac[4]); -+ put_unaligned_le32(h2, &mac[8]); -+ put_unaligned_le32(h3, &mac[12]); -+} -+EXPORT_SYMBOL(poly1305_core_emit); ---- /dev/null -+++ b/lib/crypto/poly1305-donna64.c -@@ -0,0 +1,185 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is based in part on Andrew Moon's poly1305-donna, which is in the -+ * public domain. 
-+ */ -+ -+#include <linux/kernel.h> -+#include <asm/unaligned.h> -+#include <crypto/internal/poly1305.h> -+ -+typedef __uint128_t u128; -+ -+void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) -+{ -+ u64 t0, t1; -+ -+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ -+ t0 = get_unaligned_le64(&raw_key[0]); -+ t1 = get_unaligned_le64(&raw_key[8]); -+ -+ key->key.r64[0] = t0 & 0xffc0fffffffULL; -+ key->key.r64[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL; -+ key->key.r64[2] = ((t1 >> 24)) & 0x00ffffffc0fULL; -+ -+ /* s = 20*r */ -+ key->precomputed_s.r64[0] = key->key.r64[1] * 20; -+ key->precomputed_s.r64[1] = key->key.r64[2] * 20; -+} -+EXPORT_SYMBOL(poly1305_core_setkey); -+ -+void poly1305_core_blocks(struct poly1305_state *state, -+ const struct poly1305_core_key *key, const void *src, -+ unsigned int nblocks, u32 hibit) -+{ -+ const u8 *input = src; -+ u64 hibit64; -+ u64 r0, r1, r2; -+ u64 s1, s2; -+ u64 h0, h1, h2; -+ u64 c; -+ u128 d0, d1, d2, d; -+ -+ if (!nblocks) -+ return; -+ -+ hibit64 = ((u64)hibit) << 40; -+ -+ r0 = key->key.r64[0]; -+ r1 = key->key.r64[1]; -+ r2 = key->key.r64[2]; -+ -+ h0 = state->h64[0]; -+ h1 = state->h64[1]; -+ h2 = state->h64[2]; -+ -+ s1 = key->precomputed_s.r64[0]; -+ s2 = key->precomputed_s.r64[1]; -+ -+ do { -+ u64 t0, t1; -+ -+ /* h += m[i] */ -+ t0 = get_unaligned_le64(&input[0]); -+ t1 = get_unaligned_le64(&input[8]); -+ -+ h0 += t0 & 0xfffffffffffULL; -+ h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL; -+ h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64; -+ -+ /* h *= r */ -+ d0 = (u128)h0 * r0; -+ d = (u128)h1 * s2; -+ d0 += d; -+ d = (u128)h2 * s1; -+ d0 += d; -+ d1 = (u128)h0 * r1; -+ d = (u128)h1 * r0; -+ d1 += d; -+ d = (u128)h2 * s2; -+ d1 += d; -+ d2 = (u128)h0 * r2; -+ d = (u128)h1 * r1; -+ d2 += d; -+ d = (u128)h2 * r0; -+ d2 += d; -+ -+ /* (partial) h %= p */ -+ c = (u64)(d0 >> 44); -+ h0 = (u64)d0 & 0xfffffffffffULL; -+ d1 += c; -+ c = (u64)(d1 >> 44); -+ h1 = (u64)d1 & 
0xfffffffffffULL; -+ d2 += c; -+ c = (u64)(d2 >> 42); -+ h2 = (u64)d2 & 0x3ffffffffffULL; -+ h0 += c * 5; -+ c = h0 >> 44; -+ h0 = h0 & 0xfffffffffffULL; -+ h1 += c; -+ -+ input += POLY1305_BLOCK_SIZE; -+ } while (--nblocks); -+ -+ state->h64[0] = h0; -+ state->h64[1] = h1; -+ state->h64[2] = h2; -+} -+EXPORT_SYMBOL(poly1305_core_blocks); -+ -+void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4], -+ void *dst) -+{ -+ u8 *mac = dst; -+ u64 h0, h1, h2, c; -+ u64 g0, g1, g2; -+ u64 t0, t1; -+ -+ /* fully carry h */ -+ h0 = state->h64[0]; -+ h1 = state->h64[1]; -+ h2 = state->h64[2]; -+ -+ c = h1 >> 44; -+ h1 &= 0xfffffffffffULL; -+ h2 += c; -+ c = h2 >> 42; -+ h2 &= 0x3ffffffffffULL; -+ h0 += c * 5; -+ c = h0 >> 44; -+ h0 &= 0xfffffffffffULL; -+ h1 += c; -+ c = h1 >> 44; -+ h1 &= 0xfffffffffffULL; -+ h2 += c; -+ c = h2 >> 42; -+ h2 &= 0x3ffffffffffULL; -+ h0 += c * 5; -+ c = h0 >> 44; -+ h0 &= 0xfffffffffffULL; -+ h1 += c; -+ -+ /* compute h + -p */ -+ g0 = h0 + 5; -+ c = g0 >> 44; -+ g0 &= 0xfffffffffffULL; -+ g1 = h1 + c; -+ c = g1 >> 44; -+ g1 &= 0xfffffffffffULL; -+ g2 = h2 + c - (1ULL << 42); -+ -+ /* select h if h < p, or h + -p if h >= p */ -+ c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1; -+ g0 &= c; -+ g1 &= c; -+ g2 &= c; -+ c = ~c; -+ h0 = (h0 & c) | g0; -+ h1 = (h1 & c) | g1; -+ h2 = (h2 & c) | g2; -+ -+ if (likely(nonce)) { -+ /* h = (h + nonce) */ -+ t0 = ((u64)nonce[1] << 32) | nonce[0]; -+ t1 = ((u64)nonce[3] << 32) | nonce[2]; -+ -+ h0 += t0 & 0xfffffffffffULL; -+ c = h0 >> 44; -+ h0 &= 0xfffffffffffULL; -+ h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c; -+ c = h1 >> 44; -+ h1 &= 0xfffffffffffULL; -+ h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c; -+ h2 &= 0x3ffffffffffULL; -+ } -+ -+ /* mac = h % (2^128) */ -+ h0 = h0 | (h1 << 44); -+ h1 = (h1 >> 20) | (h2 << 24); -+ -+ put_unaligned_le64(h0, &mac[0]); -+ put_unaligned_le64(h1, &mac[8]); -+} -+EXPORT_SYMBOL(poly1305_core_emit); ---- a/lib/crypto/poly1305.c -+++ 
b/lib/crypto/poly1305.c -@@ -12,151 +12,9 @@ - #include <linux/module.h> - #include <asm/unaligned.h> - --static inline u64 mlt(u64 a, u64 b) --{ -- return a * b; --} -- --static inline u32 sr(u64 v, u_char n) --{ -- return v >> n; --} -- --static inline u32 and(u32 v, u32 mask) --{ -- return v & mask; --} -- --void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key) --{ -- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ -- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; -- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; -- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; -- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; -- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; --} --EXPORT_SYMBOL_GPL(poly1305_core_setkey); -- --void poly1305_core_blocks(struct poly1305_state *state, -- const struct poly1305_key *key, const void *src, -- unsigned int nblocks, u32 hibit) --{ -- u32 r0, r1, r2, r3, r4; -- u32 s1, s2, s3, s4; -- u32 h0, h1, h2, h3, h4; -- u64 d0, d1, d2, d3, d4; -- -- if (!nblocks) -- return; -- -- r0 = key->r[0]; -- r1 = key->r[1]; -- r2 = key->r[2]; -- r3 = key->r[3]; -- r4 = key->r[4]; -- -- s1 = r1 * 5; -- s2 = r2 * 5; -- s3 = r3 * 5; -- s4 = r4 * 5; -- -- h0 = state->h[0]; -- h1 = state->h[1]; -- h2 = state->h[2]; -- h3 = state->h[3]; -- h4 = state->h[4]; -- -- do { -- /* h += m[i] */ -- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; -- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; -- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; -- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; -- h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); -- -- /* h *= r */ -- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + -- mlt(h3, s2) + mlt(h4, s1); -- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + -- mlt(h3, s3) + mlt(h4, s2); -- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + -- mlt(h3, s4) + mlt(h4, s3); -- d3 = mlt(h0, r3) + mlt(h1, r2) + 
mlt(h2, r1) + -- mlt(h3, r0) + mlt(h4, s4); -- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + -- mlt(h3, r1) + mlt(h4, r0); -- -- /* (partial) h %= p */ -- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); -- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); -- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); -- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); -- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); -- h1 += h0 >> 26; h0 = h0 & 0x3ffffff; -- -- src += POLY1305_BLOCK_SIZE; -- } while (--nblocks); -- -- state->h[0] = h0; -- state->h[1] = h1; -- state->h[2] = h2; -- state->h[3] = h3; -- state->h[4] = h4; --} --EXPORT_SYMBOL_GPL(poly1305_core_blocks); -- --void poly1305_core_emit(const struct poly1305_state *state, void *dst) --{ -- u32 h0, h1, h2, h3, h4; -- u32 g0, g1, g2, g3, g4; -- u32 mask; -- -- /* fully carry h */ -- h0 = state->h[0]; -- h1 = state->h[1]; -- h2 = state->h[2]; -- h3 = state->h[3]; -- h4 = state->h[4]; -- -- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; -- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; -- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; -- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; -- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; -- -- /* compute h + -p */ -- g0 = h0 + 5; -- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; -- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; -- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; -- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; -- -- /* select h if h < p, or h + -p if h >= p */ -- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; -- g0 &= mask; -- g1 &= mask; -- g2 &= mask; -- g3 &= mask; -- g4 &= mask; -- mask = ~mask; -- h0 = (h0 & mask) | g0; -- h1 = (h1 & mask) | g1; -- h2 = (h2 & mask) | g2; -- h3 = (h3 & mask) | g3; -- h4 = (h4 & mask) | g4; -- -- /* h = h % (2^128) */ -- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); -- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); -- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); -- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); --} --EXPORT_SYMBOL_GPL(poly1305_core_emit); -- - void 
poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) - { -- poly1305_core_setkey(desc->r, key); -+ poly1305_core_setkey(&desc->core_r, key); - desc->s[0] = get_unaligned_le32(key + 16); - desc->s[1] = get_unaligned_le32(key + 20); - desc->s[2] = get_unaligned_le32(key + 24); -@@ -164,7 +22,7 @@ void poly1305_init_generic(struct poly13 - poly1305_core_init(&desc->h); - desc->buflen = 0; - desc->sset = true; -- desc->rset = 1; -+ desc->rset = 2; - } - EXPORT_SYMBOL_GPL(poly1305_init_generic); - -@@ -181,13 +39,14 @@ void poly1305_update_generic(struct poly - desc->buflen += bytes; - - if (desc->buflen == POLY1305_BLOCK_SIZE) { -- poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1); -+ poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, -+ 1, 1); - desc->buflen = 0; - } - } - - if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { -- poly1305_core_blocks(&desc->h, desc->r, src, -+ poly1305_core_blocks(&desc->h, &desc->core_r, src, - nbytes / POLY1305_BLOCK_SIZE, 1); - src += nbytes - (nbytes % POLY1305_BLOCK_SIZE); - nbytes %= POLY1305_BLOCK_SIZE; -@@ -202,28 +61,14 @@ EXPORT_SYMBOL_GPL(poly1305_update_generi - - void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst) - { -- __le32 digest[4]; -- u64 f = 0; -- - if (unlikely(desc->buflen)) { - desc->buf[desc->buflen++] = 1; - memset(desc->buf + desc->buflen, 0, - POLY1305_BLOCK_SIZE - desc->buflen); -- poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0); -+ poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0); - } - -- poly1305_core_emit(&desc->h, digest); -- -- /* mac = (h + s) % (2^128) */ -- f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; -- put_unaligned_le32(f, dst + 0); -- f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; -- put_unaligned_le32(f, dst + 4); -- f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; -- put_unaligned_le32(f, dst + 8); -- f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; -- put_unaligned_le32(f, dst + 12); -- -+ 
poly1305_core_emit(&desc->h, desc->s, dst); - *desc = (struct poly1305_desc_ctx){}; - } - EXPORT_SYMBOL_GPL(poly1305_final_generic); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0042-crypto-x86-poly1305-import-unmodified-cryptogams-imp.patch b/target/linux/generic/backport-5.4/080-wireguard-0042-crypto-x86-poly1305-import-unmodified-cryptogams-imp.patch deleted file mode 100644 index 8e52383ae1..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0042-crypto-x86-poly1305-import-unmodified-cryptogams-imp.patch +++ /dev/null @@ -1,4183 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 5 Jan 2020 22:40:47 -0500 -Subject: [PATCH] crypto: x86/poly1305 - import unmodified cryptogams - implementation - -commit 0896ca2a0cb6127e8a129f1f2a680d49b6b0f65c upstream. - -These x86_64 vectorized implementations come from Andy Polyakov's -CRYPTOGAMS implementation, and are included here in raw form without -modification, so that subsequent commits that fix these up for the -kernel can see how it has changed. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 4159 +++++++++++++++++ - 1 file changed, 4159 insertions(+) - create mode 100644 arch/x86/crypto/poly1305-x86_64-cryptogams.pl - ---- /dev/null -+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl -@@ -0,0 +1,4159 @@ -+#! /usr/bin/env perl -+# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved. -+# -+# Licensed under the OpenSSL license (the "License"). You may not use -+# this file except in compliance with the License. 
You can obtain a copy -+# in the file LICENSE in the source distribution or at -+# https://www.openssl.org/source/license.html -+ -+# -+# ==================================================================== -+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+# -+# This module implements Poly1305 hash for x86_64. -+# -+# March 2015 -+# -+# Initial release. -+# -+# December 2016 -+# -+# Add AVX512F+VL+BW code path. -+# -+# November 2017 -+# -+# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be -+# executed even on Knights Landing. Trigger for modification was -+# observation that AVX512 code paths can negatively affect overall -+# Skylake-X system performance. Since we are likely to suppress -+# AVX512F capability flag [at least on Skylake-X], conversion serves -+# as kind of "investment protection". Note that next *lake processor, -+# Cannolake, has AVX512IFMA code path to execute... -+# -+# Numbers are cycles per processed byte with poly1305_blocks alone, -+# measured with rdtsc at fixed clock frequency. -+# -+# IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512 -+# P4 4.46/+120% - -+# Core 2 2.41/+90% - -+# Westmere 1.88/+120% - -+# Sandy Bridge 1.39/+140% 1.10 -+# Haswell 1.14/+175% 1.11 0.65 -+# Skylake[-X] 1.13/+120% 0.96 0.51 [0.35] -+# Silvermont 2.83/+95% - -+# Knights L 3.60/? 
1.65 1.10 0.41(***) -+# Goldmont 1.70/+180% - -+# VIA Nano 1.82/+150% - -+# Sledgehammer 1.38/+160% - -+# Bulldozer 2.30/+130% 0.97 -+# Ryzen 1.15/+200% 1.08 1.18 -+# -+# (*) improvement coefficients relative to clang are more modest and -+# are ~50% on most processors, in both cases we are comparing to -+# __int128 code; -+# (**) SSE2 implementation was attempted, but among non-AVX processors -+# it was faster than integer-only code only on older Intel P4 and -+# Core processors, 50-30%, less newer processor is, but slower on -+# contemporary ones, for example almost 2x slower on Atom, and as -+# former are naturally disappearing, SSE2 is deemed unnecessary; -+# (***) strangely enough performance seems to vary from core to core, -+# listed result is best case; -+ -+$flavour = shift; -+$output = shift; -+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -+ -+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -+die "can't locate x86_64-xlate.pl"; -+ -+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` -+ =~ /GNU assembler version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26); -+} -+ -+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && -+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { -+ $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12); -+ $avx += 2 if ($1==2.11 && $2>=8); -+} -+ -+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && -+ `ml64 2>&1` =~ /Version ([0-9]+)\./) { -+ $avx = ($1>=10) + ($1>=12); -+} -+ -+if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { -+ $avx = ($2>=3.0) + ($2>3.0); -+} -+ -+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; -+*STDOUT=*OUT; -+ -+my 
($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx"); -+my ($mac,$nonce)=($inp,$len); # *_emit arguments -+my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13)); -+my ($h0,$h1,$h2)=("%r14","%rbx","%rbp"); -+ -+sub poly1305_iteration { -+# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1 -+# output: $h0-$h2 *= $r0-$r1 -+$code.=<<___; -+ mulq $h0 # h0*r1 -+ mov %rax,$d2 -+ mov $r0,%rax -+ mov %rdx,$d3 -+ -+ mulq $h0 # h0*r0 -+ mov %rax,$h0 # future $h0 -+ mov $r0,%rax -+ mov %rdx,$d1 -+ -+ mulq $h1 # h1*r0 -+ add %rax,$d2 -+ mov $s1,%rax -+ adc %rdx,$d3 -+ -+ mulq $h1 # h1*s1 -+ mov $h2,$h1 # borrow $h1 -+ add %rax,$h0 -+ adc %rdx,$d1 -+ -+ imulq $s1,$h1 # h2*s1 -+ add $h1,$d2 -+ mov $d1,$h1 -+ adc \$0,$d3 -+ -+ imulq $r0,$h2 # h2*r0 -+ add $d2,$h1 -+ mov \$-4,%rax # mask value -+ adc $h2,$d3 -+ -+ and $d3,%rax # last reduction step -+ mov $d3,$h2 -+ shr \$2,$d3 -+ and \$3,$h2 -+ add $d3,%rax -+ add %rax,$h0 -+ adc \$0,$h1 -+ adc \$0,$h2 -+___ -+} -+ -+######################################################################## -+# Layout of opaque area is following. -+# -+# unsigned __int64 h[3]; # current hash value base 2^64 -+# unsigned __int64 r[2]; # key value base 2^64 -+ -+$code.=<<___; -+.text -+ -+.extern OPENSSL_ia32cap_P -+ -+.globl poly1305_init -+.hidden poly1305_init -+.globl poly1305_blocks -+.hidden poly1305_blocks -+.globl poly1305_emit -+.hidden poly1305_emit -+ -+.type poly1305_init,\@function,3 -+.align 32 -+poly1305_init: -+ xor %rax,%rax -+ mov %rax,0($ctx) # initialize hash value -+ mov %rax,8($ctx) -+ mov %rax,16($ctx) -+ -+ cmp \$0,$inp -+ je .Lno_key -+ -+ lea poly1305_blocks(%rip),%r10 -+ lea poly1305_emit(%rip),%r11 -+___ -+$code.=<<___ if ($avx); -+ mov OPENSSL_ia32cap_P+4(%rip),%r9 -+ lea poly1305_blocks_avx(%rip),%rax -+ lea poly1305_emit_avx(%rip),%rcx -+ bt \$`60-32`,%r9 # AVX? -+ cmovc %rax,%r10 -+ cmovc %rcx,%r11 -+___ -+$code.=<<___ if ($avx>1); -+ lea poly1305_blocks_avx2(%rip),%rax -+ bt \$`5+32`,%r9 # AVX2? 
-+ cmovc %rax,%r10 -+___ -+$code.=<<___ if ($avx>3); -+ mov \$`(1<<31|1<<21|1<<16)`,%rax -+ shr \$32,%r9 -+ and %rax,%r9 -+ cmp %rax,%r9 -+ je .Linit_base2_44 -+___ -+$code.=<<___; -+ mov \$0x0ffffffc0fffffff,%rax -+ mov \$0x0ffffffc0ffffffc,%rcx -+ and 0($inp),%rax -+ and 8($inp),%rcx -+ mov %rax,24($ctx) -+ mov %rcx,32($ctx) -+___ -+$code.=<<___ if ($flavour !~ /elf32/); -+ mov %r10,0(%rdx) -+ mov %r11,8(%rdx) -+___ -+$code.=<<___ if ($flavour =~ /elf32/); -+ mov %r10d,0(%rdx) -+ mov %r11d,4(%rdx) -+___ -+$code.=<<___; -+ mov \$1,%eax -+.Lno_key: -+ ret -+.size poly1305_init,.-poly1305_init -+ -+.type poly1305_blocks,\@function,4 -+.align 32 -+poly1305_blocks: -+.cfi_startproc -+.Lblocks: -+ shr \$4,$len -+ jz .Lno_data # too short -+ -+ push %rbx -+.cfi_push %rbx -+ push %rbp -+.cfi_push %rbp -+ push %r12 -+.cfi_push %r12 -+ push %r13 -+.cfi_push %r13 -+ push %r14 -+.cfi_push %r14 -+ push %r15 -+.cfi_push %r15 -+.Lblocks_body: -+ -+ mov $len,%r15 # reassign $len -+ -+ mov 24($ctx),$r0 # load r -+ mov 32($ctx),$s1 -+ -+ mov 0($ctx),$h0 # load hash value -+ mov 8($ctx),$h1 -+ mov 16($ctx),$h2 -+ -+ mov $s1,$r1 -+ shr \$2,$s1 -+ mov $r1,%rax -+ add $r1,$s1 # s1 = r1 + (r1 >> 2) -+ jmp .Loop -+ -+.align 32 -+.Loop: -+ add 0($inp),$h0 # accumulate input -+ adc 8($inp),$h1 -+ lea 16($inp),$inp -+ adc $padbit,$h2 -+___ -+ &poly1305_iteration(); -+$code.=<<___; -+ mov $r1,%rax -+ dec %r15 # len-=16 -+ jnz .Loop -+ -+ mov $h0,0($ctx) # store hash value -+ mov $h1,8($ctx) -+ mov $h2,16($ctx) -+ -+ mov 0(%rsp),%r15 -+.cfi_restore %r15 -+ mov 8(%rsp),%r14 -+.cfi_restore %r14 -+ mov 16(%rsp),%r13 -+.cfi_restore %r13 -+ mov 24(%rsp),%r12 -+.cfi_restore %r12 -+ mov 32(%rsp),%rbp -+.cfi_restore %rbp -+ mov 40(%rsp),%rbx -+.cfi_restore %rbx -+ lea 48(%rsp),%rsp -+.cfi_adjust_cfa_offset -48 -+.Lno_data: -+.Lblocks_epilogue: -+ ret -+.cfi_endproc -+.size poly1305_blocks,.-poly1305_blocks -+ -+.type poly1305_emit,\@function,3 -+.align 32 -+poly1305_emit: -+.Lemit: -+ mov 
0($ctx),%r8 # load hash value -+ mov 8($ctx),%r9 -+ mov 16($ctx),%r10 -+ -+ mov %r8,%rax -+ add \$5,%r8 # compare to modulus -+ mov %r9,%rcx -+ adc \$0,%r9 -+ adc \$0,%r10 -+ shr \$2,%r10 # did 130-bit value overflow? -+ cmovnz %r8,%rax -+ cmovnz %r9,%rcx -+ -+ add 0($nonce),%rax # accumulate nonce -+ adc 8($nonce),%rcx -+ mov %rax,0($mac) # write result -+ mov %rcx,8($mac) -+ -+ ret -+.size poly1305_emit,.-poly1305_emit -+___ -+if ($avx) { -+ -+######################################################################## -+# Layout of opaque area is following. -+# -+# unsigned __int32 h[5]; # current hash value base 2^26 -+# unsigned __int32 is_base2_26; -+# unsigned __int64 r[2]; # key value base 2^64 -+# unsigned __int64 pad; -+# struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9]; -+# -+# where r^n are base 2^26 digits of degrees of multiplier key. There are -+# 5 digits, but last four are interleaved with multiples of 5, totalling -+# in 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4. 
-+ -+my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) = -+ map("%xmm$_",(0..15)); -+ -+$code.=<<___; -+.type __poly1305_block,\@abi-omnipotent -+.align 32 -+__poly1305_block: -+___ -+ &poly1305_iteration(); -+$code.=<<___; -+ ret -+.size __poly1305_block,.-__poly1305_block -+ -+.type __poly1305_init_avx,\@abi-omnipotent -+.align 32 -+__poly1305_init_avx: -+ mov $r0,$h0 -+ mov $r1,$h1 -+ xor $h2,$h2 -+ -+ lea 48+64($ctx),$ctx # size optimization -+ -+ mov $r1,%rax -+ call __poly1305_block # r^2 -+ -+ mov \$0x3ffffff,%eax # save interleaved r^2 and r base 2^26 -+ mov \$0x3ffffff,%edx -+ mov $h0,$d1 -+ and $h0#d,%eax -+ mov $r0,$d2 -+ and $r0#d,%edx -+ mov %eax,`16*0+0-64`($ctx) -+ shr \$26,$d1 -+ mov %edx,`16*0+4-64`($ctx) -+ shr \$26,$d2 -+ -+ mov \$0x3ffffff,%eax -+ mov \$0x3ffffff,%edx -+ and $d1#d,%eax -+ and $d2#d,%edx -+ mov %eax,`16*1+0-64`($ctx) -+ lea (%rax,%rax,4),%eax # *5 -+ mov %edx,`16*1+4-64`($ctx) -+ lea (%rdx,%rdx,4),%edx # *5 -+ mov %eax,`16*2+0-64`($ctx) -+ shr \$26,$d1 -+ mov %edx,`16*2+4-64`($ctx) -+ shr \$26,$d2 -+ -+ mov $h1,%rax -+ mov $r1,%rdx -+ shl \$12,%rax -+ shl \$12,%rdx -+ or $d1,%rax -+ or $d2,%rdx -+ and \$0x3ffffff,%eax -+ and \$0x3ffffff,%edx -+ mov %eax,`16*3+0-64`($ctx) -+ lea (%rax,%rax,4),%eax # *5 -+ mov %edx,`16*3+4-64`($ctx) -+ lea (%rdx,%rdx,4),%edx # *5 -+ mov %eax,`16*4+0-64`($ctx) -+ mov $h1,$d1 -+ mov %edx,`16*4+4-64`($ctx) -+ mov $r1,$d2 -+ -+ mov \$0x3ffffff,%eax -+ mov \$0x3ffffff,%edx -+ shr \$14,$d1 -+ shr \$14,$d2 -+ and $d1#d,%eax -+ and $d2#d,%edx -+ mov %eax,`16*5+0-64`($ctx) -+ lea (%rax,%rax,4),%eax # *5 -+ mov %edx,`16*5+4-64`($ctx) -+ lea (%rdx,%rdx,4),%edx # *5 -+ mov %eax,`16*6+0-64`($ctx) -+ shr \$26,$d1 -+ mov %edx,`16*6+4-64`($ctx) -+ shr \$26,$d2 -+ -+ mov $h2,%rax -+ shl \$24,%rax -+ or %rax,$d1 -+ mov $d1#d,`16*7+0-64`($ctx) -+ lea ($d1,$d1,4),$d1 # *5 -+ mov $d2#d,`16*7+4-64`($ctx) -+ lea ($d2,$d2,4),$d2 # *5 -+ mov $d1#d,`16*8+0-64`($ctx) -+ mov $d2#d,`16*8+4-64`($ctx) -+ 
-+ mov $r1,%rax -+ call __poly1305_block # r^3 -+ -+ mov \$0x3ffffff,%eax # save r^3 base 2^26 -+ mov $h0,$d1 -+ and $h0#d,%eax -+ shr \$26,$d1 -+ mov %eax,`16*0+12-64`($ctx) -+ -+ mov \$0x3ffffff,%edx -+ and $d1#d,%edx -+ mov %edx,`16*1+12-64`($ctx) -+ lea (%rdx,%rdx,4),%edx # *5 -+ shr \$26,$d1 -+ mov %edx,`16*2+12-64`($ctx) -+ -+ mov $h1,%rax -+ shl \$12,%rax -+ or $d1,%rax -+ and \$0x3ffffff,%eax -+ mov %eax,`16*3+12-64`($ctx) -+ lea (%rax,%rax,4),%eax # *5 -+ mov $h1,$d1 -+ mov %eax,`16*4+12-64`($ctx) -+ -+ mov \$0x3ffffff,%edx -+ shr \$14,$d1 -+ and $d1#d,%edx -+ mov %edx,`16*5+12-64`($ctx) -+ lea (%rdx,%rdx,4),%edx # *5 -+ shr \$26,$d1 -+ mov %edx,`16*6+12-64`($ctx) -+ -+ mov $h2,%rax -+ shl \$24,%rax -+ or %rax,$d1 -+ mov $d1#d,`16*7+12-64`($ctx) -+ lea ($d1,$d1,4),$d1 # *5 -+ mov $d1#d,`16*8+12-64`($ctx) -+ -+ mov $r1,%rax -+ call __poly1305_block # r^4 -+ -+ mov \$0x3ffffff,%eax # save r^4 base 2^26 -+ mov $h0,$d1 -+ and $h0#d,%eax -+ shr \$26,$d1 -+ mov %eax,`16*0+8-64`($ctx) -+ -+ mov \$0x3ffffff,%edx -+ and $d1#d,%edx -+ mov %edx,`16*1+8-64`($ctx) -+ lea (%rdx,%rdx,4),%edx # *5 -+ shr \$26,$d1 -+ mov %edx,`16*2+8-64`($ctx) -+ -+ mov $h1,%rax -+ shl \$12,%rax -+ or $d1,%rax -+ and \$0x3ffffff,%eax -+ mov %eax,`16*3+8-64`($ctx) -+ lea (%rax,%rax,4),%eax # *5 -+ mov $h1,$d1 -+ mov %eax,`16*4+8-64`($ctx) -+ -+ mov \$0x3ffffff,%edx -+ shr \$14,$d1 -+ and $d1#d,%edx -+ mov %edx,`16*5+8-64`($ctx) -+ lea (%rdx,%rdx,4),%edx # *5 -+ shr \$26,$d1 -+ mov %edx,`16*6+8-64`($ctx) -+ -+ mov $h2,%rax -+ shl \$24,%rax -+ or %rax,$d1 -+ mov $d1#d,`16*7+8-64`($ctx) -+ lea ($d1,$d1,4),$d1 # *5 -+ mov $d1#d,`16*8+8-64`($ctx) -+ -+ lea -48-64($ctx),$ctx # size [de-]optimization -+ ret -+.size __poly1305_init_avx,.-__poly1305_init_avx -+ -+.type poly1305_blocks_avx,\@function,4 -+.align 32 -+poly1305_blocks_avx: -+.cfi_startproc -+ mov 20($ctx),%r8d # is_base2_26 -+ cmp \$128,$len -+ jae .Lblocks_avx -+ test %r8d,%r8d -+ jz .Lblocks -+ -+.Lblocks_avx: -+ and \$-16,$len -+ jz 
.Lno_data_avx -+ -+ vzeroupper -+ -+ test %r8d,%r8d -+ jz .Lbase2_64_avx -+ -+ test \$31,$len -+ jz .Leven_avx -+ -+ push %rbx -+.cfi_push %rbx -+ push %rbp -+.cfi_push %rbp -+ push %r12 -+.cfi_push %r12 -+ push %r13 -+.cfi_push %r13 -+ push %r14 -+.cfi_push %r14 -+ push %r15 -+.cfi_push %r15 -+.Lblocks_avx_body: -+ -+ mov $len,%r15 # reassign $len -+ -+ mov 0($ctx),$d1 # load hash value -+ mov 8($ctx),$d2 -+ mov 16($ctx),$h2#d -+ -+ mov 24($ctx),$r0 # load r -+ mov 32($ctx),$s1 -+ -+ ################################# base 2^26 -> base 2^64 -+ mov $d1#d,$h0#d -+ and \$`-1*(1<<31)`,$d1 -+ mov $d2,$r1 # borrow $r1 -+ mov $d2#d,$h1#d -+ and \$`-1*(1<<31)`,$d2 -+ -+ shr \$6,$d1 -+ shl \$52,$r1 -+ add $d1,$h0 -+ shr \$12,$h1 -+ shr \$18,$d2 -+ add $r1,$h0 -+ adc $d2,$h1 -+ -+ mov $h2,$d1 -+ shl \$40,$d1 -+ shr \$24,$h2 -+ add $d1,$h1 -+ adc \$0,$h2 # can be partially reduced... -+ -+ mov \$-4,$d2 # ... so reduce -+ mov $h2,$d1 -+ and $h2,$d2 -+ shr \$2,$d1 -+ and \$3,$h2 -+ add $d2,$d1 # =*5 -+ add $d1,$h0 -+ adc \$0,$h1 -+ adc \$0,$h2 -+ -+ mov $s1,$r1 -+ mov $s1,%rax -+ shr \$2,$s1 -+ add $r1,$s1 # s1 = r1 + (r1 >> 2) -+ -+ add 0($inp),$h0 # accumulate input -+ adc 8($inp),$h1 -+ lea 16($inp),$inp -+ adc $padbit,$h2 -+ -+ call __poly1305_block -+ -+ test $padbit,$padbit # if $padbit is zero, -+ jz .Lstore_base2_64_avx # store hash in base 2^64 format -+ -+ ################################# base 2^64 -> base 2^26 -+ mov $h0,%rax -+ mov $h0,%rdx -+ shr \$52,$h0 -+ mov $h1,$r0 -+ mov $h1,$r1 -+ shr \$26,%rdx -+ and \$0x3ffffff,%rax # h[0] -+ shl \$12,$r0 -+ and \$0x3ffffff,%rdx # h[1] -+ shr \$14,$h1 -+ or $r0,$h0 -+ shl \$24,$h2 -+ and \$0x3ffffff,$h0 # h[2] -+ shr \$40,$r1 -+ and \$0x3ffffff,$h1 # h[3] -+ or $r1,$h2 # h[4] -+ -+ sub \$16,%r15 -+ jz .Lstore_base2_26_avx -+ -+ vmovd %rax#d,$H0 -+ vmovd %rdx#d,$H1 -+ vmovd $h0#d,$H2 -+ vmovd $h1#d,$H3 -+ vmovd $h2#d,$H4 -+ jmp .Lproceed_avx -+ -+.align 32 -+.Lstore_base2_64_avx: -+ mov $h0,0($ctx) -+ mov $h1,8($ctx) -+ 
mov $h2,16($ctx) # note that is_base2_26 is zeroed -+ jmp .Ldone_avx -+ -+.align 16 -+.Lstore_base2_26_avx: -+ mov %rax#d,0($ctx) # store hash value base 2^26 -+ mov %rdx#d,4($ctx) -+ mov $h0#d,8($ctx) -+ mov $h1#d,12($ctx) -+ mov $h2#d,16($ctx) -+.align 16 -+.Ldone_avx: -+ mov 0(%rsp),%r15 -+.cfi_restore %r15 -+ mov 8(%rsp),%r14 -+.cfi_restore %r14 -+ mov 16(%rsp),%r13 -+.cfi_restore %r13 -+ mov 24(%rsp),%r12 -+.cfi_restore %r12 -+ mov 32(%rsp),%rbp -+.cfi_restore %rbp -+ mov 40(%rsp),%rbx -+.cfi_restore %rbx -+ lea 48(%rsp),%rsp -+.cfi_adjust_cfa_offset -48 -+.Lno_data_avx: -+.Lblocks_avx_epilogue: -+ ret -+.cfi_endproc -+ -+.align 32 -+.Lbase2_64_avx: -+.cfi_startproc -+ push %rbx -+.cfi_push %rbx -+ push %rbp -+.cfi_push %rbp -+ push %r12 -+.cfi_push %r12 -+ push %r13 -+.cfi_push %r13 -+ push %r14 -+.cfi_push %r14 -+ push %r15 -+.cfi_push %r15 -+.Lbase2_64_avx_body: -+ -+ mov $len,%r15 # reassign $len -+ -+ mov 24($ctx),$r0 # load r -+ mov 32($ctx),$s1 -+ -+ mov 0($ctx),$h0 # load hash value -+ mov 8($ctx),$h1 -+ mov 16($ctx),$h2#d -+ -+ mov $s1,$r1 -+ mov $s1,%rax -+ shr \$2,$s1 -+ add $r1,$s1 # s1 = r1 + (r1 >> 2) -+ -+ test \$31,$len -+ jz .Linit_avx -+ -+ add 0($inp),$h0 # accumulate input -+ adc 8($inp),$h1 -+ lea 16($inp),$inp -+ adc $padbit,$h2 -+ sub \$16,%r15 -+ -+ call __poly1305_block -+ -+.Linit_avx: -+ ################################# base 2^64 -> base 2^26 -+ mov $h0,%rax -+ mov $h0,%rdx -+ shr \$52,$h0 -+ mov $h1,$d1 -+ mov $h1,$d2 -+ shr \$26,%rdx -+ and \$0x3ffffff,%rax # h[0] -+ shl \$12,$d1 -+ and \$0x3ffffff,%rdx # h[1] -+ shr \$14,$h1 -+ or $d1,$h0 -+ shl \$24,$h2 -+ and \$0x3ffffff,$h0 # h[2] -+ shr \$40,$d2 -+ and \$0x3ffffff,$h1 # h[3] -+ or $d2,$h2 # h[4] -+ -+ vmovd %rax#d,$H0 -+ vmovd %rdx#d,$H1 -+ vmovd $h0#d,$H2 -+ vmovd $h1#d,$H3 -+ vmovd $h2#d,$H4 -+ movl \$1,20($ctx) # set is_base2_26 -+ -+ call __poly1305_init_avx -+ -+.Lproceed_avx: -+ mov %r15,$len -+ -+ mov 0(%rsp),%r15 -+.cfi_restore %r15 -+ mov 8(%rsp),%r14 -+.cfi_restore 
%r14 -+ mov 16(%rsp),%r13 -+.cfi_restore %r13 -+ mov 24(%rsp),%r12 -+.cfi_restore %r12 -+ mov 32(%rsp),%rbp -+.cfi_restore %rbp -+ mov 40(%rsp),%rbx -+.cfi_restore %rbx -+ lea 48(%rsp),%rax -+ lea 48(%rsp),%rsp -+.cfi_adjust_cfa_offset -48 -+.Lbase2_64_avx_epilogue: -+ jmp .Ldo_avx -+.cfi_endproc -+ -+.align 32 -+.Leven_avx: -+.cfi_startproc -+ vmovd 4*0($ctx),$H0 # load hash value -+ vmovd 4*1($ctx),$H1 -+ vmovd 4*2($ctx),$H2 -+ vmovd 4*3($ctx),$H3 -+ vmovd 4*4($ctx),$H4 -+ -+.Ldo_avx: -+___ -+$code.=<<___ if (!$win64); -+ lea -0x58(%rsp),%r11 -+.cfi_def_cfa %r11,0x60 -+ sub \$0x178,%rsp -+___ -+$code.=<<___ if ($win64); -+ lea -0xf8(%rsp),%r11 -+ sub \$0x218,%rsp -+ vmovdqa %xmm6,0x50(%r11) -+ vmovdqa %xmm7,0x60(%r11) -+ vmovdqa %xmm8,0x70(%r11) -+ vmovdqa %xmm9,0x80(%r11) -+ vmovdqa %xmm10,0x90(%r11) -+ vmovdqa %xmm11,0xa0(%r11) -+ vmovdqa %xmm12,0xb0(%r11) -+ vmovdqa %xmm13,0xc0(%r11) -+ vmovdqa %xmm14,0xd0(%r11) -+ vmovdqa %xmm15,0xe0(%r11) -+.Ldo_avx_body: -+___ -+$code.=<<___; -+ sub \$64,$len -+ lea -32($inp),%rax -+ cmovc %rax,$inp -+ -+ vmovdqu `16*3`($ctx),$D4 # preload r0^2 -+ lea `16*3+64`($ctx),$ctx # size optimization -+ lea .Lconst(%rip),%rcx -+ -+ ################################################################ -+ # load input -+ vmovdqu 16*2($inp),$T0 -+ vmovdqu 16*3($inp),$T1 -+ vmovdqa 64(%rcx),$MASK # .Lmask26 -+ -+ vpsrldq \$6,$T0,$T2 # splat input -+ vpsrldq \$6,$T1,$T3 -+ vpunpckhqdq $T1,$T0,$T4 # 4 -+ vpunpcklqdq $T1,$T0,$T0 # 0:1 -+ vpunpcklqdq $T3,$T2,$T3 # 2:3 -+ -+ vpsrlq \$40,$T4,$T4 # 4 -+ vpsrlq \$26,$T0,$T1 -+ vpand $MASK,$T0,$T0 # 0 -+ vpsrlq \$4,$T3,$T2 -+ vpand $MASK,$T1,$T1 # 1 -+ vpsrlq \$30,$T3,$T3 -+ vpand $MASK,$T2,$T2 # 2 -+ vpand $MASK,$T3,$T3 # 3 -+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always -+ -+ jbe .Lskip_loop_avx -+ -+ # expand and copy pre-calculated table to stack -+ vmovdqu `16*1-64`($ctx),$D1 -+ vmovdqu `16*2-64`($ctx),$D2 -+ vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434 -+ vpshufd \$0x44,$D4,$D0 # xx12 -> 1212 -+ 
vmovdqa $D3,-0x90(%r11) -+ vmovdqa $D0,0x00(%rsp) -+ vpshufd \$0xEE,$D1,$D4 -+ vmovdqu `16*3-64`($ctx),$D0 -+ vpshufd \$0x44,$D1,$D1 -+ vmovdqa $D4,-0x80(%r11) -+ vmovdqa $D1,0x10(%rsp) -+ vpshufd \$0xEE,$D2,$D3 -+ vmovdqu `16*4-64`($ctx),$D1 -+ vpshufd \$0x44,$D2,$D2 -+ vmovdqa $D3,-0x70(%r11) -+ vmovdqa $D2,0x20(%rsp) -+ vpshufd \$0xEE,$D0,$D4 -+ vmovdqu `16*5-64`($ctx),$D2 -+ vpshufd \$0x44,$D0,$D0 -+ vmovdqa $D4,-0x60(%r11) -+ vmovdqa $D0,0x30(%rsp) -+ vpshufd \$0xEE,$D1,$D3 -+ vmovdqu `16*6-64`($ctx),$D0 -+ vpshufd \$0x44,$D1,$D1 -+ vmovdqa $D3,-0x50(%r11) -+ vmovdqa $D1,0x40(%rsp) -+ vpshufd \$0xEE,$D2,$D4 -+ vmovdqu `16*7-64`($ctx),$D1 -+ vpshufd \$0x44,$D2,$D2 -+ vmovdqa $D4,-0x40(%r11) -+ vmovdqa $D2,0x50(%rsp) -+ vpshufd \$0xEE,$D0,$D3 -+ vmovdqu `16*8-64`($ctx),$D2 -+ vpshufd \$0x44,$D0,$D0 -+ vmovdqa $D3,-0x30(%r11) -+ vmovdqa $D0,0x60(%rsp) -+ vpshufd \$0xEE,$D1,$D4 -+ vpshufd \$0x44,$D1,$D1 -+ vmovdqa $D4,-0x20(%r11) -+ vmovdqa $D1,0x70(%rsp) -+ vpshufd \$0xEE,$D2,$D3 -+ vmovdqa 0x00(%rsp),$D4 # preload r0^2 -+ vpshufd \$0x44,$D2,$D2 -+ vmovdqa $D3,-0x10(%r11) -+ vmovdqa $D2,0x80(%rsp) -+ -+ jmp .Loop_avx -+ -+.align 32 -+.Loop_avx: -+ ################################################################ -+ # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 -+ # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r -+ # \___________________/ -+ # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 -+ # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r -+ # \___________________/ \____________________/ -+ # -+ # Note that we start with inp[2:3]*r^2. This is because it -+ # doesn't depend on reduction in previous iteration. 
-+ ################################################################ -+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ # -+ # though note that $Tx and $Hx are "reversed" in this section, -+ # and $D4 is preloaded with r0^2... -+ -+ vpmuludq $T0,$D4,$D0 # d0 = h0*r0 -+ vpmuludq $T1,$D4,$D1 # d1 = h1*r0 -+ vmovdqa $H2,0x20(%r11) # offload hash -+ vpmuludq $T2,$D4,$D2 # d3 = h2*r0 -+ vmovdqa 0x10(%rsp),$H2 # r1^2 -+ vpmuludq $T3,$D4,$D3 # d3 = h3*r0 -+ vpmuludq $T4,$D4,$D4 # d4 = h4*r0 -+ -+ vmovdqa $H0,0x00(%r11) # -+ vpmuludq 0x20(%rsp),$T4,$H0 # h4*s1 -+ vmovdqa $H1,0x10(%r11) # -+ vpmuludq $T3,$H2,$H1 # h3*r1 -+ vpaddq $H0,$D0,$D0 # d0 += h4*s1 -+ vpaddq $H1,$D4,$D4 # d4 += h3*r1 -+ vmovdqa $H3,0x30(%r11) # -+ vpmuludq $T2,$H2,$H0 # h2*r1 -+ vpmuludq $T1,$H2,$H1 # h1*r1 -+ vpaddq $H0,$D3,$D3 # d3 += h2*r1 -+ vmovdqa 0x30(%rsp),$H3 # r2^2 -+ vpaddq $H1,$D2,$D2 # d2 += h1*r1 -+ vmovdqa $H4,0x40(%r11) # -+ vpmuludq $T0,$H2,$H2 # h0*r1 -+ vpmuludq $T2,$H3,$H0 # h2*r2 -+ vpaddq $H2,$D1,$D1 # d1 += h0*r1 -+ -+ vmovdqa 0x40(%rsp),$H4 # s2^2 -+ vpaddq $H0,$D4,$D4 # d4 += h2*r2 -+ vpmuludq $T1,$H3,$H1 # h1*r2 -+ vpmuludq $T0,$H3,$H3 # h0*r2 -+ vpaddq $H1,$D3,$D3 # d3 += h1*r2 -+ vmovdqa 0x50(%rsp),$H2 # r3^2 -+ vpaddq $H3,$D2,$D2 # d2 += h0*r2 -+ vpmuludq $T4,$H4,$H0 # h4*s2 -+ vpmuludq $T3,$H4,$H4 # h3*s2 -+ vpaddq $H0,$D1,$D1 # d1 += h4*s2 -+ vmovdqa 0x60(%rsp),$H3 # s3^2 -+ vpaddq $H4,$D0,$D0 # d0 += h3*s2 -+ -+ vmovdqa 0x80(%rsp),$H4 # s4^2 -+ vpmuludq $T1,$H2,$H1 # h1*r3 -+ vpmuludq $T0,$H2,$H2 # h0*r3 -+ vpaddq $H1,$D4,$D4 # d4 += h1*r3 -+ vpaddq $H2,$D3,$D3 # d3 += h0*r3 -+ vpmuludq $T4,$H3,$H0 # h4*s3 -+ vpmuludq $T3,$H3,$H1 # h3*s3 -+ vpaddq $H0,$D2,$D2 # d2 += h4*s3 -+ vmovdqu 16*0($inp),$H0 # load input -+ vpaddq $H1,$D1,$D1 # d1 += h3*s3 -+ vpmuludq 
$T2,$H3,$H3 # h2*s3 -+ vpmuludq $T2,$H4,$T2 # h2*s4 -+ vpaddq $H3,$D0,$D0 # d0 += h2*s3 -+ -+ vmovdqu 16*1($inp),$H1 # -+ vpaddq $T2,$D1,$D1 # d1 += h2*s4 -+ vpmuludq $T3,$H4,$T3 # h3*s4 -+ vpmuludq $T4,$H4,$T4 # h4*s4 -+ vpsrldq \$6,$H0,$H2 # splat input -+ vpaddq $T3,$D2,$D2 # d2 += h3*s4 -+ vpaddq $T4,$D3,$D3 # d3 += h4*s4 -+ vpsrldq \$6,$H1,$H3 # -+ vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4 -+ vpmuludq $T1,$H4,$T0 # h1*s4 -+ vpunpckhqdq $H1,$H0,$H4 # 4 -+ vpaddq $T4,$D4,$D4 # d4 += h0*r4 -+ vmovdqa -0x90(%r11),$T4 # r0^4 -+ vpaddq $T0,$D0,$D0 # d0 += h1*s4 -+ -+ vpunpcklqdq $H1,$H0,$H0 # 0:1 -+ vpunpcklqdq $H3,$H2,$H3 # 2:3 -+ -+ #vpsrlq \$40,$H4,$H4 # 4 -+ vpsrldq \$`40/8`,$H4,$H4 # 4 -+ vpsrlq \$26,$H0,$H1 -+ vpand $MASK,$H0,$H0 # 0 -+ vpsrlq \$4,$H3,$H2 -+ vpand $MASK,$H1,$H1 # 1 -+ vpand 0(%rcx),$H4,$H4 # .Lmask24 -+ vpsrlq \$30,$H3,$H3 -+ vpand $MASK,$H2,$H2 # 2 -+ vpand $MASK,$H3,$H3 # 3 -+ vpor 32(%rcx),$H4,$H4 # padbit, yes, always -+ -+ vpaddq 0x00(%r11),$H0,$H0 # add hash value -+ vpaddq 0x10(%r11),$H1,$H1 -+ vpaddq 0x20(%r11),$H2,$H2 -+ vpaddq 0x30(%r11),$H3,$H3 -+ vpaddq 0x40(%r11),$H4,$H4 -+ -+ lea 16*2($inp),%rax -+ lea 16*4($inp),$inp -+ sub \$64,$len -+ cmovc %rax,$inp -+ -+ ################################################################ -+ # Now we accumulate (inp[0:1]+hash)*r^4 -+ ################################################################ -+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ -+ vpmuludq $H0,$T4,$T0 # h0*r0 -+ vpmuludq $H1,$T4,$T1 # h1*r0 -+ vpaddq $T0,$D0,$D0 -+ vpaddq $T1,$D1,$D1 -+ vmovdqa -0x80(%r11),$T2 # r1^4 -+ vpmuludq $H2,$T4,$T0 # h2*r0 -+ vpmuludq $H3,$T4,$T1 # h3*r0 -+ vpaddq $T0,$D2,$D2 -+ vpaddq $T1,$D3,$D3 -+ vpmuludq $H4,$T4,$T4 # h4*r0 -+ vpmuludq -0x70(%r11),$H4,$T0 # h4*s1 -+ vpaddq $T4,$D4,$D4 -+ -+ 
vpaddq $T0,$D0,$D0 # d0 += h4*s1 -+ vpmuludq $H2,$T2,$T1 # h2*r1 -+ vpmuludq $H3,$T2,$T0 # h3*r1 -+ vpaddq $T1,$D3,$D3 # d3 += h2*r1 -+ vmovdqa -0x60(%r11),$T3 # r2^4 -+ vpaddq $T0,$D4,$D4 # d4 += h3*r1 -+ vpmuludq $H1,$T2,$T1 # h1*r1 -+ vpmuludq $H0,$T2,$T2 # h0*r1 -+ vpaddq $T1,$D2,$D2 # d2 += h1*r1 -+ vpaddq $T2,$D1,$D1 # d1 += h0*r1 -+ -+ vmovdqa -0x50(%r11),$T4 # s2^4 -+ vpmuludq $H2,$T3,$T0 # h2*r2 -+ vpmuludq $H1,$T3,$T1 # h1*r2 -+ vpaddq $T0,$D4,$D4 # d4 += h2*r2 -+ vpaddq $T1,$D3,$D3 # d3 += h1*r2 -+ vmovdqa -0x40(%r11),$T2 # r3^4 -+ vpmuludq $H0,$T3,$T3 # h0*r2 -+ vpmuludq $H4,$T4,$T0 # h4*s2 -+ vpaddq $T3,$D2,$D2 # d2 += h0*r2 -+ vpaddq $T0,$D1,$D1 # d1 += h4*s2 -+ vmovdqa -0x30(%r11),$T3 # s3^4 -+ vpmuludq $H3,$T4,$T4 # h3*s2 -+ vpmuludq $H1,$T2,$T1 # h1*r3 -+ vpaddq $T4,$D0,$D0 # d0 += h3*s2 -+ -+ vmovdqa -0x10(%r11),$T4 # s4^4 -+ vpaddq $T1,$D4,$D4 # d4 += h1*r3 -+ vpmuludq $H0,$T2,$T2 # h0*r3 -+ vpmuludq $H4,$T3,$T0 # h4*s3 -+ vpaddq $T2,$D3,$D3 # d3 += h0*r3 -+ vpaddq $T0,$D2,$D2 # d2 += h4*s3 -+ vmovdqu 16*2($inp),$T0 # load input -+ vpmuludq $H3,$T3,$T2 # h3*s3 -+ vpmuludq $H2,$T3,$T3 # h2*s3 -+ vpaddq $T2,$D1,$D1 # d1 += h3*s3 -+ vmovdqu 16*3($inp),$T1 # -+ vpaddq $T3,$D0,$D0 # d0 += h2*s3 -+ -+ vpmuludq $H2,$T4,$H2 # h2*s4 -+ vpmuludq $H3,$T4,$H3 # h3*s4 -+ vpsrldq \$6,$T0,$T2 # splat input -+ vpaddq $H2,$D1,$D1 # d1 += h2*s4 -+ vpmuludq $H4,$T4,$H4 # h4*s4 -+ vpsrldq \$6,$T1,$T3 # -+ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4 -+ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4 -+ vpmuludq -0x20(%r11),$H0,$H4 # h0*r4 -+ vpmuludq $H1,$T4,$H0 -+ vpunpckhqdq $T1,$T0,$T4 # 4 -+ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 -+ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 -+ -+ vpunpcklqdq $T1,$T0,$T0 # 0:1 -+ vpunpcklqdq $T3,$T2,$T3 # 2:3 -+ -+ #vpsrlq \$40,$T4,$T4 # 4 -+ vpsrldq \$`40/8`,$T4,$T4 # 4 -+ vpsrlq \$26,$T0,$T1 -+ vmovdqa 0x00(%rsp),$D4 # preload r0^2 -+ vpand $MASK,$T0,$T0 # 0 -+ vpsrlq \$4,$T3,$T2 -+ vpand $MASK,$T1,$T1 # 1 -+ vpand 0(%rcx),$T4,$T4 # .Lmask24 -+ vpsrlq 
\$30,$T3,$T3 -+ vpand $MASK,$T2,$T2 # 2 -+ vpand $MASK,$T3,$T3 # 3 -+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always -+ -+ ################################################################ -+ # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein -+ # and P. Schwabe -+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpaddq $D0,$D1,$H1 # h0 -> h1 -+ -+ vpsrlq \$26,$H4,$D0 -+ vpand $MASK,$H4,$H4 -+ -+ vpsrlq \$26,$H1,$D1 -+ vpand $MASK,$H1,$H1 -+ vpaddq $D1,$H2,$H2 # h1 -> h2 -+ -+ vpaddq $D0,$H0,$H0 -+ vpsllq \$2,$D0,$D0 -+ vpaddq $D0,$H0,$H0 # h4 -> h0 -+ -+ vpsrlq \$26,$H2,$D2 -+ vpand $MASK,$H2,$H2 -+ vpaddq $D2,$H3,$H3 # h2 -> h3 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpaddq $D0,$H1,$H1 # h0 -> h1 -+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ ja .Loop_avx -+ -+.Lskip_loop_avx: -+ ################################################################ -+ # multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 -+ -+ vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2 -+ add \$32,$len -+ jnz .Long_tail_avx -+ -+ vpaddq $H2,$T2,$T2 -+ vpaddq $H0,$T0,$T0 -+ vpaddq $H1,$T1,$T1 -+ vpaddq $H3,$T3,$T3 -+ vpaddq $H4,$T4,$T4 -+ -+.Long_tail_avx: -+ vmovdqa $H2,0x20(%r11) -+ vmovdqa $H0,0x00(%r11) -+ vmovdqa $H1,0x10(%r11) -+ vmovdqa $H3,0x30(%r11) -+ vmovdqa $H4,0x40(%r11) -+ -+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ -+ vpmuludq $T2,$D4,$D2 # d2 = h2*r0 -+ vpmuludq $T0,$D4,$D0 # d0 = h0*r0 -+ vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n -+ vpmuludq $T1,$D4,$D1 # d1 = h1*r0 -+ vpmuludq $T3,$D4,$D3 # d3 = h3*r0 -+ vpmuludq $T4,$D4,$D4 # d4 = h4*r0 -+ -+ vpmuludq $T3,$H2,$H0 # h3*r1 -+ vpaddq $H0,$D4,$D4 # d4 += h3*r1 -+ vpshufd 
\$0x10,`16*2-64`($ctx),$H3 # s1^n -+ vpmuludq $T2,$H2,$H1 # h2*r1 -+ vpaddq $H1,$D3,$D3 # d3 += h2*r1 -+ vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n -+ vpmuludq $T1,$H2,$H0 # h1*r1 -+ vpaddq $H0,$D2,$D2 # d2 += h1*r1 -+ vpmuludq $T0,$H2,$H2 # h0*r1 -+ vpaddq $H2,$D1,$D1 # d1 += h0*r1 -+ vpmuludq $T4,$H3,$H3 # h4*s1 -+ vpaddq $H3,$D0,$D0 # d0 += h4*s1 -+ -+ vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n -+ vpmuludq $T2,$H4,$H1 # h2*r2 -+ vpaddq $H1,$D4,$D4 # d4 += h2*r2 -+ vpmuludq $T1,$H4,$H0 # h1*r2 -+ vpaddq $H0,$D3,$D3 # d3 += h1*r2 -+ vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n -+ vpmuludq $T0,$H4,$H4 # h0*r2 -+ vpaddq $H4,$D2,$D2 # d2 += h0*r2 -+ vpmuludq $T4,$H2,$H1 # h4*s2 -+ vpaddq $H1,$D1,$D1 # d1 += h4*s2 -+ vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n -+ vpmuludq $T3,$H2,$H2 # h3*s2 -+ vpaddq $H2,$D0,$D0 # d0 += h3*s2 -+ -+ vpmuludq $T1,$H3,$H0 # h1*r3 -+ vpaddq $H0,$D4,$D4 # d4 += h1*r3 -+ vpmuludq $T0,$H3,$H3 # h0*r3 -+ vpaddq $H3,$D3,$D3 # d3 += h0*r3 -+ vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n -+ vpmuludq $T4,$H4,$H1 # h4*s3 -+ vpaddq $H1,$D2,$D2 # d2 += h4*s3 -+ vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n -+ vpmuludq $T3,$H4,$H0 # h3*s3 -+ vpaddq $H0,$D1,$D1 # d1 += h3*s3 -+ vpmuludq $T2,$H4,$H4 # h2*s3 -+ vpaddq $H4,$D0,$D0 # d0 += h2*s3 -+ -+ vpmuludq $T0,$H2,$H2 # h0*r4 -+ vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4 -+ vpmuludq $T4,$H3,$H1 # h4*s4 -+ vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4 -+ vpmuludq $T3,$H3,$H0 # h3*s4 -+ vpaddq $H0,$D2,$D2 # h2 = d2 + h3*s4 -+ vpmuludq $T2,$H3,$H1 # h2*s4 -+ vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4 -+ vpmuludq $T1,$H3,$H3 # h1*s4 -+ vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4 -+ -+ jz .Lshort_tail_avx -+ -+ vmovdqu 16*0($inp),$H0 # load input -+ vmovdqu 16*1($inp),$H1 -+ -+ vpsrldq \$6,$H0,$H2 # splat input -+ vpsrldq \$6,$H1,$H3 -+ vpunpckhqdq $H1,$H0,$H4 # 4 -+ vpunpcklqdq $H1,$H0,$H0 # 0:1 -+ vpunpcklqdq $H3,$H2,$H3 # 2:3 -+ -+ vpsrlq \$40,$H4,$H4 # 4 -+ vpsrlq \$26,$H0,$H1 -+ vpand $MASK,$H0,$H0 # 0 -+ vpsrlq \$4,$H3,$H2 -+ vpand 
$MASK,$H1,$H1 # 1 -+ vpsrlq \$30,$H3,$H3 -+ vpand $MASK,$H2,$H2 # 2 -+ vpand $MASK,$H3,$H3 # 3 -+ vpor 32(%rcx),$H4,$H4 # padbit, yes, always -+ -+ vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4 -+ vpaddq 0x00(%r11),$H0,$H0 -+ vpaddq 0x10(%r11),$H1,$H1 -+ vpaddq 0x20(%r11),$H2,$H2 -+ vpaddq 0x30(%r11),$H3,$H3 -+ vpaddq 0x40(%r11),$H4,$H4 -+ -+ ################################################################ -+ # multiply (inp[0:1]+hash) by r^4:r^3 and accumulate -+ -+ vpmuludq $H0,$T4,$T0 # h0*r0 -+ vpaddq $T0,$D0,$D0 # d0 += h0*r0 -+ vpmuludq $H1,$T4,$T1 # h1*r0 -+ vpaddq $T1,$D1,$D1 # d1 += h1*r0 -+ vpmuludq $H2,$T4,$T0 # h2*r0 -+ vpaddq $T0,$D2,$D2 # d2 += h2*r0 -+ vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n -+ vpmuludq $H3,$T4,$T1 # h3*r0 -+ vpaddq $T1,$D3,$D3 # d3 += h3*r0 -+ vpmuludq $H4,$T4,$T4 # h4*r0 -+ vpaddq $T4,$D4,$D4 # d4 += h4*r0 -+ -+ vpmuludq $H3,$T2,$T0 # h3*r1 -+ vpaddq $T0,$D4,$D4 # d4 += h3*r1 -+ vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1 -+ vpmuludq $H2,$T2,$T1 # h2*r1 -+ vpaddq $T1,$D3,$D3 # d3 += h2*r1 -+ vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2 -+ vpmuludq $H1,$T2,$T0 # h1*r1 -+ vpaddq $T0,$D2,$D2 # d2 += h1*r1 -+ vpmuludq $H0,$T2,$T2 # h0*r1 -+ vpaddq $T2,$D1,$D1 # d1 += h0*r1 -+ vpmuludq $H4,$T3,$T3 # h4*s1 -+ vpaddq $T3,$D0,$D0 # d0 += h4*s1 -+ -+ vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2 -+ vpmuludq $H2,$T4,$T1 # h2*r2 -+ vpaddq $T1,$D4,$D4 # d4 += h2*r2 -+ vpmuludq $H1,$T4,$T0 # h1*r2 -+ vpaddq $T0,$D3,$D3 # d3 += h1*r2 -+ vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3 -+ vpmuludq $H0,$T4,$T4 # h0*r2 -+ vpaddq $T4,$D2,$D2 # d2 += h0*r2 -+ vpmuludq $H4,$T2,$T1 # h4*s2 -+ vpaddq $T1,$D1,$D1 # d1 += h4*s2 -+ vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3 -+ vpmuludq $H3,$T2,$T2 # h3*s2 -+ vpaddq $T2,$D0,$D0 # d0 += h3*s2 -+ -+ vpmuludq $H1,$T3,$T0 # h1*r3 -+ vpaddq $T0,$D4,$D4 # d4 += h1*r3 -+ vpmuludq $H0,$T3,$T3 # h0*r3 -+ vpaddq $T3,$D3,$D3 # d3 += h0*r3 -+ vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4 -+ vpmuludq $H4,$T4,$T1 # h4*s3 -+ vpaddq 
$T1,$D2,$D2 # d2 += h4*s3 -+ vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4 -+ vpmuludq $H3,$T4,$T0 # h3*s3 -+ vpaddq $T0,$D1,$D1 # d1 += h3*s3 -+ vpmuludq $H2,$T4,$T4 # h2*s3 -+ vpaddq $T4,$D0,$D0 # d0 += h2*s3 -+ -+ vpmuludq $H0,$T2,$T2 # h0*r4 -+ vpaddq $T2,$D4,$D4 # d4 += h0*r4 -+ vpmuludq $H4,$T3,$T1 # h4*s4 -+ vpaddq $T1,$D3,$D3 # d3 += h4*s4 -+ vpmuludq $H3,$T3,$T0 # h3*s4 -+ vpaddq $T0,$D2,$D2 # d2 += h3*s4 -+ vpmuludq $H2,$T3,$T1 # h2*s4 -+ vpaddq $T1,$D1,$D1 # d1 += h2*s4 -+ vpmuludq $H1,$T3,$T3 # h1*s4 -+ vpaddq $T3,$D0,$D0 # d0 += h1*s4 -+ -+.Lshort_tail_avx: -+ ################################################################ -+ # horizontal addition -+ -+ vpsrldq \$8,$D4,$T4 -+ vpsrldq \$8,$D3,$T3 -+ vpsrldq \$8,$D1,$T1 -+ vpsrldq \$8,$D0,$T0 -+ vpsrldq \$8,$D2,$T2 -+ vpaddq $T3,$D3,$D3 -+ vpaddq $T4,$D4,$D4 -+ vpaddq $T0,$D0,$D0 -+ vpaddq $T1,$D1,$D1 -+ vpaddq $T2,$D2,$D2 -+ -+ ################################################################ -+ # lazy reduction -+ -+ vpsrlq \$26,$D3,$H3 -+ vpand $MASK,$D3,$D3 -+ vpaddq $H3,$D4,$D4 # h3 -> h4 -+ -+ vpsrlq \$26,$D0,$H0 -+ vpand $MASK,$D0,$D0 -+ vpaddq $H0,$D1,$D1 # h0 -> h1 -+ -+ vpsrlq \$26,$D4,$H4 -+ vpand $MASK,$D4,$D4 -+ -+ vpsrlq \$26,$D1,$H1 -+ vpand $MASK,$D1,$D1 -+ vpaddq $H1,$D2,$D2 # h1 -> h2 -+ -+ vpaddq $H4,$D0,$D0 -+ vpsllq \$2,$H4,$H4 -+ vpaddq $H4,$D0,$D0 # h4 -> h0 -+ -+ vpsrlq \$26,$D2,$H2 -+ vpand $MASK,$D2,$D2 -+ vpaddq $H2,$D3,$D3 # h2 -> h3 -+ -+ vpsrlq \$26,$D0,$H0 -+ vpand $MASK,$D0,$D0 -+ vpaddq $H0,$D1,$D1 # h0 -> h1 -+ -+ vpsrlq \$26,$D3,$H3 -+ vpand $MASK,$D3,$D3 -+ vpaddq $H3,$D4,$D4 # h3 -> h4 -+ -+ vmovd $D0,`4*0-48-64`($ctx) # save partially reduced -+ vmovd $D1,`4*1-48-64`($ctx) -+ vmovd $D2,`4*2-48-64`($ctx) -+ vmovd $D3,`4*3-48-64`($ctx) -+ vmovd $D4,`4*4-48-64`($ctx) -+___ -+$code.=<<___ if ($win64); -+ vmovdqa 0x50(%r11),%xmm6 -+ vmovdqa 0x60(%r11),%xmm7 -+ vmovdqa 0x70(%r11),%xmm8 -+ vmovdqa 0x80(%r11),%xmm9 -+ vmovdqa 0x90(%r11),%xmm10 -+ vmovdqa 0xa0(%r11),%xmm11 -+ 
vmovdqa 0xb0(%r11),%xmm12 -+ vmovdqa 0xc0(%r11),%xmm13 -+ vmovdqa 0xd0(%r11),%xmm14 -+ vmovdqa 0xe0(%r11),%xmm15 -+ lea 0xf8(%r11),%rsp -+.Ldo_avx_epilogue: -+___ -+$code.=<<___ if (!$win64); -+ lea 0x58(%r11),%rsp -+.cfi_def_cfa %rsp,8 -+___ -+$code.=<<___; -+ vzeroupper -+ ret -+.cfi_endproc -+.size poly1305_blocks_avx,.-poly1305_blocks_avx -+ -+.type poly1305_emit_avx,\@function,3 -+.align 32 -+poly1305_emit_avx: -+ cmpl \$0,20($ctx) # is_base2_26? -+ je .Lemit -+ -+ mov 0($ctx),%eax # load hash value base 2^26 -+ mov 4($ctx),%ecx -+ mov 8($ctx),%r8d -+ mov 12($ctx),%r11d -+ mov 16($ctx),%r10d -+ -+ shl \$26,%rcx # base 2^26 -> base 2^64 -+ mov %r8,%r9 -+ shl \$52,%r8 -+ add %rcx,%rax -+ shr \$12,%r9 -+ add %rax,%r8 # h0 -+ adc \$0,%r9 -+ -+ shl \$14,%r11 -+ mov %r10,%rax -+ shr \$24,%r10 -+ add %r11,%r9 -+ shl \$40,%rax -+ add %rax,%r9 # h1 -+ adc \$0,%r10 # h2 -+ -+ mov %r10,%rax # could be partially reduced, so reduce -+ mov %r10,%rcx -+ and \$3,%r10 -+ shr \$2,%rax -+ and \$-4,%rcx -+ add %rcx,%rax -+ add %rax,%r8 -+ adc \$0,%r9 -+ adc \$0,%r10 -+ -+ mov %r8,%rax -+ add \$5,%r8 # compare to modulus -+ mov %r9,%rcx -+ adc \$0,%r9 -+ adc \$0,%r10 -+ shr \$2,%r10 # did 130-bit value overflow? 
-+ cmovnz %r8,%rax -+ cmovnz %r9,%rcx -+ -+ add 0($nonce),%rax # accumulate nonce -+ adc 8($nonce),%rcx -+ mov %rax,0($mac) # write result -+ mov %rcx,8($mac) -+ -+ ret -+.size poly1305_emit_avx,.-poly1305_emit_avx -+___ -+ -+if ($avx>1) { -+my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = -+ map("%ymm$_",(0..15)); -+my $S4=$MASK; -+ -+$code.=<<___; -+.type poly1305_blocks_avx2,\@function,4 -+.align 32 -+poly1305_blocks_avx2: -+.cfi_startproc -+ mov 20($ctx),%r8d # is_base2_26 -+ cmp \$128,$len -+ jae .Lblocks_avx2 -+ test %r8d,%r8d -+ jz .Lblocks -+ -+.Lblocks_avx2: -+ and \$-16,$len -+ jz .Lno_data_avx2 -+ -+ vzeroupper -+ -+ test %r8d,%r8d -+ jz .Lbase2_64_avx2 -+ -+ test \$63,$len -+ jz .Leven_avx2 -+ -+ push %rbx -+.cfi_push %rbx -+ push %rbp -+.cfi_push %rbp -+ push %r12 -+.cfi_push %r12 -+ push %r13 -+.cfi_push %r13 -+ push %r14 -+.cfi_push %r14 -+ push %r15 -+.cfi_push %r15 -+.Lblocks_avx2_body: -+ -+ mov $len,%r15 # reassign $len -+ -+ mov 0($ctx),$d1 # load hash value -+ mov 8($ctx),$d2 -+ mov 16($ctx),$h2#d -+ -+ mov 24($ctx),$r0 # load r -+ mov 32($ctx),$s1 -+ -+ ################################# base 2^26 -> base 2^64 -+ mov $d1#d,$h0#d -+ and \$`-1*(1<<31)`,$d1 -+ mov $d2,$r1 # borrow $r1 -+ mov $d2#d,$h1#d -+ and \$`-1*(1<<31)`,$d2 -+ -+ shr \$6,$d1 -+ shl \$52,$r1 -+ add $d1,$h0 -+ shr \$12,$h1 -+ shr \$18,$d2 -+ add $r1,$h0 -+ adc $d2,$h1 -+ -+ mov $h2,$d1 -+ shl \$40,$d1 -+ shr \$24,$h2 -+ add $d1,$h1 -+ adc \$0,$h2 # can be partially reduced... -+ -+ mov \$-4,$d2 # ... 
so reduce -+ mov $h2,$d1 -+ and $h2,$d2 -+ shr \$2,$d1 -+ and \$3,$h2 -+ add $d2,$d1 # =*5 -+ add $d1,$h0 -+ adc \$0,$h1 -+ adc \$0,$h2 -+ -+ mov $s1,$r1 -+ mov $s1,%rax -+ shr \$2,$s1 -+ add $r1,$s1 # s1 = r1 + (r1 >> 2) -+ -+.Lbase2_26_pre_avx2: -+ add 0($inp),$h0 # accumulate input -+ adc 8($inp),$h1 -+ lea 16($inp),$inp -+ adc $padbit,$h2 -+ sub \$16,%r15 -+ -+ call __poly1305_block -+ mov $r1,%rax -+ -+ test \$63,%r15 -+ jnz .Lbase2_26_pre_avx2 -+ -+ test $padbit,$padbit # if $padbit is zero, -+ jz .Lstore_base2_64_avx2 # store hash in base 2^64 format -+ -+ ################################# base 2^64 -> base 2^26 -+ mov $h0,%rax -+ mov $h0,%rdx -+ shr \$52,$h0 -+ mov $h1,$r0 -+ mov $h1,$r1 -+ shr \$26,%rdx -+ and \$0x3ffffff,%rax # h[0] -+ shl \$12,$r0 -+ and \$0x3ffffff,%rdx # h[1] -+ shr \$14,$h1 -+ or $r0,$h0 -+ shl \$24,$h2 -+ and \$0x3ffffff,$h0 # h[2] -+ shr \$40,$r1 -+ and \$0x3ffffff,$h1 # h[3] -+ or $r1,$h2 # h[4] -+ -+ test %r15,%r15 -+ jz .Lstore_base2_26_avx2 -+ -+ vmovd %rax#d,%x#$H0 -+ vmovd %rdx#d,%x#$H1 -+ vmovd $h0#d,%x#$H2 -+ vmovd $h1#d,%x#$H3 -+ vmovd $h2#d,%x#$H4 -+ jmp .Lproceed_avx2 -+ -+.align 32 -+.Lstore_base2_64_avx2: -+ mov $h0,0($ctx) -+ mov $h1,8($ctx) -+ mov $h2,16($ctx) # note that is_base2_26 is zeroed -+ jmp .Ldone_avx2 -+ -+.align 16 -+.Lstore_base2_26_avx2: -+ mov %rax#d,0($ctx) # store hash value base 2^26 -+ mov %rdx#d,4($ctx) -+ mov $h0#d,8($ctx) -+ mov $h1#d,12($ctx) -+ mov $h2#d,16($ctx) -+.align 16 -+.Ldone_avx2: -+ mov 0(%rsp),%r15 -+.cfi_restore %r15 -+ mov 8(%rsp),%r14 -+.cfi_restore %r14 -+ mov 16(%rsp),%r13 -+.cfi_restore %r13 -+ mov 24(%rsp),%r12 -+.cfi_restore %r12 -+ mov 32(%rsp),%rbp -+.cfi_restore %rbp -+ mov 40(%rsp),%rbx -+.cfi_restore %rbx -+ lea 48(%rsp),%rsp -+.cfi_adjust_cfa_offset -48 -+.Lno_data_avx2: -+.Lblocks_avx2_epilogue: -+ ret -+.cfi_endproc -+ -+.align 32 -+.Lbase2_64_avx2: -+.cfi_startproc -+ push %rbx -+.cfi_push %rbx -+ push %rbp -+.cfi_push %rbp -+ push %r12 -+.cfi_push %r12 -+ push %r13 
-+.cfi_push %r13 -+ push %r14 -+.cfi_push %r14 -+ push %r15 -+.cfi_push %r15 -+.Lbase2_64_avx2_body: -+ -+ mov $len,%r15 # reassign $len -+ -+ mov 24($ctx),$r0 # load r -+ mov 32($ctx),$s1 -+ -+ mov 0($ctx),$h0 # load hash value -+ mov 8($ctx),$h1 -+ mov 16($ctx),$h2#d -+ -+ mov $s1,$r1 -+ mov $s1,%rax -+ shr \$2,$s1 -+ add $r1,$s1 # s1 = r1 + (r1 >> 2) -+ -+ test \$63,$len -+ jz .Linit_avx2 -+ -+.Lbase2_64_pre_avx2: -+ add 0($inp),$h0 # accumulate input -+ adc 8($inp),$h1 -+ lea 16($inp),$inp -+ adc $padbit,$h2 -+ sub \$16,%r15 -+ -+ call __poly1305_block -+ mov $r1,%rax -+ -+ test \$63,%r15 -+ jnz .Lbase2_64_pre_avx2 -+ -+.Linit_avx2: -+ ################################# base 2^64 -> base 2^26 -+ mov $h0,%rax -+ mov $h0,%rdx -+ shr \$52,$h0 -+ mov $h1,$d1 -+ mov $h1,$d2 -+ shr \$26,%rdx -+ and \$0x3ffffff,%rax # h[0] -+ shl \$12,$d1 -+ and \$0x3ffffff,%rdx # h[1] -+ shr \$14,$h1 -+ or $d1,$h0 -+ shl \$24,$h2 -+ and \$0x3ffffff,$h0 # h[2] -+ shr \$40,$d2 -+ and \$0x3ffffff,$h1 # h[3] -+ or $d2,$h2 # h[4] -+ -+ vmovd %rax#d,%x#$H0 -+ vmovd %rdx#d,%x#$H1 -+ vmovd $h0#d,%x#$H2 -+ vmovd $h1#d,%x#$H3 -+ vmovd $h2#d,%x#$H4 -+ movl \$1,20($ctx) # set is_base2_26 -+ -+ call __poly1305_init_avx -+ -+.Lproceed_avx2: -+ mov %r15,$len # restore $len -+ mov OPENSSL_ia32cap_P+8(%rip),%r10d -+ mov \$`(1<<31|1<<30|1<<16)`,%r11d -+ -+ mov 0(%rsp),%r15 -+.cfi_restore %r15 -+ mov 8(%rsp),%r14 -+.cfi_restore %r14 -+ mov 16(%rsp),%r13 -+.cfi_restore %r13 -+ mov 24(%rsp),%r12 -+.cfi_restore %r12 -+ mov 32(%rsp),%rbp -+.cfi_restore %rbp -+ mov 40(%rsp),%rbx -+.cfi_restore %rbx -+ lea 48(%rsp),%rax -+ lea 48(%rsp),%rsp -+.cfi_adjust_cfa_offset -48 -+.Lbase2_64_avx2_epilogue: -+ jmp .Ldo_avx2 -+.cfi_endproc -+ -+.align 32 -+.Leven_avx2: -+.cfi_startproc -+ mov OPENSSL_ia32cap_P+8(%rip),%r10d -+ vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26 -+ vmovd 4*1($ctx),%x#$H1 -+ vmovd 4*2($ctx),%x#$H2 -+ vmovd 4*3($ctx),%x#$H3 -+ vmovd 4*4($ctx),%x#$H4 -+ -+.Ldo_avx2: -+___ -+$code.=<<___ if 
($avx>2); -+ cmp \$512,$len -+ jb .Lskip_avx512 -+ and %r11d,%r10d -+ test \$`1<<16`,%r10d # check for AVX512F -+ jnz .Lblocks_avx512 -+.Lskip_avx512: -+___ -+$code.=<<___ if (!$win64); -+ lea -8(%rsp),%r11 -+.cfi_def_cfa %r11,16 -+ sub \$0x128,%rsp -+___ -+$code.=<<___ if ($win64); -+ lea -0xf8(%rsp),%r11 -+ sub \$0x1c8,%rsp -+ vmovdqa %xmm6,0x50(%r11) -+ vmovdqa %xmm7,0x60(%r11) -+ vmovdqa %xmm8,0x70(%r11) -+ vmovdqa %xmm9,0x80(%r11) -+ vmovdqa %xmm10,0x90(%r11) -+ vmovdqa %xmm11,0xa0(%r11) -+ vmovdqa %xmm12,0xb0(%r11) -+ vmovdqa %xmm13,0xc0(%r11) -+ vmovdqa %xmm14,0xd0(%r11) -+ vmovdqa %xmm15,0xe0(%r11) -+.Ldo_avx2_body: -+___ -+$code.=<<___; -+ lea .Lconst(%rip),%rcx -+ lea 48+64($ctx),$ctx # size optimization -+ vmovdqa 96(%rcx),$T0 # .Lpermd_avx2 -+ -+ # expand and copy pre-calculated table to stack -+ vmovdqu `16*0-64`($ctx),%x#$T2 -+ and \$-512,%rsp -+ vmovdqu `16*1-64`($ctx),%x#$T3 -+ vmovdqu `16*2-64`($ctx),%x#$T4 -+ vmovdqu `16*3-64`($ctx),%x#$D0 -+ vmovdqu `16*4-64`($ctx),%x#$D1 -+ vmovdqu `16*5-64`($ctx),%x#$D2 -+ lea 0x90(%rsp),%rax # size optimization -+ vmovdqu `16*6-64`($ctx),%x#$D3 -+ vpermd $T2,$T0,$T2 # 00003412 -> 14243444 -+ vmovdqu `16*7-64`($ctx),%x#$D4 -+ vpermd $T3,$T0,$T3 -+ vmovdqu `16*8-64`($ctx),%x#$MASK -+ vpermd $T4,$T0,$T4 -+ vmovdqa $T2,0x00(%rsp) -+ vpermd $D0,$T0,$D0 -+ vmovdqa $T3,0x20-0x90(%rax) -+ vpermd $D1,$T0,$D1 -+ vmovdqa $T4,0x40-0x90(%rax) -+ vpermd $D2,$T0,$D2 -+ vmovdqa $D0,0x60-0x90(%rax) -+ vpermd $D3,$T0,$D3 -+ vmovdqa $D1,0x80-0x90(%rax) -+ vpermd $D4,$T0,$D4 -+ vmovdqa $D2,0xa0-0x90(%rax) -+ vpermd $MASK,$T0,$MASK -+ vmovdqa $D3,0xc0-0x90(%rax) -+ vmovdqa $D4,0xe0-0x90(%rax) -+ vmovdqa $MASK,0x100-0x90(%rax) -+ vmovdqa 64(%rcx),$MASK # .Lmask26 -+ -+ ################################################################ -+ # load input -+ vmovdqu 16*0($inp),%x#$T0 -+ vmovdqu 16*1($inp),%x#$T1 -+ vinserti128 \$1,16*2($inp),$T0,$T0 -+ vinserti128 \$1,16*3($inp),$T1,$T1 -+ lea 16*4($inp),$inp -+ -+ vpsrldq \$6,$T0,$T2 # 
splat input -+ vpsrldq \$6,$T1,$T3 -+ vpunpckhqdq $T1,$T0,$T4 # 4 -+ vpunpcklqdq $T3,$T2,$T2 # 2:3 -+ vpunpcklqdq $T1,$T0,$T0 # 0:1 -+ -+ vpsrlq \$30,$T2,$T3 -+ vpsrlq \$4,$T2,$T2 -+ vpsrlq \$26,$T0,$T1 -+ vpsrlq \$40,$T4,$T4 # 4 -+ vpand $MASK,$T2,$T2 # 2 -+ vpand $MASK,$T0,$T0 # 0 -+ vpand $MASK,$T1,$T1 # 1 -+ vpand $MASK,$T3,$T3 # 3 -+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always -+ -+ vpaddq $H2,$T2,$H2 # accumulate input -+ sub \$64,$len -+ jz .Ltail_avx2 -+ jmp .Loop_avx2 -+ -+.align 32 -+.Loop_avx2: -+ ################################################################ -+ # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4 -+ # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3 -+ # ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2 -+ # ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1 -+ # \________/\__________/ -+ ################################################################ -+ #vpaddq $H2,$T2,$H2 # accumulate input -+ vpaddq $H0,$T0,$H0 -+ vmovdqa `32*0`(%rsp),$T0 # r0^4 -+ vpaddq $H1,$T1,$H1 -+ vmovdqa `32*1`(%rsp),$T1 # r1^4 -+ vpaddq $H3,$T3,$H3 -+ vmovdqa `32*3`(%rsp),$T2 # r2^4 -+ vpaddq $H4,$T4,$H4 -+ vmovdqa `32*6-0x90`(%rax),$T3 # s3^4 -+ vmovdqa `32*8-0x90`(%rax),$S4 # s4^4 -+ -+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ # -+ # however, as h2 is "chronologically" first one available pull -+ # corresponding operations up, so it's -+ # -+ # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4 -+ # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4 -+ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 -+ # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 -+ # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4 -+ -+ vpmuludq $H2,$T0,$D2 # d2 = h2*r0 -+ vpmuludq $H2,$T1,$D3 # d3 = h2*r1 -+ vpmuludq $H2,$T2,$D4 # d4 = h2*r2 -+ vpmuludq $H2,$T3,$D0 # d0 = h2*s3 -+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4 -+ -+ 
vpmuludq $H0,$T1,$T4 # h0*r1 -+ vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp -+ vpaddq $T4,$D1,$D1 # d1 += h0*r1 -+ vpaddq $H2,$D2,$D2 # d2 += h1*r1 -+ vpmuludq $H3,$T1,$T4 # h3*r1 -+ vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1 -+ vpaddq $T4,$D4,$D4 # d4 += h3*r1 -+ vpaddq $H2,$D0,$D0 # d0 += h4*s1 -+ vmovdqa `32*4-0x90`(%rax),$T1 # s2 -+ -+ vpmuludq $H0,$T0,$T4 # h0*r0 -+ vpmuludq $H1,$T0,$H2 # h1*r0 -+ vpaddq $T4,$D0,$D0 # d0 += h0*r0 -+ vpaddq $H2,$D1,$D1 # d1 += h1*r0 -+ vpmuludq $H3,$T0,$T4 # h3*r0 -+ vpmuludq $H4,$T0,$H2 # h4*r0 -+ vmovdqu 16*0($inp),%x#$T0 # load input -+ vpaddq $T4,$D3,$D3 # d3 += h3*r0 -+ vpaddq $H2,$D4,$D4 # d4 += h4*r0 -+ vinserti128 \$1,16*2($inp),$T0,$T0 -+ -+ vpmuludq $H3,$T1,$T4 # h3*s2 -+ vpmuludq $H4,$T1,$H2 # h4*s2 -+ vmovdqu 16*1($inp),%x#$T1 -+ vpaddq $T4,$D0,$D0 # d0 += h3*s2 -+ vpaddq $H2,$D1,$D1 # d1 += h4*s2 -+ vmovdqa `32*5-0x90`(%rax),$H2 # r3 -+ vpmuludq $H1,$T2,$T4 # h1*r2 -+ vpmuludq $H0,$T2,$T2 # h0*r2 -+ vpaddq $T4,$D3,$D3 # d3 += h1*r2 -+ vpaddq $T2,$D2,$D2 # d2 += h0*r2 -+ vinserti128 \$1,16*3($inp),$T1,$T1 -+ lea 16*4($inp),$inp -+ -+ vpmuludq $H1,$H2,$T4 # h1*r3 -+ vpmuludq $H0,$H2,$H2 # h0*r3 -+ vpsrldq \$6,$T0,$T2 # splat input -+ vpaddq $T4,$D4,$D4 # d4 += h1*r3 -+ vpaddq $H2,$D3,$D3 # d3 += h0*r3 -+ vpmuludq $H3,$T3,$T4 # h3*s3 -+ vpmuludq $H4,$T3,$H2 # h4*s3 -+ vpsrldq \$6,$T1,$T3 -+ vpaddq $T4,$D1,$D1 # d1 += h3*s3 -+ vpaddq $H2,$D2,$D2 # d2 += h4*s3 -+ vpunpckhqdq $T1,$T0,$T4 # 4 -+ -+ vpmuludq $H3,$S4,$H3 # h3*s4 -+ vpmuludq $H4,$S4,$H4 # h4*s4 -+ vpunpcklqdq $T1,$T0,$T0 # 0:1 -+ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 -+ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 -+ vpunpcklqdq $T3,$T2,$T3 # 2:3 -+ vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4 -+ vpmuludq $H1,$S4,$H0 # h1*s4 -+ vmovdqa 64(%rcx),$MASK # .Lmask26 -+ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4 -+ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 -+ -+ ################################################################ -+ # lazy reduction (interleaved with tail of input splat) 
-+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpaddq $D0,$D1,$H1 # h0 -> h1 -+ -+ vpsrlq \$26,$H4,$D4 -+ vpand $MASK,$H4,$H4 -+ -+ vpsrlq \$4,$T3,$T2 -+ -+ vpsrlq \$26,$H1,$D1 -+ vpand $MASK,$H1,$H1 -+ vpaddq $D1,$H2,$H2 # h1 -> h2 -+ -+ vpaddq $D4,$H0,$H0 -+ vpsllq \$2,$D4,$D4 -+ vpaddq $D4,$H0,$H0 # h4 -> h0 -+ -+ vpand $MASK,$T2,$T2 # 2 -+ vpsrlq \$26,$T0,$T1 -+ -+ vpsrlq \$26,$H2,$D2 -+ vpand $MASK,$H2,$H2 -+ vpaddq $D2,$H3,$H3 # h2 -> h3 -+ -+ vpaddq $T2,$H2,$H2 # modulo-scheduled -+ vpsrlq \$30,$T3,$T3 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpaddq $D0,$H1,$H1 # h0 -> h1 -+ -+ vpsrlq \$40,$T4,$T4 # 4 -+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ vpand $MASK,$T0,$T0 # 0 -+ vpand $MASK,$T1,$T1 # 1 -+ vpand $MASK,$T3,$T3 # 3 -+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always -+ -+ sub \$64,$len -+ jnz .Loop_avx2 -+ -+ .byte 0x66,0x90 -+.Ltail_avx2: -+ ################################################################ -+ # while above multiplications were by r^4 in all lanes, in last -+ # iteration we multiply least significant lane by r^4 and most -+ # significant one by r, so copy of above except that references -+ # to the precomputed table are displaced by 4... 
-+ -+ #vpaddq $H2,$T2,$H2 # accumulate input -+ vpaddq $H0,$T0,$H0 -+ vmovdqu `32*0+4`(%rsp),$T0 # r0^4 -+ vpaddq $H1,$T1,$H1 -+ vmovdqu `32*1+4`(%rsp),$T1 # r1^4 -+ vpaddq $H3,$T3,$H3 -+ vmovdqu `32*3+4`(%rsp),$T2 # r2^4 -+ vpaddq $H4,$T4,$H4 -+ vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4 -+ vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4 -+ -+ vpmuludq $H2,$T0,$D2 # d2 = h2*r0 -+ vpmuludq $H2,$T1,$D3 # d3 = h2*r1 -+ vpmuludq $H2,$T2,$D4 # d4 = h2*r2 -+ vpmuludq $H2,$T3,$D0 # d0 = h2*s3 -+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4 -+ -+ vpmuludq $H0,$T1,$T4 # h0*r1 -+ vpmuludq $H1,$T1,$H2 # h1*r1 -+ vpaddq $T4,$D1,$D1 # d1 += h0*r1 -+ vpaddq $H2,$D2,$D2 # d2 += h1*r1 -+ vpmuludq $H3,$T1,$T4 # h3*r1 -+ vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1 -+ vpaddq $T4,$D4,$D4 # d4 += h3*r1 -+ vpaddq $H2,$D0,$D0 # d0 += h4*s1 -+ -+ vpmuludq $H0,$T0,$T4 # h0*r0 -+ vpmuludq $H1,$T0,$H2 # h1*r0 -+ vpaddq $T4,$D0,$D0 # d0 += h0*r0 -+ vmovdqu `32*4+4-0x90`(%rax),$T1 # s2 -+ vpaddq $H2,$D1,$D1 # d1 += h1*r0 -+ vpmuludq $H3,$T0,$T4 # h3*r0 -+ vpmuludq $H4,$T0,$H2 # h4*r0 -+ vpaddq $T4,$D3,$D3 # d3 += h3*r0 -+ vpaddq $H2,$D4,$D4 # d4 += h4*r0 -+ -+ vpmuludq $H3,$T1,$T4 # h3*s2 -+ vpmuludq $H4,$T1,$H2 # h4*s2 -+ vpaddq $T4,$D0,$D0 # d0 += h3*s2 -+ vpaddq $H2,$D1,$D1 # d1 += h4*s2 -+ vmovdqu `32*5+4-0x90`(%rax),$H2 # r3 -+ vpmuludq $H1,$T2,$T4 # h1*r2 -+ vpmuludq $H0,$T2,$T2 # h0*r2 -+ vpaddq $T4,$D3,$D3 # d3 += h1*r2 -+ vpaddq $T2,$D2,$D2 # d2 += h0*r2 -+ -+ vpmuludq $H1,$H2,$T4 # h1*r3 -+ vpmuludq $H0,$H2,$H2 # h0*r3 -+ vpaddq $T4,$D4,$D4 # d4 += h1*r3 -+ vpaddq $H2,$D3,$D3 # d3 += h0*r3 -+ vpmuludq $H3,$T3,$T4 # h3*s3 -+ vpmuludq $H4,$T3,$H2 # h4*s3 -+ vpaddq $T4,$D1,$D1 # d1 += h3*s3 -+ vpaddq $H2,$D2,$D2 # d2 += h4*s3 -+ -+ vpmuludq $H3,$S4,$H3 # h3*s4 -+ vpmuludq $H4,$S4,$H4 # h4*s4 -+ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4 -+ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*r4 -+ vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4 -+ vpmuludq $H1,$S4,$H0 # h1*s4 -+ vmovdqa 64(%rcx),$MASK # .Lmask26 -+ vpaddq $H4,$D4,$H4 # h4 = 
d4 + h0*r4 -+ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4 -+ -+ ################################################################ -+ # horizontal addition -+ -+ vpsrldq \$8,$D1,$T1 -+ vpsrldq \$8,$H2,$T2 -+ vpsrldq \$8,$H3,$T3 -+ vpsrldq \$8,$H4,$T4 -+ vpsrldq \$8,$H0,$T0 -+ vpaddq $T1,$D1,$D1 -+ vpaddq $T2,$H2,$H2 -+ vpaddq $T3,$H3,$H3 -+ vpaddq $T4,$H4,$H4 -+ vpaddq $T0,$H0,$H0 -+ -+ vpermq \$0x2,$H3,$T3 -+ vpermq \$0x2,$H4,$T4 -+ vpermq \$0x2,$H0,$T0 -+ vpermq \$0x2,$D1,$T1 -+ vpermq \$0x2,$H2,$T2 -+ vpaddq $T3,$H3,$H3 -+ vpaddq $T4,$H4,$H4 -+ vpaddq $T0,$H0,$H0 -+ vpaddq $T1,$D1,$D1 -+ vpaddq $T2,$H2,$H2 -+ -+ ################################################################ -+ # lazy reduction -+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpaddq $D0,$D1,$H1 # h0 -> h1 -+ -+ vpsrlq \$26,$H4,$D4 -+ vpand $MASK,$H4,$H4 -+ -+ vpsrlq \$26,$H1,$D1 -+ vpand $MASK,$H1,$H1 -+ vpaddq $D1,$H2,$H2 # h1 -> h2 -+ -+ vpaddq $D4,$H0,$H0 -+ vpsllq \$2,$D4,$D4 -+ vpaddq $D4,$H0,$H0 # h4 -> h0 -+ -+ vpsrlq \$26,$H2,$D2 -+ vpand $MASK,$H2,$H2 -+ vpaddq $D2,$H3,$H3 # h2 -> h3 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpaddq $D0,$H1,$H1 # h0 -> h1 -+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced -+ vmovd %x#$H1,`4*1-48-64`($ctx) -+ vmovd %x#$H2,`4*2-48-64`($ctx) -+ vmovd %x#$H3,`4*3-48-64`($ctx) -+ vmovd %x#$H4,`4*4-48-64`($ctx) -+___ -+$code.=<<___ if ($win64); -+ vmovdqa 0x50(%r11),%xmm6 -+ vmovdqa 0x60(%r11),%xmm7 -+ vmovdqa 0x70(%r11),%xmm8 -+ vmovdqa 0x80(%r11),%xmm9 -+ vmovdqa 0x90(%r11),%xmm10 -+ vmovdqa 0xa0(%r11),%xmm11 -+ vmovdqa 0xb0(%r11),%xmm12 -+ vmovdqa 0xc0(%r11),%xmm13 -+ vmovdqa 0xd0(%r11),%xmm14 -+ vmovdqa 0xe0(%r11),%xmm15 -+ lea 0xf8(%r11),%rsp -+.Ldo_avx2_epilogue: -+___ -+$code.=<<___ if (!$win64); -+ lea 8(%r11),%rsp -+.cfi_def_cfa %rsp,8 -+___ -+$code.=<<___; -+ vzeroupper -+ 
ret -+.cfi_endproc -+.size poly1305_blocks_avx2,.-poly1305_blocks_avx2 -+___ -+####################################################################### -+if ($avx>2) { -+# On entry we have input length divisible by 64. But since inner loop -+# processes 128 bytes per iteration, cases when length is not divisible -+# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this -+# reason stack layout is kept identical to poly1305_blocks_avx2. If not -+# for this tail, we wouldn't have to even allocate stack frame... -+ -+my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24)); -+my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29)); -+my $PADBIT="%zmm30"; -+ -+map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain -+map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4)); -+map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4)); -+map(s/%y/%z/,($MASK)); -+ -+$code.=<<___; -+.type poly1305_blocks_avx512,\@function,4 -+.align 32 -+poly1305_blocks_avx512: -+.cfi_startproc -+.Lblocks_avx512: -+ mov \$15,%eax -+ kmovw %eax,%k2 -+___ -+$code.=<<___ if (!$win64); -+ lea -8(%rsp),%r11 -+.cfi_def_cfa %r11,16 -+ sub \$0x128,%rsp -+___ -+$code.=<<___ if ($win64); -+ lea -0xf8(%rsp),%r11 -+ sub \$0x1c8,%rsp -+ vmovdqa %xmm6,0x50(%r11) -+ vmovdqa %xmm7,0x60(%r11) -+ vmovdqa %xmm8,0x70(%r11) -+ vmovdqa %xmm9,0x80(%r11) -+ vmovdqa %xmm10,0x90(%r11) -+ vmovdqa %xmm11,0xa0(%r11) -+ vmovdqa %xmm12,0xb0(%r11) -+ vmovdqa %xmm13,0xc0(%r11) -+ vmovdqa %xmm14,0xd0(%r11) -+ vmovdqa %xmm15,0xe0(%r11) -+.Ldo_avx512_body: -+___ -+$code.=<<___; -+ lea .Lconst(%rip),%rcx -+ lea 48+64($ctx),$ctx # size optimization -+ vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2 -+ -+ # expand pre-calculated table -+ vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0} -+ and \$-512,%rsp -+ vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1} -+ mov \$0x20,%rax -+ vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1} -+ vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2} -+ vmovdqu `16*4-64`($ctx),%x#$T1 # ... 
${S2} -+ vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3} -+ vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3} -+ vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4} -+ vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4} -+ vpermd $D0,$T2,$R0 # 00003412 -> 14243444 -+ vpbroadcastq 64(%rcx),$MASK # .Lmask26 -+ vpermd $D1,$T2,$R1 -+ vpermd $T0,$T2,$S1 -+ vpermd $D2,$T2,$R2 -+ vmovdqa64 $R0,0x00(%rsp){%k2} # save in case $len%128 != 0 -+ vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304 -+ vpermd $T1,$T2,$S2 -+ vmovdqu64 $R1,0x00(%rsp,%rax){%k2} -+ vpsrlq \$32,$R1,$T1 -+ vpermd $D3,$T2,$R3 -+ vmovdqa64 $S1,0x40(%rsp){%k2} -+ vpermd $T3,$T2,$S3 -+ vpermd $D4,$T2,$R4 -+ vmovdqu64 $R2,0x40(%rsp,%rax){%k2} -+ vpermd $T4,$T2,$S4 -+ vmovdqa64 $S2,0x80(%rsp){%k2} -+ vmovdqu64 $R3,0x80(%rsp,%rax){%k2} -+ vmovdqa64 $S3,0xc0(%rsp){%k2} -+ vmovdqu64 $R4,0xc0(%rsp,%rax){%k2} -+ vmovdqa64 $S4,0x100(%rsp){%k2} -+ -+ ################################################################ -+ # calculate 5th through 8th powers of the key -+ # -+ # d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1 -+ # d1 = r0'*r1 + r1'*r0 + r2'*5*r4 + r3'*5*r3 + r4'*5*r2 -+ # d2 = r0'*r2 + r1'*r1 + r2'*r0 + r3'*5*r4 + r4'*5*r3 -+ # d3 = r0'*r3 + r1'*r2 + r2'*r1 + r3'*r0 + r4'*5*r4 -+ # d4 = r0'*r4 + r1'*r3 + r2'*r2 + r3'*r1 + r4'*r0 -+ -+ vpmuludq $T0,$R0,$D0 # d0 = r0'*r0 -+ vpmuludq $T0,$R1,$D1 # d1 = r0'*r1 -+ vpmuludq $T0,$R2,$D2 # d2 = r0'*r2 -+ vpmuludq $T0,$R3,$D3 # d3 = r0'*r3 -+ vpmuludq $T0,$R4,$D4 # d4 = r0'*r4 -+ vpsrlq \$32,$R2,$T2 -+ -+ vpmuludq $T1,$S4,$M0 -+ vpmuludq $T1,$R0,$M1 -+ vpmuludq $T1,$R1,$M2 -+ vpmuludq $T1,$R2,$M3 -+ vpmuludq $T1,$R3,$M4 -+ vpsrlq \$32,$R3,$T3 -+ vpaddq $M0,$D0,$D0 # d0 += r1'*5*r4 -+ vpaddq $M1,$D1,$D1 # d1 += r1'*r0 -+ vpaddq $M2,$D2,$D2 # d2 += r1'*r1 -+ vpaddq $M3,$D3,$D3 # d3 += r1'*r2 -+ vpaddq $M4,$D4,$D4 # d4 += r1'*r3 -+ -+ vpmuludq $T2,$S3,$M0 -+ vpmuludq $T2,$S4,$M1 -+ vpmuludq $T2,$R1,$M3 -+ vpmuludq $T2,$R2,$M4 -+ vpmuludq $T2,$R0,$M2 -+ vpsrlq \$32,$R4,$T4 -+ vpaddq $M0,$D0,$D0 # 
d0 += r2'*5*r3 -+ vpaddq $M1,$D1,$D1 # d1 += r2'*5*r4 -+ vpaddq $M3,$D3,$D3 # d3 += r2'*r1 -+ vpaddq $M4,$D4,$D4 # d4 += r2'*r2 -+ vpaddq $M2,$D2,$D2 # d2 += r2'*r0 -+ -+ vpmuludq $T3,$S2,$M0 -+ vpmuludq $T3,$R0,$M3 -+ vpmuludq $T3,$R1,$M4 -+ vpmuludq $T3,$S3,$M1 -+ vpmuludq $T3,$S4,$M2 -+ vpaddq $M0,$D0,$D0 # d0 += r3'*5*r2 -+ vpaddq $M3,$D3,$D3 # d3 += r3'*r0 -+ vpaddq $M4,$D4,$D4 # d4 += r3'*r1 -+ vpaddq $M1,$D1,$D1 # d1 += r3'*5*r3 -+ vpaddq $M2,$D2,$D2 # d2 += r3'*5*r4 -+ -+ vpmuludq $T4,$S4,$M3 -+ vpmuludq $T4,$R0,$M4 -+ vpmuludq $T4,$S1,$M0 -+ vpmuludq $T4,$S2,$M1 -+ vpmuludq $T4,$S3,$M2 -+ vpaddq $M3,$D3,$D3 # d3 += r2'*5*r4 -+ vpaddq $M4,$D4,$D4 # d4 += r2'*r0 -+ vpaddq $M0,$D0,$D0 # d0 += r2'*5*r1 -+ vpaddq $M1,$D1,$D1 # d1 += r2'*5*r2 -+ vpaddq $M2,$D2,$D2 # d2 += r2'*5*r3 -+ -+ ################################################################ -+ # load input -+ vmovdqu64 16*0($inp),%z#$T3 -+ vmovdqu64 16*4($inp),%z#$T4 -+ lea 16*8($inp),$inp -+ -+ ################################################################ -+ # lazy reduction -+ -+ vpsrlq \$26,$D3,$M3 -+ vpandq $MASK,$D3,$D3 -+ vpaddq $M3,$D4,$D4 # d3 -> d4 -+ -+ vpsrlq \$26,$D0,$M0 -+ vpandq $MASK,$D0,$D0 -+ vpaddq $M0,$D1,$D1 # d0 -> d1 -+ -+ vpsrlq \$26,$D4,$M4 -+ vpandq $MASK,$D4,$D4 -+ -+ vpsrlq \$26,$D1,$M1 -+ vpandq $MASK,$D1,$D1 -+ vpaddq $M1,$D2,$D2 # d1 -> d2 -+ -+ vpaddq $M4,$D0,$D0 -+ vpsllq \$2,$M4,$M4 -+ vpaddq $M4,$D0,$D0 # d4 -> d0 -+ -+ vpsrlq \$26,$D2,$M2 -+ vpandq $MASK,$D2,$D2 -+ vpaddq $M2,$D3,$D3 # d2 -> d3 -+ -+ vpsrlq \$26,$D0,$M0 -+ vpandq $MASK,$D0,$D0 -+ vpaddq $M0,$D1,$D1 # d0 -> d1 -+ -+ vpsrlq \$26,$D3,$M3 -+ vpandq $MASK,$D3,$D3 -+ vpaddq $M3,$D4,$D4 # d3 -> d4 -+ -+ ################################################################ -+ # at this point we have 14243444 in $R0-$S4 and 05060708 in -+ # $D0-$D4, ... -+ -+ vpunpcklqdq $T4,$T3,$T0 # transpose input -+ vpunpckhqdq $T4,$T3,$T4 -+ -+ # ... 
since input 64-bit lanes are ordered as 73625140, we could -+ # "vperm" it to 76543210 (here and in each loop iteration), *or* -+ # we could just flow along, hence the goal for $R0-$S4 is -+ # 1858286838784888 ... -+ -+ vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512: -+ mov \$0x7777,%eax -+ kmovw %eax,%k1 -+ -+ vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4--- -+ vpermd $R1,$M0,$R1 -+ vpermd $R2,$M0,$R2 -+ vpermd $R3,$M0,$R3 -+ vpermd $R4,$M0,$R4 -+ -+ vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888 -+ vpermd $D1,$M0,${R1}{%k1} -+ vpermd $D2,$M0,${R2}{%k1} -+ vpermd $D3,$M0,${R3}{%k1} -+ vpermd $D4,$M0,${R4}{%k1} -+ -+ vpslld \$2,$R1,$S1 # *5 -+ vpslld \$2,$R2,$S2 -+ vpslld \$2,$R3,$S3 -+ vpslld \$2,$R4,$S4 -+ vpaddd $R1,$S1,$S1 -+ vpaddd $R2,$S2,$S2 -+ vpaddd $R3,$S3,$S3 -+ vpaddd $R4,$S4,$S4 -+ -+ vpbroadcastq 32(%rcx),$PADBIT # .L129 -+ -+ vpsrlq \$52,$T0,$T2 # splat input -+ vpsllq \$12,$T4,$T3 -+ vporq $T3,$T2,$T2 -+ vpsrlq \$26,$T0,$T1 -+ vpsrlq \$14,$T4,$T3 -+ vpsrlq \$40,$T4,$T4 # 4 -+ vpandq $MASK,$T2,$T2 # 2 -+ vpandq $MASK,$T0,$T0 # 0 -+ #vpandq $MASK,$T1,$T1 # 1 -+ #vpandq $MASK,$T3,$T3 # 3 -+ #vporq $PADBIT,$T4,$T4 # padbit, yes, always -+ -+ vpaddq $H2,$T2,$H2 # accumulate input -+ sub \$192,$len -+ jbe .Ltail_avx512 -+ jmp .Loop_avx512 -+ -+.align 32 -+.Loop_avx512: -+ ################################################################ -+ # ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8 -+ # ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7 -+ # ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6 -+ # ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5 -+ # ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4 -+ # ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3 -+ # ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2 -+ # ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1 -+ # \________/\___________/ -+ ################################################################ -+ #vpaddq $H2,$T2,$H2 # accumulate input -+ -+ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 -+ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 -+ # d2 = h2*r0 + h1*r1 + h0*r2 + 
h4*5*r3 + h3*5*r4 -+ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 -+ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 -+ # -+ # however, as h2 is "chronologically" first one available pull -+ # corresponding operations up, so it's -+ # -+ # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4 -+ # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0 -+ # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1 -+ # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2 -+ # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3 -+ -+ vpmuludq $H2,$R1,$D3 # d3 = h2*r1 -+ vpaddq $H0,$T0,$H0 -+ vpmuludq $H2,$R2,$D4 # d4 = h2*r2 -+ vpandq $MASK,$T1,$T1 # 1 -+ vpmuludq $H2,$S3,$D0 # d0 = h2*s3 -+ vpandq $MASK,$T3,$T3 # 3 -+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4 -+ vporq $PADBIT,$T4,$T4 # padbit, yes, always -+ vpmuludq $H2,$R0,$D2 # d2 = h2*r0 -+ vpaddq $H1,$T1,$H1 # accumulate input -+ vpaddq $H3,$T3,$H3 -+ vpaddq $H4,$T4,$H4 -+ -+ vmovdqu64 16*0($inp),$T3 # load input -+ vmovdqu64 16*4($inp),$T4 -+ lea 16*8($inp),$inp -+ vpmuludq $H0,$R3,$M3 -+ vpmuludq $H0,$R4,$M4 -+ vpmuludq $H0,$R0,$M0 -+ vpmuludq $H0,$R1,$M1 -+ vpaddq $M3,$D3,$D3 # d3 += h0*r3 -+ vpaddq $M4,$D4,$D4 # d4 += h0*r4 -+ vpaddq $M0,$D0,$D0 # d0 += h0*r0 -+ vpaddq $M1,$D1,$D1 # d1 += h0*r1 -+ -+ vpmuludq $H1,$R2,$M3 -+ vpmuludq $H1,$R3,$M4 -+ vpmuludq $H1,$S4,$M0 -+ vpmuludq $H0,$R2,$M2 -+ vpaddq $M3,$D3,$D3 # d3 += h1*r2 -+ vpaddq $M4,$D4,$D4 # d4 += h1*r3 -+ vpaddq $M0,$D0,$D0 # d0 += h1*s4 -+ vpaddq $M2,$D2,$D2 # d2 += h0*r2 -+ -+ vpunpcklqdq $T4,$T3,$T0 # transpose input -+ vpunpckhqdq $T4,$T3,$T4 -+ -+ vpmuludq $H3,$R0,$M3 -+ vpmuludq $H3,$R1,$M4 -+ vpmuludq $H1,$R0,$M1 -+ vpmuludq $H1,$R1,$M2 -+ vpaddq $M3,$D3,$D3 # d3 += h3*r0 -+ vpaddq $M4,$D4,$D4 # d4 += h3*r1 -+ vpaddq $M1,$D1,$D1 # d1 += h1*r0 -+ vpaddq $M2,$D2,$D2 # d2 += h1*r1 -+ -+ vpmuludq $H4,$S4,$M3 -+ vpmuludq $H4,$R0,$M4 -+ vpmuludq $H3,$S2,$M0 -+ vpmuludq $H3,$S3,$M1 -+ vpaddq $M3,$D3,$D3 # d3 += h4*s4 -+ vpmuludq $H3,$S4,$M2 -+ vpaddq $M4,$D4,$D4 # d4 += h4*r0 -+ 
vpaddq $M0,$D0,$D0 # d0 += h3*s2 -+ vpaddq $M1,$D1,$D1 # d1 += h3*s3 -+ vpaddq $M2,$D2,$D2 # d2 += h3*s4 -+ -+ vpmuludq $H4,$S1,$M0 -+ vpmuludq $H4,$S2,$M1 -+ vpmuludq $H4,$S3,$M2 -+ vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 -+ vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 -+ vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 -+ -+ ################################################################ -+ # lazy reduction (interleaved with input splat) -+ -+ vpsrlq \$52,$T0,$T2 # splat input -+ vpsllq \$12,$T4,$T3 -+ -+ vpsrlq \$26,$D3,$H3 -+ vpandq $MASK,$D3,$D3 -+ vpaddq $H3,$D4,$H4 # h3 -> h4 -+ -+ vporq $T3,$T2,$T2 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpandq $MASK,$H0,$H0 -+ vpaddq $D0,$H1,$H1 # h0 -> h1 -+ -+ vpandq $MASK,$T2,$T2 # 2 -+ -+ vpsrlq \$26,$H4,$D4 -+ vpandq $MASK,$H4,$H4 -+ -+ vpsrlq \$26,$H1,$D1 -+ vpandq $MASK,$H1,$H1 -+ vpaddq $D1,$H2,$H2 # h1 -> h2 -+ -+ vpaddq $D4,$H0,$H0 -+ vpsllq \$2,$D4,$D4 -+ vpaddq $D4,$H0,$H0 # h4 -> h0 -+ -+ vpaddq $T2,$H2,$H2 # modulo-scheduled -+ vpsrlq \$26,$T0,$T1 -+ -+ vpsrlq \$26,$H2,$D2 -+ vpandq $MASK,$H2,$H2 -+ vpaddq $D2,$D3,$H3 # h2 -> h3 -+ -+ vpsrlq \$14,$T4,$T3 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpandq $MASK,$H0,$H0 -+ vpaddq $D0,$H1,$H1 # h0 -> h1 -+ -+ vpsrlq \$40,$T4,$T4 # 4 -+ -+ vpsrlq \$26,$H3,$D3 -+ vpandq $MASK,$H3,$H3 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ vpandq $MASK,$T0,$T0 # 0 -+ #vpandq $MASK,$T1,$T1 # 1 -+ #vpandq $MASK,$T3,$T3 # 3 -+ #vporq $PADBIT,$T4,$T4 # padbit, yes, always -+ -+ sub \$128,$len -+ ja .Loop_avx512 -+ -+.Ltail_avx512: -+ ################################################################ -+ # while above multiplications were by r^8 in all lanes, in last -+ # iteration we multiply least significant lane by r^8 and most -+ # significant one by r, that's why table gets shifted... 
-+ -+ vpsrlq \$32,$R0,$R0 # 0105020603070408 -+ vpsrlq \$32,$R1,$R1 -+ vpsrlq \$32,$R2,$R2 -+ vpsrlq \$32,$S3,$S3 -+ vpsrlq \$32,$S4,$S4 -+ vpsrlq \$32,$R3,$R3 -+ vpsrlq \$32,$R4,$R4 -+ vpsrlq \$32,$S1,$S1 -+ vpsrlq \$32,$S2,$S2 -+ -+ ################################################################ -+ # load either next or last 64 byte of input -+ lea ($inp,$len),$inp -+ -+ #vpaddq $H2,$T2,$H2 # accumulate input -+ vpaddq $H0,$T0,$H0 -+ -+ vpmuludq $H2,$R1,$D3 # d3 = h2*r1 -+ vpmuludq $H2,$R2,$D4 # d4 = h2*r2 -+ vpmuludq $H2,$S3,$D0 # d0 = h2*s3 -+ vpandq $MASK,$T1,$T1 # 1 -+ vpmuludq $H2,$S4,$D1 # d1 = h2*s4 -+ vpandq $MASK,$T3,$T3 # 3 -+ vpmuludq $H2,$R0,$D2 # d2 = h2*r0 -+ vporq $PADBIT,$T4,$T4 # padbit, yes, always -+ vpaddq $H1,$T1,$H1 # accumulate input -+ vpaddq $H3,$T3,$H3 -+ vpaddq $H4,$T4,$H4 -+ -+ vmovdqu 16*0($inp),%x#$T0 -+ vpmuludq $H0,$R3,$M3 -+ vpmuludq $H0,$R4,$M4 -+ vpmuludq $H0,$R0,$M0 -+ vpmuludq $H0,$R1,$M1 -+ vpaddq $M3,$D3,$D3 # d3 += h0*r3 -+ vpaddq $M4,$D4,$D4 # d4 += h0*r4 -+ vpaddq $M0,$D0,$D0 # d0 += h0*r0 -+ vpaddq $M1,$D1,$D1 # d1 += h0*r1 -+ -+ vmovdqu 16*1($inp),%x#$T1 -+ vpmuludq $H1,$R2,$M3 -+ vpmuludq $H1,$R3,$M4 -+ vpmuludq $H1,$S4,$M0 -+ vpmuludq $H0,$R2,$M2 -+ vpaddq $M3,$D3,$D3 # d3 += h1*r2 -+ vpaddq $M4,$D4,$D4 # d4 += h1*r3 -+ vpaddq $M0,$D0,$D0 # d0 += h1*s4 -+ vpaddq $M2,$D2,$D2 # d2 += h0*r2 -+ -+ vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0 -+ vpmuludq $H3,$R0,$M3 -+ vpmuludq $H3,$R1,$M4 -+ vpmuludq $H1,$R0,$M1 -+ vpmuludq $H1,$R1,$M2 -+ vpaddq $M3,$D3,$D3 # d3 += h3*r0 -+ vpaddq $M4,$D4,$D4 # d4 += h3*r1 -+ vpaddq $M1,$D1,$D1 # d1 += h1*r0 -+ vpaddq $M2,$D2,$D2 # d2 += h1*r1 -+ -+ vinserti128 \$1,16*3($inp),%y#$T1,%y#$T1 -+ vpmuludq $H4,$S4,$M3 -+ vpmuludq $H4,$R0,$M4 -+ vpmuludq $H3,$S2,$M0 -+ vpmuludq $H3,$S3,$M1 -+ vpmuludq $H3,$S4,$M2 -+ vpaddq $M3,$D3,$H3 # h3 = d3 + h4*s4 -+ vpaddq $M4,$D4,$D4 # d4 += h4*r0 -+ vpaddq $M0,$D0,$D0 # d0 += h3*s2 -+ vpaddq $M1,$D1,$D1 # d1 += h3*s3 -+ vpaddq $M2,$D2,$D2 # d2 += h3*s4 -+ 
-+ vpmuludq $H4,$S1,$M0 -+ vpmuludq $H4,$S2,$M1 -+ vpmuludq $H4,$S3,$M2 -+ vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1 -+ vpaddq $M1,$D1,$H1 # h1 = d2 + h4*s2 -+ vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3 -+ -+ ################################################################ -+ # horizontal addition -+ -+ mov \$1,%eax -+ vpermq \$0xb1,$H3,$D3 -+ vpermq \$0xb1,$D4,$H4 -+ vpermq \$0xb1,$H0,$D0 -+ vpermq \$0xb1,$H1,$D1 -+ vpermq \$0xb1,$H2,$D2 -+ vpaddq $D3,$H3,$H3 -+ vpaddq $D4,$H4,$H4 -+ vpaddq $D0,$H0,$H0 -+ vpaddq $D1,$H1,$H1 -+ vpaddq $D2,$H2,$H2 -+ -+ kmovw %eax,%k3 -+ vpermq \$0x2,$H3,$D3 -+ vpermq \$0x2,$H4,$D4 -+ vpermq \$0x2,$H0,$D0 -+ vpermq \$0x2,$H1,$D1 -+ vpermq \$0x2,$H2,$D2 -+ vpaddq $D3,$H3,$H3 -+ vpaddq $D4,$H4,$H4 -+ vpaddq $D0,$H0,$H0 -+ vpaddq $D1,$H1,$H1 -+ vpaddq $D2,$H2,$H2 -+ -+ vextracti64x4 \$0x1,$H3,%y#$D3 -+ vextracti64x4 \$0x1,$H4,%y#$D4 -+ vextracti64x4 \$0x1,$H0,%y#$D0 -+ vextracti64x4 \$0x1,$H1,%y#$D1 -+ vextracti64x4 \$0x1,$H2,%y#$D2 -+ vpaddq $D3,$H3,${H3}{%k3}{z} # keep single qword in case -+ vpaddq $D4,$H4,${H4}{%k3}{z} # it's passed to .Ltail_avx2 -+ vpaddq $D0,$H0,${H0}{%k3}{z} -+ vpaddq $D1,$H1,${H1}{%k3}{z} -+ vpaddq $D2,$H2,${H2}{%k3}{z} -+___ -+map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT)); -+map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK)); -+$code.=<<___; -+ ################################################################ -+ # lazy reduction (interleaved with input splat) -+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpsrldq \$6,$T0,$T2 # splat input -+ vpsrldq \$6,$T1,$T3 -+ vpunpckhqdq $T1,$T0,$T4 # 4 -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpunpcklqdq $T3,$T2,$T2 # 2:3 -+ vpunpcklqdq $T1,$T0,$T0 # 0:1 -+ vpaddq $D0,$H1,$H1 # h0 -> h1 -+ -+ vpsrlq \$26,$H4,$D4 -+ vpand $MASK,$H4,$H4 -+ -+ vpsrlq \$26,$H1,$D1 -+ vpand $MASK,$H1,$H1 -+ vpsrlq \$30,$T2,$T3 -+ vpsrlq \$4,$T2,$T2 -+ vpaddq $D1,$H2,$H2 # h1 -> h2 -+ -+ vpaddq $D4,$H0,$H0 -+ vpsllq \$2,$D4,$D4 -+ vpsrlq 
\$26,$T0,$T1 -+ vpsrlq \$40,$T4,$T4 # 4 -+ vpaddq $D4,$H0,$H0 # h4 -> h0 -+ -+ vpsrlq \$26,$H2,$D2 -+ vpand $MASK,$H2,$H2 -+ vpand $MASK,$T2,$T2 # 2 -+ vpand $MASK,$T0,$T0 # 0 -+ vpaddq $D2,$H3,$H3 # h2 -> h3 -+ -+ vpsrlq \$26,$H0,$D0 -+ vpand $MASK,$H0,$H0 -+ vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2 -+ vpand $MASK,$T1,$T1 # 1 -+ vpaddq $D0,$H1,$H1 # h0 -> h1 -+ -+ vpsrlq \$26,$H3,$D3 -+ vpand $MASK,$H3,$H3 -+ vpand $MASK,$T3,$T3 # 3 -+ vpor 32(%rcx),$T4,$T4 # padbit, yes, always -+ vpaddq $D3,$H4,$H4 # h3 -> h4 -+ -+ lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2 -+ add \$64,$len -+ jnz .Ltail_avx2 -+ -+ vpsubq $T2,$H2,$H2 # undo input accumulation -+ vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced -+ vmovd %x#$H1,`4*1-48-64`($ctx) -+ vmovd %x#$H2,`4*2-48-64`($ctx) -+ vmovd %x#$H3,`4*3-48-64`($ctx) -+ vmovd %x#$H4,`4*4-48-64`($ctx) -+ vzeroall -+___ -+$code.=<<___ if ($win64); -+ movdqa 0x50(%r11),%xmm6 -+ movdqa 0x60(%r11),%xmm7 -+ movdqa 0x70(%r11),%xmm8 -+ movdqa 0x80(%r11),%xmm9 -+ movdqa 0x90(%r11),%xmm10 -+ movdqa 0xa0(%r11),%xmm11 -+ movdqa 0xb0(%r11),%xmm12 -+ movdqa 0xc0(%r11),%xmm13 -+ movdqa 0xd0(%r11),%xmm14 -+ movdqa 0xe0(%r11),%xmm15 -+ lea 0xf8(%r11),%rsp -+.Ldo_avx512_epilogue: -+___ -+$code.=<<___ if (!$win64); -+ lea 8(%r11),%rsp -+.cfi_def_cfa %rsp,8 -+___ -+$code.=<<___; -+ ret -+.cfi_endproc -+.size poly1305_blocks_avx512,.-poly1305_blocks_avx512 -+___ -+if ($avx>3) { -+######################################################################## -+# VPMADD52 version using 2^44 radix. -+# -+# One can argue that base 2^52 would be more natural. Well, even though -+# some operations would be more natural, one has to recognize couple of -+# things. Base 2^52 doesn't provide advantage over base 2^44 if you look -+# at amount of multiply-n-accumulate operations. 
Secondly, it makes it -+# impossible to pre-compute multiples of 5 [referred to as s[]/sN in -+# reference implementations], which means that more such operations -+# would have to be performed in inner loop, which in turn makes critical -+# path longer. In other words, even though base 2^44 reduction might -+# look less elegant, overall critical path is actually shorter... -+ -+######################################################################## -+# Layout of opaque area is following. -+# -+# unsigned __int64 h[3]; # current hash value base 2^44 -+# unsigned __int64 s[2]; # key value*20 base 2^44 -+# unsigned __int64 r[3]; # key value base 2^44 -+# struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4]; -+# # r^n positions reflect -+# # placement in register, not -+# # memory, R[3] is R[1]*20 -+ -+$code.=<<___; -+.type poly1305_init_base2_44,\@function,3 -+.align 32 -+poly1305_init_base2_44: -+ xor %rax,%rax -+ mov %rax,0($ctx) # initialize hash value -+ mov %rax,8($ctx) -+ mov %rax,16($ctx) -+ -+.Linit_base2_44: -+ lea poly1305_blocks_vpmadd52(%rip),%r10 -+ lea poly1305_emit_base2_44(%rip),%r11 -+ -+ mov \$0x0ffffffc0fffffff,%rax -+ mov \$0x0ffffffc0ffffffc,%rcx -+ and 0($inp),%rax -+ mov \$0x00000fffffffffff,%r8 -+ and 8($inp),%rcx -+ mov \$0x00000fffffffffff,%r9 -+ and %rax,%r8 -+ shrd \$44,%rcx,%rax -+ mov %r8,40($ctx) # r0 -+ and %r9,%rax -+ shr \$24,%rcx -+ mov %rax,48($ctx) # r1 -+ lea (%rax,%rax,4),%rax # *5 -+ mov %rcx,56($ctx) # r2 -+ shl \$2,%rax # magic <<2 -+ lea (%rcx,%rcx,4),%rcx # *5 -+ shl \$2,%rcx # magic <<2 -+ mov %rax,24($ctx) # s1 -+ mov %rcx,32($ctx) # s2 -+ movq \$-1,64($ctx) # write impossible value -+___ -+$code.=<<___ if ($flavour !~ /elf32/); -+ mov %r10,0(%rdx) -+ mov %r11,8(%rdx) -+___ -+$code.=<<___ if ($flavour =~ /elf32/); -+ mov %r10d,0(%rdx) -+ mov %r11d,4(%rdx) -+___ -+$code.=<<___; -+ mov \$1,%eax -+ ret -+.size poly1305_init_base2_44,.-poly1305_init_base2_44 -+___ -+{ -+my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = 
map("%ymm$_",(0..5,16,17)); -+my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21)); -+my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25)); -+ -+$code.=<<___; -+.type poly1305_blocks_vpmadd52,\@function,4 -+.align 32 -+poly1305_blocks_vpmadd52: -+ shr \$4,$len -+ jz .Lno_data_vpmadd52 # too short -+ -+ shl \$40,$padbit -+ mov 64($ctx),%r8 # peek on power of the key -+ -+ # if powers of the key are not calculated yet, process up to 3 -+ # blocks with this single-block subroutine, otherwise ensure that -+ # length is divisible by 2 blocks and pass the rest down to next -+ # subroutine... -+ -+ mov \$3,%rax -+ mov \$1,%r10 -+ cmp \$4,$len # is input long -+ cmovae %r10,%rax -+ test %r8,%r8 # is power value impossible? -+ cmovns %r10,%rax -+ -+ and $len,%rax # is input of favourable length? -+ jz .Lblocks_vpmadd52_4x -+ -+ sub %rax,$len -+ mov \$7,%r10d -+ mov \$1,%r11d -+ kmovw %r10d,%k7 -+ lea .L2_44_inp_permd(%rip),%r10 -+ kmovw %r11d,%k1 -+ -+ vmovq $padbit,%x#$PAD -+ vmovdqa64 0(%r10),$inp_permd # .L2_44_inp_permd -+ vmovdqa64 32(%r10),$inp_shift # .L2_44_inp_shift -+ vpermq \$0xcf,$PAD,$PAD -+ vmovdqa64 64(%r10),$reduc_mask # .L2_44_mask -+ -+ vmovdqu64 0($ctx),${Dlo}{%k7}{z} # load hash value -+ vmovdqu64 40($ctx),${r2r1r0}{%k7}{z} # load keys -+ vmovdqu64 32($ctx),${r1r0s2}{%k7}{z} -+ vmovdqu64 24($ctx),${r0s2s1}{%k7}{z} -+ -+ vmovdqa64 96(%r10),$reduc_rght # .L2_44_shift_rgt -+ vmovdqa64 128(%r10),$reduc_left # .L2_44_shift_lft -+ -+ jmp .Loop_vpmadd52 -+ -+.align 32 -+.Loop_vpmadd52: -+ vmovdqu32 0($inp),%x#$T0 # load input as ----3210 -+ lea 16($inp),$inp -+ -+ vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110 -+ vpsrlvq $inp_shift,$T0,$T0 -+ vpandq $reduc_mask,$T0,$T0 -+ vporq $PAD,$T0,$T0 -+ -+ vpaddq $T0,$Dlo,$Dlo # accumulate input -+ -+ vpermq \$0,$Dlo,${H0}{%k7}{z} # smash hash value -+ vpermq \$0b01010101,$Dlo,${H1}{%k7}{z} -+ vpermq \$0b10101010,$Dlo,${H2}{%k7}{z} -+ -+ vpxord $Dlo,$Dlo,$Dlo -+ vpxord $Dhi,$Dhi,$Dhi -+ -+ 
vpmadd52luq $r2r1r0,$H0,$Dlo -+ vpmadd52huq $r2r1r0,$H0,$Dhi -+ -+ vpmadd52luq $r1r0s2,$H1,$Dlo -+ vpmadd52huq $r1r0s2,$H1,$Dhi -+ -+ vpmadd52luq $r0s2s1,$H2,$Dlo -+ vpmadd52huq $r0s2s1,$H2,$Dhi -+ -+ vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword -+ vpsllvq $reduc_left,$Dhi,$Dhi # 0 in topmost qword -+ vpandq $reduc_mask,$Dlo,$Dlo -+ -+ vpaddq $T0,$Dhi,$Dhi -+ -+ vpermq \$0b10010011,$Dhi,$Dhi # 0 in lowest qword -+ -+ vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-) -+ -+ vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word -+ vpandq $reduc_mask,$Dlo,$Dlo -+ -+ vpermq \$0b10010011,$T0,$T0 -+ -+ vpaddq $T0,$Dlo,$Dlo -+ -+ vpermq \$0b10010011,$Dlo,${T0}{%k1}{z} -+ -+ vpaddq $T0,$Dlo,$Dlo -+ vpsllq \$2,$T0,$T0 -+ -+ vpaddq $T0,$Dlo,$Dlo -+ -+ dec %rax # len-=16 -+ jnz .Loop_vpmadd52 -+ -+ vmovdqu64 $Dlo,0($ctx){%k7} # store hash value -+ -+ test $len,$len -+ jnz .Lblocks_vpmadd52_4x -+ -+.Lno_data_vpmadd52: -+ ret -+.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 -+___ -+} -+{ -+######################################################################## -+# As implied by its name 4x subroutine processes 4 blocks in parallel -+# (but handles even 4*n+2 blocks lengths). It takes up to 4th key power -+# and is handled in 256-bit %ymm registers. -+ -+my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); -+my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); -+my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); -+ -+$code.=<<___; -+.type poly1305_blocks_vpmadd52_4x,\@function,4 -+.align 32 -+poly1305_blocks_vpmadd52_4x: -+ shr \$4,$len -+ jz .Lno_data_vpmadd52_4x # too short -+ -+ shl \$40,$padbit -+ mov 64($ctx),%r8 # peek on power of the key -+ -+.Lblocks_vpmadd52_4x: -+ vpbroadcastq $padbit,$PAD -+ -+ vmovdqa64 .Lx_mask44(%rip),$mask44 -+ mov \$5,%eax -+ vmovdqa64 .Lx_mask42(%rip),$mask42 -+ kmovw %eax,%k1 # used in 2x path -+ -+ test %r8,%r8 # is power value impossible? 
-+ js .Linit_vpmadd52 # if it is, then init R[4] -+ -+ vmovq 0($ctx),%x#$H0 # load current hash value -+ vmovq 8($ctx),%x#$H1 -+ vmovq 16($ctx),%x#$H2 -+ -+ test \$3,$len # is length 4*n+2? -+ jnz .Lblocks_vpmadd52_2x_do -+ -+.Lblocks_vpmadd52_4x_do: -+ vpbroadcastq 64($ctx),$R0 # load 4th power of the key -+ vpbroadcastq 96($ctx),$R1 -+ vpbroadcastq 128($ctx),$R2 -+ vpbroadcastq 160($ctx),$S1 -+ -+.Lblocks_vpmadd52_4x_key_loaded: -+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4 -+ vpaddq $R2,$S2,$S2 -+ vpsllq \$2,$S2,$S2 -+ -+ test \$7,$len # is len 8*n? -+ jz .Lblocks_vpmadd52_8x -+ -+ vmovdqu64 16*0($inp),$T2 # load data -+ vmovdqu64 16*2($inp),$T3 -+ lea 16*4($inp),$inp -+ -+ vpunpcklqdq $T3,$T2,$T1 # transpose data -+ vpunpckhqdq $T3,$T2,$T3 -+ -+ # at this point 64-bit lanes are ordered as 3-1-2-0 -+ -+ vpsrlq \$24,$T3,$T2 # splat the data -+ vporq $PAD,$T2,$T2 -+ vpaddq $T2,$H2,$H2 # accumulate input -+ vpandq $mask44,$T1,$T0 -+ vpsrlq \$44,$T1,$T1 -+ vpsllq \$20,$T3,$T3 -+ vporq $T3,$T1,$T1 -+ vpandq $mask44,$T1,$T1 -+ -+ sub \$4,$len -+ jz .Ltail_vpmadd52_4x -+ jmp .Loop_vpmadd52_4x -+ ud2 -+ -+.align 32 -+.Linit_vpmadd52: -+ vmovq 24($ctx),%x#$S1 # load key -+ vmovq 56($ctx),%x#$H2 -+ vmovq 32($ctx),%x#$S2 -+ vmovq 40($ctx),%x#$R0 -+ vmovq 48($ctx),%x#$R1 -+ -+ vmovdqa $R0,$H0 -+ vmovdqa $R1,$H1 -+ vmovdqa $H2,$R2 -+ -+ mov \$2,%eax -+ -+.Lmul_init_vpmadd52: -+ vpxorq $D0lo,$D0lo,$D0lo -+ vpmadd52luq $H2,$S1,$D0lo -+ vpxorq $D0hi,$D0hi,$D0hi -+ vpmadd52huq $H2,$S1,$D0hi -+ vpxorq $D1lo,$D1lo,$D1lo -+ vpmadd52luq $H2,$S2,$D1lo -+ vpxorq $D1hi,$D1hi,$D1hi -+ vpmadd52huq $H2,$S2,$D1hi -+ vpxorq $D2lo,$D2lo,$D2lo -+ vpmadd52luq $H2,$R0,$D2lo -+ vpxorq $D2hi,$D2hi,$D2hi -+ vpmadd52huq $H2,$R0,$D2hi -+ -+ vpmadd52luq $H0,$R0,$D0lo -+ vpmadd52huq $H0,$R0,$D0hi -+ vpmadd52luq $H0,$R1,$D1lo -+ vpmadd52huq $H0,$R1,$D1hi -+ vpmadd52luq $H0,$R2,$D2lo -+ vpmadd52huq $H0,$R2,$D2hi -+ -+ vpmadd52luq $H1,$S2,$D0lo -+ vpmadd52huq $H1,$S2,$D0hi -+ vpmadd52luq $H1,$R0,$D1lo -+ 
vpmadd52huq $H1,$R0,$D1hi -+ vpmadd52luq $H1,$R1,$D2lo -+ vpmadd52huq $H1,$R1,$D2hi -+ -+ ################################################################ -+ # partial reduction -+ vpsrlq \$44,$D0lo,$tmp -+ vpsllq \$8,$D0hi,$D0hi -+ vpandq $mask44,$D0lo,$H0 -+ vpaddq $tmp,$D0hi,$D0hi -+ -+ vpaddq $D0hi,$D1lo,$D1lo -+ -+ vpsrlq \$44,$D1lo,$tmp -+ vpsllq \$8,$D1hi,$D1hi -+ vpandq $mask44,$D1lo,$H1 -+ vpaddq $tmp,$D1hi,$D1hi -+ -+ vpaddq $D1hi,$D2lo,$D2lo -+ -+ vpsrlq \$42,$D2lo,$tmp -+ vpsllq \$10,$D2hi,$D2hi -+ vpandq $mask42,$D2lo,$H2 -+ vpaddq $tmp,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ vpsllq \$2,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ -+ vpsrlq \$44,$H0,$tmp # additional step -+ vpandq $mask44,$H0,$H0 -+ -+ vpaddq $tmp,$H1,$H1 -+ -+ dec %eax -+ jz .Ldone_init_vpmadd52 -+ -+ vpunpcklqdq $R1,$H1,$R1 # 1,2 -+ vpbroadcastq %x#$H1,%x#$H1 # 2,2 -+ vpunpcklqdq $R2,$H2,$R2 -+ vpbroadcastq %x#$H2,%x#$H2 -+ vpunpcklqdq $R0,$H0,$R0 -+ vpbroadcastq %x#$H0,%x#$H0 -+ -+ vpsllq \$2,$R1,$S1 # S1 = R1*5*4 -+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4 -+ vpaddq $R1,$S1,$S1 -+ vpaddq $R2,$S2,$S2 -+ vpsllq \$2,$S1,$S1 -+ vpsllq \$2,$S2,$S2 -+ -+ jmp .Lmul_init_vpmadd52 -+ ud2 -+ -+.align 32 -+.Ldone_init_vpmadd52: -+ vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4 -+ vinserti128 \$1,%x#$R2,$H2,$R2 -+ vinserti128 \$1,%x#$R0,$H0,$R0 -+ -+ vpermq \$0b11011000,$R1,$R1 # 1,3,2,4 -+ vpermq \$0b11011000,$R2,$R2 -+ vpermq \$0b11011000,$R0,$R0 -+ -+ vpsllq \$2,$R1,$S1 # S1 = R1*5*4 -+ vpaddq $R1,$S1,$S1 -+ vpsllq \$2,$S1,$S1 -+ -+ vmovq 0($ctx),%x#$H0 # load current hash value -+ vmovq 8($ctx),%x#$H1 -+ vmovq 16($ctx),%x#$H2 -+ -+ test \$3,$len # is length 4*n+2? 
-+ jnz .Ldone_init_vpmadd52_2x -+ -+ vmovdqu64 $R0,64($ctx) # save key powers -+ vpbroadcastq %x#$R0,$R0 # broadcast 4th power -+ vmovdqu64 $R1,96($ctx) -+ vpbroadcastq %x#$R1,$R1 -+ vmovdqu64 $R2,128($ctx) -+ vpbroadcastq %x#$R2,$R2 -+ vmovdqu64 $S1,160($ctx) -+ vpbroadcastq %x#$S1,$S1 -+ -+ jmp .Lblocks_vpmadd52_4x_key_loaded -+ ud2 -+ -+.align 32 -+.Ldone_init_vpmadd52_2x: -+ vmovdqu64 $R0,64($ctx) # save key powers -+ vpsrldq \$8,$R0,$R0 # 0-1-0-2 -+ vmovdqu64 $R1,96($ctx) -+ vpsrldq \$8,$R1,$R1 -+ vmovdqu64 $R2,128($ctx) -+ vpsrldq \$8,$R2,$R2 -+ vmovdqu64 $S1,160($ctx) -+ vpsrldq \$8,$S1,$S1 -+ jmp .Lblocks_vpmadd52_2x_key_loaded -+ ud2 -+ -+.align 32 -+.Lblocks_vpmadd52_2x_do: -+ vmovdqu64 128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers -+ vmovdqu64 160+8($ctx),${S1}{%k1}{z} -+ vmovdqu64 64+8($ctx),${R0}{%k1}{z} -+ vmovdqu64 96+8($ctx),${R1}{%k1}{z} -+ -+.Lblocks_vpmadd52_2x_key_loaded: -+ vmovdqu64 16*0($inp),$T2 # load data -+ vpxorq $T3,$T3,$T3 -+ lea 16*2($inp),$inp -+ -+ vpunpcklqdq $T3,$T2,$T1 # transpose data -+ vpunpckhqdq $T3,$T2,$T3 -+ -+ # at this point 64-bit lanes are ordered as x-1-x-0 -+ -+ vpsrlq \$24,$T3,$T2 # splat the data -+ vporq $PAD,$T2,$T2 -+ vpaddq $T2,$H2,$H2 # accumulate input -+ vpandq $mask44,$T1,$T0 -+ vpsrlq \$44,$T1,$T1 -+ vpsllq \$20,$T3,$T3 -+ vporq $T3,$T1,$T1 -+ vpandq $mask44,$T1,$T1 -+ -+ jmp .Ltail_vpmadd52_2x -+ ud2 -+ -+.align 32 -+.Loop_vpmadd52_4x: -+ #vpaddq $T2,$H2,$H2 # accumulate input -+ vpaddq $T0,$H0,$H0 -+ vpaddq $T1,$H1,$H1 -+ -+ vpxorq $D0lo,$D0lo,$D0lo -+ vpmadd52luq $H2,$S1,$D0lo -+ vpxorq $D0hi,$D0hi,$D0hi -+ vpmadd52huq $H2,$S1,$D0hi -+ vpxorq $D1lo,$D1lo,$D1lo -+ vpmadd52luq $H2,$S2,$D1lo -+ vpxorq $D1hi,$D1hi,$D1hi -+ vpmadd52huq $H2,$S2,$D1hi -+ vpxorq $D2lo,$D2lo,$D2lo -+ vpmadd52luq $H2,$R0,$D2lo -+ vpxorq $D2hi,$D2hi,$D2hi -+ vpmadd52huq $H2,$R0,$D2hi -+ -+ vmovdqu64 16*0($inp),$T2 # load data -+ vmovdqu64 16*2($inp),$T3 -+ lea 16*4($inp),$inp -+ vpmadd52luq $H0,$R0,$D0lo -+ vpmadd52huq 
$H0,$R0,$D0hi -+ vpmadd52luq $H0,$R1,$D1lo -+ vpmadd52huq $H0,$R1,$D1hi -+ vpmadd52luq $H0,$R2,$D2lo -+ vpmadd52huq $H0,$R2,$D2hi -+ -+ vpunpcklqdq $T3,$T2,$T1 # transpose data -+ vpunpckhqdq $T3,$T2,$T3 -+ vpmadd52luq $H1,$S2,$D0lo -+ vpmadd52huq $H1,$S2,$D0hi -+ vpmadd52luq $H1,$R0,$D1lo -+ vpmadd52huq $H1,$R0,$D1hi -+ vpmadd52luq $H1,$R1,$D2lo -+ vpmadd52huq $H1,$R1,$D2hi -+ -+ ################################################################ -+ # partial reduction (interleaved with data splat) -+ vpsrlq \$44,$D0lo,$tmp -+ vpsllq \$8,$D0hi,$D0hi -+ vpandq $mask44,$D0lo,$H0 -+ vpaddq $tmp,$D0hi,$D0hi -+ -+ vpsrlq \$24,$T3,$T2 -+ vporq $PAD,$T2,$T2 -+ vpaddq $D0hi,$D1lo,$D1lo -+ -+ vpsrlq \$44,$D1lo,$tmp -+ vpsllq \$8,$D1hi,$D1hi -+ vpandq $mask44,$D1lo,$H1 -+ vpaddq $tmp,$D1hi,$D1hi -+ -+ vpandq $mask44,$T1,$T0 -+ vpsrlq \$44,$T1,$T1 -+ vpsllq \$20,$T3,$T3 -+ vpaddq $D1hi,$D2lo,$D2lo -+ -+ vpsrlq \$42,$D2lo,$tmp -+ vpsllq \$10,$D2hi,$D2hi -+ vpandq $mask42,$D2lo,$H2 -+ vpaddq $tmp,$D2hi,$D2hi -+ -+ vpaddq $T2,$H2,$H2 # accumulate input -+ vpaddq $D2hi,$H0,$H0 -+ vpsllq \$2,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ vporq $T3,$T1,$T1 -+ vpandq $mask44,$T1,$T1 -+ -+ vpsrlq \$44,$H0,$tmp # additional step -+ vpandq $mask44,$H0,$H0 -+ -+ vpaddq $tmp,$H1,$H1 -+ -+ sub \$4,$len # len-=64 -+ jnz .Loop_vpmadd52_4x -+ -+.Ltail_vpmadd52_4x: -+ vmovdqu64 128($ctx),$R2 # load all key powers -+ vmovdqu64 160($ctx),$S1 -+ vmovdqu64 64($ctx),$R0 -+ vmovdqu64 96($ctx),$R1 -+ -+.Ltail_vpmadd52_2x: -+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4 -+ vpaddq $R2,$S2,$S2 -+ vpsllq \$2,$S2,$S2 -+ -+ #vpaddq $T2,$H2,$H2 # accumulate input -+ vpaddq $T0,$H0,$H0 -+ vpaddq $T1,$H1,$H1 -+ -+ vpxorq $D0lo,$D0lo,$D0lo -+ vpmadd52luq $H2,$S1,$D0lo -+ vpxorq $D0hi,$D0hi,$D0hi -+ vpmadd52huq $H2,$S1,$D0hi -+ vpxorq $D1lo,$D1lo,$D1lo -+ vpmadd52luq $H2,$S2,$D1lo -+ vpxorq $D1hi,$D1hi,$D1hi -+ vpmadd52huq $H2,$S2,$D1hi -+ vpxorq $D2lo,$D2lo,$D2lo -+ vpmadd52luq $H2,$R0,$D2lo -+ vpxorq $D2hi,$D2hi,$D2hi -+ 
vpmadd52huq $H2,$R0,$D2hi -+ -+ vpmadd52luq $H0,$R0,$D0lo -+ vpmadd52huq $H0,$R0,$D0hi -+ vpmadd52luq $H0,$R1,$D1lo -+ vpmadd52huq $H0,$R1,$D1hi -+ vpmadd52luq $H0,$R2,$D2lo -+ vpmadd52huq $H0,$R2,$D2hi -+ -+ vpmadd52luq $H1,$S2,$D0lo -+ vpmadd52huq $H1,$S2,$D0hi -+ vpmadd52luq $H1,$R0,$D1lo -+ vpmadd52huq $H1,$R0,$D1hi -+ vpmadd52luq $H1,$R1,$D2lo -+ vpmadd52huq $H1,$R1,$D2hi -+ -+ ################################################################ -+ # horizontal addition -+ -+ mov \$1,%eax -+ kmovw %eax,%k1 -+ vpsrldq \$8,$D0lo,$T0 -+ vpsrldq \$8,$D0hi,$H0 -+ vpsrldq \$8,$D1lo,$T1 -+ vpsrldq \$8,$D1hi,$H1 -+ vpaddq $T0,$D0lo,$D0lo -+ vpaddq $H0,$D0hi,$D0hi -+ vpsrldq \$8,$D2lo,$T2 -+ vpsrldq \$8,$D2hi,$H2 -+ vpaddq $T1,$D1lo,$D1lo -+ vpaddq $H1,$D1hi,$D1hi -+ vpermq \$0x2,$D0lo,$T0 -+ vpermq \$0x2,$D0hi,$H0 -+ vpaddq $T2,$D2lo,$D2lo -+ vpaddq $H2,$D2hi,$D2hi -+ -+ vpermq \$0x2,$D1lo,$T1 -+ vpermq \$0x2,$D1hi,$H1 -+ vpaddq $T0,$D0lo,${D0lo}{%k1}{z} -+ vpaddq $H0,$D0hi,${D0hi}{%k1}{z} -+ vpermq \$0x2,$D2lo,$T2 -+ vpermq \$0x2,$D2hi,$H2 -+ vpaddq $T1,$D1lo,${D1lo}{%k1}{z} -+ vpaddq $H1,$D1hi,${D1hi}{%k1}{z} -+ vpaddq $T2,$D2lo,${D2lo}{%k1}{z} -+ vpaddq $H2,$D2hi,${D2hi}{%k1}{z} -+ -+ ################################################################ -+ # partial reduction -+ vpsrlq \$44,$D0lo,$tmp -+ vpsllq \$8,$D0hi,$D0hi -+ vpandq $mask44,$D0lo,$H0 -+ vpaddq $tmp,$D0hi,$D0hi -+ -+ vpaddq $D0hi,$D1lo,$D1lo -+ -+ vpsrlq \$44,$D1lo,$tmp -+ vpsllq \$8,$D1hi,$D1hi -+ vpandq $mask44,$D1lo,$H1 -+ vpaddq $tmp,$D1hi,$D1hi -+ -+ vpaddq $D1hi,$D2lo,$D2lo -+ -+ vpsrlq \$42,$D2lo,$tmp -+ vpsllq \$10,$D2hi,$D2hi -+ vpandq $mask42,$D2lo,$H2 -+ vpaddq $tmp,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ vpsllq \$2,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ -+ vpsrlq \$44,$H0,$tmp # additional step -+ vpandq $mask44,$H0,$H0 -+ -+ vpaddq $tmp,$H1,$H1 -+ # at this point $len is -+ # either 4*n+2 or 0... 
-+ sub \$2,$len # len-=32 -+ ja .Lblocks_vpmadd52_4x_do -+ -+ vmovq %x#$H0,0($ctx) -+ vmovq %x#$H1,8($ctx) -+ vmovq %x#$H2,16($ctx) -+ vzeroall -+ -+.Lno_data_vpmadd52_4x: -+ ret -+.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x -+___ -+} -+{ -+######################################################################## -+# As implied by its name 8x subroutine processes 8 blocks in parallel... -+# This is intermediate version, as it's used only in cases when input -+# length is either 8*n, 8*n+1 or 8*n+2... -+ -+my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17)); -+my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23)); -+my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31)); -+my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10)); -+ -+$code.=<<___; -+.type poly1305_blocks_vpmadd52_8x,\@function,4 -+.align 32 -+poly1305_blocks_vpmadd52_8x: -+ shr \$4,$len -+ jz .Lno_data_vpmadd52_8x # too short -+ -+ shl \$40,$padbit -+ mov 64($ctx),%r8 # peek on power of the key -+ -+ vmovdqa64 .Lx_mask44(%rip),$mask44 -+ vmovdqa64 .Lx_mask42(%rip),$mask42 -+ -+ test %r8,%r8 # is power value impossible? 
-+ js .Linit_vpmadd52 # if it is, then init R[4] -+ -+ vmovq 0($ctx),%x#$H0 # load current hash value -+ vmovq 8($ctx),%x#$H1 -+ vmovq 16($ctx),%x#$H2 -+ -+.Lblocks_vpmadd52_8x: -+ ################################################################ -+ # fist we calculate more key powers -+ -+ vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers -+ vmovdqu64 160($ctx),$S1 -+ vmovdqu64 64($ctx),$R0 -+ vmovdqu64 96($ctx),$R1 -+ -+ vpsllq \$2,$R2,$S2 # S2 = R2*5*4 -+ vpaddq $R2,$S2,$S2 -+ vpsllq \$2,$S2,$S2 -+ -+ vpbroadcastq %x#$R2,$RR2 # broadcast 4th power -+ vpbroadcastq %x#$R0,$RR0 -+ vpbroadcastq %x#$R1,$RR1 -+ -+ vpxorq $D0lo,$D0lo,$D0lo -+ vpmadd52luq $RR2,$S1,$D0lo -+ vpxorq $D0hi,$D0hi,$D0hi -+ vpmadd52huq $RR2,$S1,$D0hi -+ vpxorq $D1lo,$D1lo,$D1lo -+ vpmadd52luq $RR2,$S2,$D1lo -+ vpxorq $D1hi,$D1hi,$D1hi -+ vpmadd52huq $RR2,$S2,$D1hi -+ vpxorq $D2lo,$D2lo,$D2lo -+ vpmadd52luq $RR2,$R0,$D2lo -+ vpxorq $D2hi,$D2hi,$D2hi -+ vpmadd52huq $RR2,$R0,$D2hi -+ -+ vpmadd52luq $RR0,$R0,$D0lo -+ vpmadd52huq $RR0,$R0,$D0hi -+ vpmadd52luq $RR0,$R1,$D1lo -+ vpmadd52huq $RR0,$R1,$D1hi -+ vpmadd52luq $RR0,$R2,$D2lo -+ vpmadd52huq $RR0,$R2,$D2hi -+ -+ vpmadd52luq $RR1,$S2,$D0lo -+ vpmadd52huq $RR1,$S2,$D0hi -+ vpmadd52luq $RR1,$R0,$D1lo -+ vpmadd52huq $RR1,$R0,$D1hi -+ vpmadd52luq $RR1,$R1,$D2lo -+ vpmadd52huq $RR1,$R1,$D2hi -+ -+ ################################################################ -+ # partial reduction -+ vpsrlq \$44,$D0lo,$tmp -+ vpsllq \$8,$D0hi,$D0hi -+ vpandq $mask44,$D0lo,$RR0 -+ vpaddq $tmp,$D0hi,$D0hi -+ -+ vpaddq $D0hi,$D1lo,$D1lo -+ -+ vpsrlq \$44,$D1lo,$tmp -+ vpsllq \$8,$D1hi,$D1hi -+ vpandq $mask44,$D1lo,$RR1 -+ vpaddq $tmp,$D1hi,$D1hi -+ -+ vpaddq $D1hi,$D2lo,$D2lo -+ -+ vpsrlq \$42,$D2lo,$tmp -+ vpsllq \$10,$D2hi,$D2hi -+ vpandq $mask42,$D2lo,$RR2 -+ vpaddq $tmp,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$RR0,$RR0 -+ vpsllq \$2,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$RR0,$RR0 -+ -+ vpsrlq \$44,$RR0,$tmp # additional step -+ vpandq $mask44,$RR0,$RR0 -+ -+ vpaddq $tmp,$RR1,$RR1 
-+ -+ ################################################################ -+ # At this point Rx holds 1324 powers, RRx - 5768, and the goal -+ # is 15263748, which reflects how data is loaded... -+ -+ vpunpcklqdq $R2,$RR2,$T2 # 3748 -+ vpunpckhqdq $R2,$RR2,$R2 # 1526 -+ vpunpcklqdq $R0,$RR0,$T0 -+ vpunpckhqdq $R0,$RR0,$R0 -+ vpunpcklqdq $R1,$RR1,$T1 -+ vpunpckhqdq $R1,$RR1,$R1 -+___ -+######## switch to %zmm -+map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); -+map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); -+map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); -+map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2); -+ -+$code.=<<___; -+ vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748 -+ vshufi64x2 \$0x44,$R0,$T0,$RR0 -+ vshufi64x2 \$0x44,$R1,$T1,$RR1 -+ -+ vmovdqu64 16*0($inp),$T2 # load data -+ vmovdqu64 16*4($inp),$T3 -+ lea 16*8($inp),$inp -+ -+ vpsllq \$2,$RR2,$SS2 # S2 = R2*5*4 -+ vpsllq \$2,$RR1,$SS1 # S1 = R1*5*4 -+ vpaddq $RR2,$SS2,$SS2 -+ vpaddq $RR1,$SS1,$SS1 -+ vpsllq \$2,$SS2,$SS2 -+ vpsllq \$2,$SS1,$SS1 -+ -+ vpbroadcastq $padbit,$PAD -+ vpbroadcastq %x#$mask44,$mask44 -+ vpbroadcastq %x#$mask42,$mask42 -+ -+ vpbroadcastq %x#$SS1,$S1 # broadcast 8th power -+ vpbroadcastq %x#$SS2,$S2 -+ vpbroadcastq %x#$RR0,$R0 -+ vpbroadcastq %x#$RR1,$R1 -+ vpbroadcastq %x#$RR2,$R2 -+ -+ vpunpcklqdq $T3,$T2,$T1 # transpose data -+ vpunpckhqdq $T3,$T2,$T3 -+ -+ # at this point 64-bit lanes are ordered as 73625140 -+ -+ vpsrlq \$24,$T3,$T2 # splat the data -+ vporq $PAD,$T2,$T2 -+ vpaddq $T2,$H2,$H2 # accumulate input -+ vpandq $mask44,$T1,$T0 -+ vpsrlq \$44,$T1,$T1 -+ vpsllq \$20,$T3,$T3 -+ vporq $T3,$T1,$T1 -+ vpandq $mask44,$T1,$T1 -+ -+ sub \$8,$len -+ jz .Ltail_vpmadd52_8x -+ jmp .Loop_vpmadd52_8x -+ -+.align 32 -+.Loop_vpmadd52_8x: -+ #vpaddq $T2,$H2,$H2 # accumulate input -+ vpaddq $T0,$H0,$H0 -+ vpaddq $T1,$H1,$H1 -+ -+ vpxorq $D0lo,$D0lo,$D0lo -+ vpmadd52luq $H2,$S1,$D0lo -+ vpxorq $D0hi,$D0hi,$D0hi -+ vpmadd52huq $H2,$S1,$D0hi -+ vpxorq $D1lo,$D1lo,$D1lo -+ vpmadd52luq 
$H2,$S2,$D1lo -+ vpxorq $D1hi,$D1hi,$D1hi -+ vpmadd52huq $H2,$S2,$D1hi -+ vpxorq $D2lo,$D2lo,$D2lo -+ vpmadd52luq $H2,$R0,$D2lo -+ vpxorq $D2hi,$D2hi,$D2hi -+ vpmadd52huq $H2,$R0,$D2hi -+ -+ vmovdqu64 16*0($inp),$T2 # load data -+ vmovdqu64 16*4($inp),$T3 -+ lea 16*8($inp),$inp -+ vpmadd52luq $H0,$R0,$D0lo -+ vpmadd52huq $H0,$R0,$D0hi -+ vpmadd52luq $H0,$R1,$D1lo -+ vpmadd52huq $H0,$R1,$D1hi -+ vpmadd52luq $H0,$R2,$D2lo -+ vpmadd52huq $H0,$R2,$D2hi -+ -+ vpunpcklqdq $T3,$T2,$T1 # transpose data -+ vpunpckhqdq $T3,$T2,$T3 -+ vpmadd52luq $H1,$S2,$D0lo -+ vpmadd52huq $H1,$S2,$D0hi -+ vpmadd52luq $H1,$R0,$D1lo -+ vpmadd52huq $H1,$R0,$D1hi -+ vpmadd52luq $H1,$R1,$D2lo -+ vpmadd52huq $H1,$R1,$D2hi -+ -+ ################################################################ -+ # partial reduction (interleaved with data splat) -+ vpsrlq \$44,$D0lo,$tmp -+ vpsllq \$8,$D0hi,$D0hi -+ vpandq $mask44,$D0lo,$H0 -+ vpaddq $tmp,$D0hi,$D0hi -+ -+ vpsrlq \$24,$T3,$T2 -+ vporq $PAD,$T2,$T2 -+ vpaddq $D0hi,$D1lo,$D1lo -+ -+ vpsrlq \$44,$D1lo,$tmp -+ vpsllq \$8,$D1hi,$D1hi -+ vpandq $mask44,$D1lo,$H1 -+ vpaddq $tmp,$D1hi,$D1hi -+ -+ vpandq $mask44,$T1,$T0 -+ vpsrlq \$44,$T1,$T1 -+ vpsllq \$20,$T3,$T3 -+ vpaddq $D1hi,$D2lo,$D2lo -+ -+ vpsrlq \$42,$D2lo,$tmp -+ vpsllq \$10,$D2hi,$D2hi -+ vpandq $mask42,$D2lo,$H2 -+ vpaddq $tmp,$D2hi,$D2hi -+ -+ vpaddq $T2,$H2,$H2 # accumulate input -+ vpaddq $D2hi,$H0,$H0 -+ vpsllq \$2,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ vporq $T3,$T1,$T1 -+ vpandq $mask44,$T1,$T1 -+ -+ vpsrlq \$44,$H0,$tmp # additional step -+ vpandq $mask44,$H0,$H0 -+ -+ vpaddq $tmp,$H1,$H1 -+ -+ sub \$8,$len # len-=128 -+ jnz .Loop_vpmadd52_8x -+ -+.Ltail_vpmadd52_8x: -+ #vpaddq $T2,$H2,$H2 # accumulate input -+ vpaddq $T0,$H0,$H0 -+ vpaddq $T1,$H1,$H1 -+ -+ vpxorq $D0lo,$D0lo,$D0lo -+ vpmadd52luq $H2,$SS1,$D0lo -+ vpxorq $D0hi,$D0hi,$D0hi -+ vpmadd52huq $H2,$SS1,$D0hi -+ vpxorq $D1lo,$D1lo,$D1lo -+ vpmadd52luq $H2,$SS2,$D1lo -+ vpxorq $D1hi,$D1hi,$D1hi -+ vpmadd52huq $H2,$SS2,$D1hi 
-+ vpxorq $D2lo,$D2lo,$D2lo -+ vpmadd52luq $H2,$RR0,$D2lo -+ vpxorq $D2hi,$D2hi,$D2hi -+ vpmadd52huq $H2,$RR0,$D2hi -+ -+ vpmadd52luq $H0,$RR0,$D0lo -+ vpmadd52huq $H0,$RR0,$D0hi -+ vpmadd52luq $H0,$RR1,$D1lo -+ vpmadd52huq $H0,$RR1,$D1hi -+ vpmadd52luq $H0,$RR2,$D2lo -+ vpmadd52huq $H0,$RR2,$D2hi -+ -+ vpmadd52luq $H1,$SS2,$D0lo -+ vpmadd52huq $H1,$SS2,$D0hi -+ vpmadd52luq $H1,$RR0,$D1lo -+ vpmadd52huq $H1,$RR0,$D1hi -+ vpmadd52luq $H1,$RR1,$D2lo -+ vpmadd52huq $H1,$RR1,$D2hi -+ -+ ################################################################ -+ # horizontal addition -+ -+ mov \$1,%eax -+ kmovw %eax,%k1 -+ vpsrldq \$8,$D0lo,$T0 -+ vpsrldq \$8,$D0hi,$H0 -+ vpsrldq \$8,$D1lo,$T1 -+ vpsrldq \$8,$D1hi,$H1 -+ vpaddq $T0,$D0lo,$D0lo -+ vpaddq $H0,$D0hi,$D0hi -+ vpsrldq \$8,$D2lo,$T2 -+ vpsrldq \$8,$D2hi,$H2 -+ vpaddq $T1,$D1lo,$D1lo -+ vpaddq $H1,$D1hi,$D1hi -+ vpermq \$0x2,$D0lo,$T0 -+ vpermq \$0x2,$D0hi,$H0 -+ vpaddq $T2,$D2lo,$D2lo -+ vpaddq $H2,$D2hi,$D2hi -+ -+ vpermq \$0x2,$D1lo,$T1 -+ vpermq \$0x2,$D1hi,$H1 -+ vpaddq $T0,$D0lo,$D0lo -+ vpaddq $H0,$D0hi,$D0hi -+ vpermq \$0x2,$D2lo,$T2 -+ vpermq \$0x2,$D2hi,$H2 -+ vpaddq $T1,$D1lo,$D1lo -+ vpaddq $H1,$D1hi,$D1hi -+ vextracti64x4 \$1,$D0lo,%y#$T0 -+ vextracti64x4 \$1,$D0hi,%y#$H0 -+ vpaddq $T2,$D2lo,$D2lo -+ vpaddq $H2,$D2hi,$D2hi -+ -+ vextracti64x4 \$1,$D1lo,%y#$T1 -+ vextracti64x4 \$1,$D1hi,%y#$H1 -+ vextracti64x4 \$1,$D2lo,%y#$T2 -+ vextracti64x4 \$1,$D2hi,%y#$H2 -+___ -+######## switch back to %ymm -+map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2); -+map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi); -+map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD); -+ -+$code.=<<___; -+ vpaddq $T0,$D0lo,${D0lo}{%k1}{z} -+ vpaddq $H0,$D0hi,${D0hi}{%k1}{z} -+ vpaddq $T1,$D1lo,${D1lo}{%k1}{z} -+ vpaddq $H1,$D1hi,${D1hi}{%k1}{z} -+ vpaddq $T2,$D2lo,${D2lo}{%k1}{z} -+ vpaddq $H2,$D2hi,${D2hi}{%k1}{z} -+ -+ ################################################################ -+ # partial reduction -+ vpsrlq \$44,$D0lo,$tmp 
-+ vpsllq \$8,$D0hi,$D0hi -+ vpandq $mask44,$D0lo,$H0 -+ vpaddq $tmp,$D0hi,$D0hi -+ -+ vpaddq $D0hi,$D1lo,$D1lo -+ -+ vpsrlq \$44,$D1lo,$tmp -+ vpsllq \$8,$D1hi,$D1hi -+ vpandq $mask44,$D1lo,$H1 -+ vpaddq $tmp,$D1hi,$D1hi -+ -+ vpaddq $D1hi,$D2lo,$D2lo -+ -+ vpsrlq \$42,$D2lo,$tmp -+ vpsllq \$10,$D2hi,$D2hi -+ vpandq $mask42,$D2lo,$H2 -+ vpaddq $tmp,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ vpsllq \$2,$D2hi,$D2hi -+ -+ vpaddq $D2hi,$H0,$H0 -+ -+ vpsrlq \$44,$H0,$tmp # additional step -+ vpandq $mask44,$H0,$H0 -+ -+ vpaddq $tmp,$H1,$H1 -+ -+ ################################################################ -+ -+ vmovq %x#$H0,0($ctx) -+ vmovq %x#$H1,8($ctx) -+ vmovq %x#$H2,16($ctx) -+ vzeroall -+ -+.Lno_data_vpmadd52_8x: -+ ret -+.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x -+___ -+} -+$code.=<<___; -+.type poly1305_emit_base2_44,\@function,3 -+.align 32 -+poly1305_emit_base2_44: -+ mov 0($ctx),%r8 # load hash value -+ mov 8($ctx),%r9 -+ mov 16($ctx),%r10 -+ -+ mov %r9,%rax -+ shr \$20,%r9 -+ shl \$44,%rax -+ mov %r10,%rcx -+ shr \$40,%r10 -+ shl \$24,%rcx -+ -+ add %rax,%r8 -+ adc %rcx,%r9 -+ adc \$0,%r10 -+ -+ mov %r8,%rax -+ add \$5,%r8 # compare to modulus -+ mov %r9,%rcx -+ adc \$0,%r9 -+ adc \$0,%r10 -+ shr \$2,%r10 # did 130-bit value overflow? 
-+ cmovnz %r8,%rax -+ cmovnz %r9,%rcx -+ -+ add 0($nonce),%rax # accumulate nonce -+ adc 8($nonce),%rcx -+ mov %rax,0($mac) # write result -+ mov %rcx,8($mac) -+ -+ ret -+.size poly1305_emit_base2_44,.-poly1305_emit_base2_44 -+___ -+} } } -+$code.=<<___; -+.align 64 -+.Lconst: -+.Lmask24: -+.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 -+.L129: -+.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 -+.Lmask26: -+.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 -+.Lpermd_avx2: -+.long 2,2,2,3,2,0,2,1 -+.Lpermd_avx512: -+.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 -+ -+.L2_44_inp_permd: -+.long 0,1,1,2,2,3,7,7 -+.L2_44_inp_shift: -+.quad 0,12,24,64 -+.L2_44_mask: -+.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff -+.L2_44_shift_rgt: -+.quad 44,44,42,64 -+.L2_44_shift_lft: -+.quad 8,8,10,64 -+ -+.align 64 -+.Lx_mask44: -+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff -+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff -+.Lx_mask42: -+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff -+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff -+___ -+} -+$code.=<<___; -+.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" -+.align 16 -+___ -+ -+{ # chacha20-poly1305 helpers -+my ($out,$inp,$otp,$len)=$win64 ? 
("%rcx","%rdx","%r8", "%r9") : # Win64 order -+ ("%rdi","%rsi","%rdx","%rcx"); # Unix order -+$code.=<<___; -+.globl xor128_encrypt_n_pad -+.type xor128_encrypt_n_pad,\@abi-omnipotent -+.align 16 -+xor128_encrypt_n_pad: -+ sub $otp,$inp -+ sub $otp,$out -+ mov $len,%r10 # put len aside -+ shr \$4,$len # len / 16 -+ jz .Ltail_enc -+ nop -+.Loop_enc_xmm: -+ movdqu ($inp,$otp),%xmm0 -+ pxor ($otp),%xmm0 -+ movdqu %xmm0,($out,$otp) -+ movdqa %xmm0,($otp) -+ lea 16($otp),$otp -+ dec $len -+ jnz .Loop_enc_xmm -+ -+ and \$15,%r10 # len % 16 -+ jz .Ldone_enc -+ -+.Ltail_enc: -+ mov \$16,$len -+ sub %r10,$len -+ xor %eax,%eax -+.Loop_enc_byte: -+ mov ($inp,$otp),%al -+ xor ($otp),%al -+ mov %al,($out,$otp) -+ mov %al,($otp) -+ lea 1($otp),$otp -+ dec %r10 -+ jnz .Loop_enc_byte -+ -+ xor %eax,%eax -+.Loop_enc_pad: -+ mov %al,($otp) -+ lea 1($otp),$otp -+ dec $len -+ jnz .Loop_enc_pad -+ -+.Ldone_enc: -+ mov $otp,%rax -+ ret -+.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad -+ -+.globl xor128_decrypt_n_pad -+.type xor128_decrypt_n_pad,\@abi-omnipotent -+.align 16 -+xor128_decrypt_n_pad: -+ sub $otp,$inp -+ sub $otp,$out -+ mov $len,%r10 # put len aside -+ shr \$4,$len # len / 16 -+ jz .Ltail_dec -+ nop -+.Loop_dec_xmm: -+ movdqu ($inp,$otp),%xmm0 -+ movdqa ($otp),%xmm1 -+ pxor %xmm0,%xmm1 -+ movdqu %xmm1,($out,$otp) -+ movdqa %xmm0,($otp) -+ lea 16($otp),$otp -+ dec $len -+ jnz .Loop_dec_xmm -+ -+ pxor %xmm1,%xmm1 -+ and \$15,%r10 # len % 16 -+ jz .Ldone_dec -+ -+.Ltail_dec: -+ mov \$16,$len -+ sub %r10,$len -+ xor %eax,%eax -+ xor %r11,%r11 -+.Loop_dec_byte: -+ mov ($inp,$otp),%r11b -+ mov ($otp),%al -+ xor %r11b,%al -+ mov %al,($out,$otp) -+ mov %r11b,($otp) -+ lea 1($otp),$otp -+ dec %r10 -+ jnz .Loop_dec_byte -+ -+ xor %eax,%eax -+.Loop_dec_pad: -+ mov %al,($otp) -+ lea 1($otp),$otp -+ dec $len -+ jnz .Loop_dec_pad -+ -+.Ldone_dec: -+ mov $otp,%rax -+ ret -+.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad -+___ -+} -+ -+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD 
*rec,ULONG64 frame, -+# CONTEXT *context,DISPATCHER_CONTEXT *disp) -+if ($win64) { -+$rec="%rcx"; -+$frame="%rdx"; -+$context="%r8"; -+$disp="%r9"; -+ -+$code.=<<___; -+.extern __imp_RtlVirtualUnwind -+.type se_handler,\@abi-omnipotent -+.align 16 -+se_handler: -+ push %rsi -+ push %rdi -+ push %rbx -+ push %rbp -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ pushfq -+ sub \$64,%rsp -+ -+ mov 120($context),%rax # pull context->Rax -+ mov 248($context),%rbx # pull context->Rip -+ -+ mov 8($disp),%rsi # disp->ImageBase -+ mov 56($disp),%r11 # disp->HandlerData -+ -+ mov 0(%r11),%r10d # HandlerData[0] -+ lea (%rsi,%r10),%r10 # prologue label -+ cmp %r10,%rbx # context->Rip<.Lprologue -+ jb .Lcommon_seh_tail -+ -+ mov 152($context),%rax # pull context->Rsp -+ -+ mov 4(%r11),%r10d # HandlerData[1] -+ lea (%rsi,%r10),%r10 # epilogue label -+ cmp %r10,%rbx # context->Rip>=.Lepilogue -+ jae .Lcommon_seh_tail -+ -+ lea 48(%rax),%rax -+ -+ mov -8(%rax),%rbx -+ mov -16(%rax),%rbp -+ mov -24(%rax),%r12 -+ mov -32(%rax),%r13 -+ mov -40(%rax),%r14 -+ mov -48(%rax),%r15 -+ mov %rbx,144($context) # restore context->Rbx -+ mov %rbp,160($context) # restore context->Rbp -+ mov %r12,216($context) # restore context->R12 -+ mov %r13,224($context) # restore context->R13 -+ mov %r14,232($context) # restore context->R14 -+ mov %r15,240($context) # restore context->R14 -+ -+ jmp .Lcommon_seh_tail -+.size se_handler,.-se_handler -+ -+.type avx_handler,\@abi-omnipotent -+.align 16 -+avx_handler: -+ push %rsi -+ push %rdi -+ push %rbx -+ push %rbp -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ pushfq -+ sub \$64,%rsp -+ -+ mov 120($context),%rax # pull context->Rax -+ mov 248($context),%rbx # pull context->Rip -+ -+ mov 8($disp),%rsi # disp->ImageBase -+ mov 56($disp),%r11 # disp->HandlerData -+ -+ mov 0(%r11),%r10d # HandlerData[0] -+ lea (%rsi,%r10),%r10 # prologue label -+ cmp %r10,%rbx # context->Rip<prologue label -+ jb .Lcommon_seh_tail -+ -+ mov 152($context),%rax # pull 
context->Rsp -+ -+ mov 4(%r11),%r10d # HandlerData[1] -+ lea (%rsi,%r10),%r10 # epilogue label -+ cmp %r10,%rbx # context->Rip>=epilogue label -+ jae .Lcommon_seh_tail -+ -+ mov 208($context),%rax # pull context->R11 -+ -+ lea 0x50(%rax),%rsi -+ lea 0xf8(%rax),%rax -+ lea 512($context),%rdi # &context.Xmm6 -+ mov \$20,%ecx -+ .long 0xa548f3fc # cld; rep movsq -+ -+.Lcommon_seh_tail: -+ mov 8(%rax),%rdi -+ mov 16(%rax),%rsi -+ mov %rax,152($context) # restore context->Rsp -+ mov %rsi,168($context) # restore context->Rsi -+ mov %rdi,176($context) # restore context->Rdi -+ -+ mov 40($disp),%rdi # disp->ContextRecord -+ mov $context,%rsi # context -+ mov \$154,%ecx # sizeof(CONTEXT) -+ .long 0xa548f3fc # cld; rep movsq -+ -+ mov $disp,%rsi -+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER -+ mov 8(%rsi),%rdx # arg2, disp->ImageBase -+ mov 0(%rsi),%r8 # arg3, disp->ControlPc -+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry -+ mov 40(%rsi),%r10 # disp->ContextRecord -+ lea 56(%rsi),%r11 # &disp->HandlerData -+ lea 24(%rsi),%r12 # &disp->EstablisherFrame -+ mov %r10,32(%rsp) # arg5 -+ mov %r11,40(%rsp) # arg6 -+ mov %r12,48(%rsp) # arg7 -+ mov %rcx,56(%rsp) # arg8, (NULL) -+ call *__imp_RtlVirtualUnwind(%rip) -+ -+ mov \$1,%eax # ExceptionContinueSearch -+ add \$64,%rsp -+ popfq -+ pop %r15 -+ pop %r14 -+ pop %r13 -+ pop %r12 -+ pop %rbp -+ pop %rbx -+ pop %rdi -+ pop %rsi -+ ret -+.size avx_handler,.-avx_handler -+ -+.section .pdata -+.align 4 -+ .rva .LSEH_begin_poly1305_init -+ .rva .LSEH_end_poly1305_init -+ .rva .LSEH_info_poly1305_init -+ -+ .rva .LSEH_begin_poly1305_blocks -+ .rva .LSEH_end_poly1305_blocks -+ .rva .LSEH_info_poly1305_blocks -+ -+ .rva .LSEH_begin_poly1305_emit -+ .rva .LSEH_end_poly1305_emit -+ .rva .LSEH_info_poly1305_emit -+___ -+$code.=<<___ if ($avx); -+ .rva .LSEH_begin_poly1305_blocks_avx -+ .rva .Lbase2_64_avx -+ .rva .LSEH_info_poly1305_blocks_avx_1 -+ -+ .rva .Lbase2_64_avx -+ .rva .Leven_avx -+ .rva .LSEH_info_poly1305_blocks_avx_2 -+ -+ .rva 
.Leven_avx -+ .rva .LSEH_end_poly1305_blocks_avx -+ .rva .LSEH_info_poly1305_blocks_avx_3 -+ -+ .rva .LSEH_begin_poly1305_emit_avx -+ .rva .LSEH_end_poly1305_emit_avx -+ .rva .LSEH_info_poly1305_emit_avx -+___ -+$code.=<<___ if ($avx>1); -+ .rva .LSEH_begin_poly1305_blocks_avx2 -+ .rva .Lbase2_64_avx2 -+ .rva .LSEH_info_poly1305_blocks_avx2_1 -+ -+ .rva .Lbase2_64_avx2 -+ .rva .Leven_avx2 -+ .rva .LSEH_info_poly1305_blocks_avx2_2 -+ -+ .rva .Leven_avx2 -+ .rva .LSEH_end_poly1305_blocks_avx2 -+ .rva .LSEH_info_poly1305_blocks_avx2_3 -+___ -+$code.=<<___ if ($avx>2); -+ .rva .LSEH_begin_poly1305_blocks_avx512 -+ .rva .LSEH_end_poly1305_blocks_avx512 -+ .rva .LSEH_info_poly1305_blocks_avx512 -+___ -+$code.=<<___; -+.section .xdata -+.align 8 -+.LSEH_info_poly1305_init: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init -+ -+.LSEH_info_poly1305_blocks: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .Lblocks_body,.Lblocks_epilogue -+ -+.LSEH_info_poly1305_emit: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit -+___ -+$code.=<<___ if ($avx); -+.LSEH_info_poly1305_blocks_avx_1: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .Lblocks_avx_body,.Lblocks_avx_epilogue # HandlerData[] -+ -+.LSEH_info_poly1305_blocks_avx_2: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .Lbase2_64_avx_body,.Lbase2_64_avx_epilogue # HandlerData[] -+ -+.LSEH_info_poly1305_blocks_avx_3: -+ .byte 9,0,0,0 -+ .rva avx_handler -+ .rva .Ldo_avx_body,.Ldo_avx_epilogue # HandlerData[] -+ -+.LSEH_info_poly1305_emit_avx: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx -+___ -+$code.=<<___ if ($avx>1); -+.LSEH_info_poly1305_blocks_avx2_1: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .Lblocks_avx2_body,.Lblocks_avx2_epilogue # HandlerData[] -+ -+.LSEH_info_poly1305_blocks_avx2_2: -+ .byte 9,0,0,0 -+ .rva se_handler -+ .rva .Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue # 
HandlerData[] -+ -+.LSEH_info_poly1305_blocks_avx2_3: -+ .byte 9,0,0,0 -+ .rva avx_handler -+ .rva .Ldo_avx2_body,.Ldo_avx2_epilogue # HandlerData[] -+___ -+$code.=<<___ if ($avx>2); -+.LSEH_info_poly1305_blocks_avx512: -+ .byte 9,0,0,0 -+ .rva avx_handler -+ .rva .Ldo_avx512_body,.Ldo_avx512_epilogue # HandlerData[] -+___ -+} -+ -+foreach (split('\n',$code)) { -+ s/\`([^\`]*)\`/eval($1)/ge; -+ s/%r([a-z]+)#d/%e$1/g; -+ s/%r([0-9]+)#d/%r$1d/g; -+ s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g; -+ -+ print $_,"\n"; -+} -+close STDOUT; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0043-crypto-x86-poly1305-wire-up-faster-implementations-f.patch b/target/linux/generic/backport-5.4/080-wireguard-0043-crypto-x86-poly1305-wire-up-faster-implementations-f.patch deleted file mode 100644 index 0fc8348585..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0043-crypto-x86-poly1305-wire-up-faster-implementations-f.patch +++ /dev/null @@ -1,2927 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 5 Jan 2020 22:40:48 -0500 -Subject: [PATCH] crypto: x86/poly1305 - wire up faster implementations for - kernel - -commit d7d7b853566254648df59f7ea27ea05952a6cfa8 upstream. - -These x86_64 vectorized implementations support AVX, AVX-2, and AVX512F. -The AVX-512F implementation is disabled on Skylake, due to throttling, -but it is quite fast on >= Cannonlake. - -On the left is cycle counts on a Core i7 6700HQ using the AVX-2 -codepath, comparing this implementation ("new") to the implementation in -the current crypto api ("old"). On the right are benchmarks on a Xeon -Gold 5120 using the AVX-512 codepath. The new implementation is faster -on all benchmarks. 
- - AVX-2 AVX-512 - --------- ----------- - - size old new size old new - ---- ---- ---- ---- ---- ---- - 0 70 68 0 74 70 - 16 92 90 16 96 92 - 32 134 104 32 136 106 - 48 172 120 48 184 124 - 64 218 136 64 218 138 - 80 254 158 80 260 160 - 96 298 174 96 300 176 - 112 342 192 112 342 194 - 128 388 212 128 384 212 - 144 428 228 144 420 226 - 160 466 246 160 464 248 - 176 510 264 176 504 264 - 192 550 282 192 544 282 - 208 594 302 208 582 300 - 224 628 316 224 624 318 - 240 676 334 240 662 338 - 256 716 354 256 708 358 - 272 764 374 272 748 372 - 288 802 352 288 788 358 - 304 420 366 304 422 370 - 320 428 360 320 432 364 - 336 484 378 336 486 380 - 352 426 384 352 434 390 - 368 478 400 368 480 408 - 384 488 394 384 490 398 - 400 542 408 400 542 412 - 416 486 416 416 492 426 - 432 534 430 432 538 436 - 448 544 422 448 546 432 - 464 600 438 464 600 448 - 480 540 448 480 548 456 - 496 594 464 496 594 476 - 512 602 456 512 606 470 - 528 656 476 528 656 480 - 544 600 480 544 606 498 - 560 650 494 560 652 512 - 576 664 490 576 662 508 - 592 714 508 592 716 522 - 608 656 514 608 664 538 - 624 708 532 624 710 552 - 640 716 524 640 720 516 - 656 770 536 656 772 526 - 672 716 548 672 722 544 - 688 770 562 688 768 556 - 704 774 552 704 778 556 - 720 826 568 720 832 568 - 736 768 574 736 780 584 - 752 822 592 752 826 600 - 768 830 584 768 836 560 - 784 884 602 784 888 572 - 800 828 610 800 838 588 - 816 884 628 816 884 604 - 832 888 618 832 894 598 - 848 942 632 848 946 612 - 864 884 644 864 896 628 - 880 936 660 880 942 644 - 896 948 652 896 952 608 - 912 1000 664 912 1004 616 - 928 942 676 928 954 634 - 944 994 690 944 1000 646 - 960 1002 680 960 1008 646 - 976 1054 694 976 1062 658 - 992 1002 706 992 1012 674 - 1008 1052 720 1008 1058 690 - -This commit wires in the prior implementation from Andy, and makes the -following changes to be suitable for kernel land. 
- - - Some cosmetic and structural changes, like renaming labels to - .Lname, constants, and other Linux conventions, as well as making - the code easy for us to maintain moving forward. - - - CPU feature checking is done in C by the glue code. - - - We avoid jumping into the middle of functions, to appease objtool, - and instead parameterize shared code. - - - We maintain frame pointers so that stack traces make sense. - - - We remove the dependency on the perl xlate code, which transforms - the output into things that assemblers we don't care about use. - -Importantly, none of our changes affect the arithmetic or core code, but -just involve the differing environment of kernel space. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Samuel Neves <sneves@dei.uc.pt> -Co-developed-by: Samuel Neves <sneves@dei.uc.pt> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/.gitignore | 1 + - arch/x86/crypto/Makefile | 11 +- - arch/x86/crypto/poly1305-avx2-x86_64.S | 390 ---------- - arch/x86/crypto/poly1305-sse2-x86_64.S | 590 --------------- - arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 682 ++++++++++-------- - arch/x86/crypto/poly1305_glue.c | 473 +++++------- - lib/crypto/Kconfig | 2 +- - 7 files changed, 572 insertions(+), 1577 deletions(-) - create mode 100644 arch/x86/crypto/.gitignore - delete mode 100644 arch/x86/crypto/poly1305-avx2-x86_64.S - delete mode 100644 arch/x86/crypto/poly1305-sse2-x86_64.S - ---- /dev/null -+++ b/arch/x86/crypto/.gitignore -@@ -0,0 +1 @@ -+poly1305-x86_64.S ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -73,6 +73,10 @@ aegis128-aesni-y := aegis128-aesni-asm.o - - nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o - blake2s-x86_64-y := blake2s-core.o blake2s-glue.o -+poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o -+ifneq ($(CONFIG_CRYPTO_POLY1305_X86_64),) -+targets += 
poly1305-x86_64-cryptogams.S -+endif - - ifeq ($(avx_supported),yes) - camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ -@@ -101,10 +105,8 @@ aesni-intel-y := aesni-intel_asm.o aesni - aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o - ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o - sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o --poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o - ifeq ($(avx2_supported),yes) - sha1-ssse3-y += sha1_avx2_x86_64_asm.o --poly1305-x86_64-y += poly1305-avx2-x86_64.o - endif - ifeq ($(sha1_ni_supported),yes) - sha1-ssse3-y += sha1_ni_asm.o -@@ -118,3 +120,8 @@ sha256-ssse3-y += sha256_ni_asm.o - endif - sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o - crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o -+ -+quiet_cmd_perlasm = PERLASM $@ -+ cmd_perlasm = $(PERL) $< > $@ -+$(obj)/%.S: $(src)/%.pl FORCE -+ $(call if_changed,perlasm) ---- a/arch/x86/crypto/poly1305-avx2-x86_64.S -+++ /dev/null -@@ -1,390 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0-or-later */ --/* -- * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions -- * -- * Copyright (C) 2015 Martin Willi -- */ -- --#include <linux/linkage.h> -- --.section .rodata.cst32.ANMASK, "aM", @progbits, 32 --.align 32 --ANMASK: .octa 0x0000000003ffffff0000000003ffffff -- .octa 0x0000000003ffffff0000000003ffffff -- --.section .rodata.cst32.ORMASK, "aM", @progbits, 32 --.align 32 --ORMASK: .octa 0x00000000010000000000000001000000 -- .octa 0x00000000010000000000000001000000 -- --.text -- --#define h0 0x00(%rdi) --#define h1 0x04(%rdi) --#define h2 0x08(%rdi) --#define h3 0x0c(%rdi) --#define h4 0x10(%rdi) --#define r0 0x00(%rdx) --#define r1 0x04(%rdx) --#define r2 0x08(%rdx) --#define r3 0x0c(%rdx) --#define r4 0x10(%rdx) --#define u0 0x00(%r8) --#define u1 0x04(%r8) --#define u2 0x08(%r8) --#define u3 0x0c(%r8) --#define u4 0x10(%r8) 
--#define w0 0x18(%r8) --#define w1 0x1c(%r8) --#define w2 0x20(%r8) --#define w3 0x24(%r8) --#define w4 0x28(%r8) --#define y0 0x30(%r8) --#define y1 0x34(%r8) --#define y2 0x38(%r8) --#define y3 0x3c(%r8) --#define y4 0x40(%r8) --#define m %rsi --#define hc0 %ymm0 --#define hc1 %ymm1 --#define hc2 %ymm2 --#define hc3 %ymm3 --#define hc4 %ymm4 --#define hc0x %xmm0 --#define hc1x %xmm1 --#define hc2x %xmm2 --#define hc3x %xmm3 --#define hc4x %xmm4 --#define t1 %ymm5 --#define t2 %ymm6 --#define t1x %xmm5 --#define t2x %xmm6 --#define ruwy0 %ymm7 --#define ruwy1 %ymm8 --#define ruwy2 %ymm9 --#define ruwy3 %ymm10 --#define ruwy4 %ymm11 --#define ruwy0x %xmm7 --#define ruwy1x %xmm8 --#define ruwy2x %xmm9 --#define ruwy3x %xmm10 --#define ruwy4x %xmm11 --#define svxz1 %ymm12 --#define svxz2 %ymm13 --#define svxz3 %ymm14 --#define svxz4 %ymm15 --#define d0 %r9 --#define d1 %r10 --#define d2 %r11 --#define d3 %r12 --#define d4 %r13 -- --ENTRY(poly1305_4block_avx2) -- # %rdi: Accumulator h[5] -- # %rsi: 64 byte input block m -- # %rdx: Poly1305 key r[5] -- # %rcx: Quadblock count -- # %r8: Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5], -- -- # This four-block variant uses loop unrolled block processing. 
It -- # requires 4 Poly1305 keys: r, r^2, r^3 and r^4: -- # h = (h + m) * r => h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r -- -- vzeroupper -- push %rbx -- push %r12 -- push %r13 -- -- # combine r0,u0,w0,y0 -- vmovd y0,ruwy0x -- vmovd w0,t1x -- vpunpcklqdq t1,ruwy0,ruwy0 -- vmovd u0,t1x -- vmovd r0,t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,ruwy0,ruwy0 -- -- # combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5 -- vmovd y1,ruwy1x -- vmovd w1,t1x -- vpunpcklqdq t1,ruwy1,ruwy1 -- vmovd u1,t1x -- vmovd r1,t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,ruwy1,ruwy1 -- vpslld $2,ruwy1,svxz1 -- vpaddd ruwy1,svxz1,svxz1 -- -- # combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5 -- vmovd y2,ruwy2x -- vmovd w2,t1x -- vpunpcklqdq t1,ruwy2,ruwy2 -- vmovd u2,t1x -- vmovd r2,t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,ruwy2,ruwy2 -- vpslld $2,ruwy2,svxz2 -- vpaddd ruwy2,svxz2,svxz2 -- -- # combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5 -- vmovd y3,ruwy3x -- vmovd w3,t1x -- vpunpcklqdq t1,ruwy3,ruwy3 -- vmovd u3,t1x -- vmovd r3,t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,ruwy3,ruwy3 -- vpslld $2,ruwy3,svxz3 -- vpaddd ruwy3,svxz3,svxz3 -- -- # combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5 -- vmovd y4,ruwy4x -- vmovd w4,t1x -- vpunpcklqdq t1,ruwy4,ruwy4 -- vmovd u4,t1x -- vmovd r4,t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,ruwy4,ruwy4 -- vpslld $2,ruwy4,svxz4 -- vpaddd ruwy4,svxz4,svxz4 -- --.Ldoblock4: -- # hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff, -- # m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0] -- vmovd 0x00(m),hc0x -- vmovd 0x10(m),t1x -- vpunpcklqdq t1,hc0,hc0 -- vmovd 0x20(m),t1x -- vmovd 0x30(m),t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,hc0,hc0 -- vpand ANMASK(%rip),hc0,hc0 -- vmovd h0,t1x -- vpaddd t1,hc0,hc0 -- # hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff, -- # (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1] -- vmovd 0x03(m),hc1x -- vmovd 
0x13(m),t1x -- vpunpcklqdq t1,hc1,hc1 -- vmovd 0x23(m),t1x -- vmovd 0x33(m),t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,hc1,hc1 -- vpsrld $2,hc1,hc1 -- vpand ANMASK(%rip),hc1,hc1 -- vmovd h1,t1x -- vpaddd t1,hc1,hc1 -- # hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff, -- # (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2] -- vmovd 0x06(m),hc2x -- vmovd 0x16(m),t1x -- vpunpcklqdq t1,hc2,hc2 -- vmovd 0x26(m),t1x -- vmovd 0x36(m),t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,hc2,hc2 -- vpsrld $4,hc2,hc2 -- vpand ANMASK(%rip),hc2,hc2 -- vmovd h2,t1x -- vpaddd t1,hc2,hc2 -- # hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff, -- # (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3] -- vmovd 0x09(m),hc3x -- vmovd 0x19(m),t1x -- vpunpcklqdq t1,hc3,hc3 -- vmovd 0x29(m),t1x -- vmovd 0x39(m),t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,hc3,hc3 -- vpsrld $6,hc3,hc3 -- vpand ANMASK(%rip),hc3,hc3 -- vmovd h3,t1x -- vpaddd t1,hc3,hc3 -- # hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24), -- # (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4] -- vmovd 0x0c(m),hc4x -- vmovd 0x1c(m),t1x -- vpunpcklqdq t1,hc4,hc4 -- vmovd 0x2c(m),t1x -- vmovd 0x3c(m),t2x -- vpunpcklqdq t2,t1,t1 -- vperm2i128 $0x20,t1,hc4,hc4 -- vpsrld $8,hc4,hc4 -- vpor ORMASK(%rip),hc4,hc4 -- vmovd h4,t1x -- vpaddd t1,hc4,hc4 -- -- # t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ] -- vpmuludq hc0,ruwy0,t1 -- # t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ] -- vpmuludq hc1,svxz4,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ] -- vpmuludq hc2,svxz3,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ] -- vpmuludq hc3,svxz2,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ] -- vpmuludq hc4,svxz1,t2 -- vpaddq t2,t1,t1 -- # d0 = t1[0] + t1[1] + t[2] + t[3] -- vpermq $0xee,t1,t2 -- vpaddq 
t2,t1,t1 -- vpsrldq $8,t1,t2 -- vpaddq t2,t1,t1 -- vmovq t1x,d0 -- -- # t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ] -- vpmuludq hc0,ruwy1,t1 -- # t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ] -- vpmuludq hc1,ruwy0,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ] -- vpmuludq hc2,svxz4,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ] -- vpmuludq hc3,svxz3,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ] -- vpmuludq hc4,svxz2,t2 -- vpaddq t2,t1,t1 -- # d1 = t1[0] + t1[1] + t1[3] + t1[4] -- vpermq $0xee,t1,t2 -- vpaddq t2,t1,t1 -- vpsrldq $8,t1,t2 -- vpaddq t2,t1,t1 -- vmovq t1x,d1 -- -- # t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ] -- vpmuludq hc0,ruwy2,t1 -- # t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ] -- vpmuludq hc1,ruwy1,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ] -- vpmuludq hc2,ruwy0,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ] -- vpmuludq hc3,svxz4,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ] -- vpmuludq hc4,svxz3,t2 -- vpaddq t2,t1,t1 -- # d2 = t1[0] + t1[1] + t1[2] + t1[3] -- vpermq $0xee,t1,t2 -- vpaddq t2,t1,t1 -- vpsrldq $8,t1,t2 -- vpaddq t2,t1,t1 -- vmovq t1x,d2 -- -- # t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ] -- vpmuludq hc0,ruwy3,t1 -- # t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ] -- vpmuludq hc1,ruwy2,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ] -- vpmuludq hc2,ruwy1,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ] -- vpmuludq hc3,ruwy0,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ] -- vpmuludq hc4,svxz4,t2 -- vpaddq t2,t1,t1 -- # d3 = t1[0] + t1[1] + t1[2] + t1[3] -- vpermq $0xee,t1,t2 -- 
vpaddq t2,t1,t1 -- vpsrldq $8,t1,t2 -- vpaddq t2,t1,t1 -- vmovq t1x,d3 -- -- # t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ] -- vpmuludq hc0,ruwy4,t1 -- # t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ] -- vpmuludq hc1,ruwy3,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ] -- vpmuludq hc2,ruwy2,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ] -- vpmuludq hc3,ruwy1,t2 -- vpaddq t2,t1,t1 -- # t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ] -- vpmuludq hc4,ruwy0,t2 -- vpaddq t2,t1,t1 -- # d4 = t1[0] + t1[1] + t1[2] + t1[3] -- vpermq $0xee,t1,t2 -- vpaddq t2,t1,t1 -- vpsrldq $8,t1,t2 -- vpaddq t2,t1,t1 -- vmovq t1x,d4 -- -- # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> -- # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small -- # amount. Careful: we must not assume the carry bits 'd0 >> 26', -- # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit -- # integers. It's true in a single-block implementation, but not here. 
-- -- # d1 += d0 >> 26 -- mov d0,%rax -- shr $26,%rax -- add %rax,d1 -- # h0 = d0 & 0x3ffffff -- mov d0,%rbx -- and $0x3ffffff,%ebx -- -- # d2 += d1 >> 26 -- mov d1,%rax -- shr $26,%rax -- add %rax,d2 -- # h1 = d1 & 0x3ffffff -- mov d1,%rax -- and $0x3ffffff,%eax -- mov %eax,h1 -- -- # d3 += d2 >> 26 -- mov d2,%rax -- shr $26,%rax -- add %rax,d3 -- # h2 = d2 & 0x3ffffff -- mov d2,%rax -- and $0x3ffffff,%eax -- mov %eax,h2 -- -- # d4 += d3 >> 26 -- mov d3,%rax -- shr $26,%rax -- add %rax,d4 -- # h3 = d3 & 0x3ffffff -- mov d3,%rax -- and $0x3ffffff,%eax -- mov %eax,h3 -- -- # h0 += (d4 >> 26) * 5 -- mov d4,%rax -- shr $26,%rax -- lea (%rax,%rax,4),%rax -- add %rax,%rbx -- # h4 = d4 & 0x3ffffff -- mov d4,%rax -- and $0x3ffffff,%eax -- mov %eax,h4 -- -- # h1 += h0 >> 26 -- mov %rbx,%rax -- shr $26,%rax -- add %eax,h1 -- # h0 = h0 & 0x3ffffff -- andl $0x3ffffff,%ebx -- mov %ebx,h0 -- -- add $0x40,m -- dec %rcx -- jnz .Ldoblock4 -- -- vzeroupper -- pop %r13 -- pop %r12 -- pop %rbx -- ret --ENDPROC(poly1305_4block_avx2) ---- a/arch/x86/crypto/poly1305-sse2-x86_64.S -+++ /dev/null -@@ -1,590 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0-or-later */ --/* -- * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions -- * -- * Copyright (C) 2015 Martin Willi -- */ -- --#include <linux/linkage.h> -- --.section .rodata.cst16.ANMASK, "aM", @progbits, 16 --.align 16 --ANMASK: .octa 0x0000000003ffffff0000000003ffffff -- --.section .rodata.cst16.ORMASK, "aM", @progbits, 16 --.align 16 --ORMASK: .octa 0x00000000010000000000000001000000 -- --.text -- --#define h0 0x00(%rdi) --#define h1 0x04(%rdi) --#define h2 0x08(%rdi) --#define h3 0x0c(%rdi) --#define h4 0x10(%rdi) --#define r0 0x00(%rdx) --#define r1 0x04(%rdx) --#define r2 0x08(%rdx) --#define r3 0x0c(%rdx) --#define r4 0x10(%rdx) --#define s1 0x00(%rsp) --#define s2 0x04(%rsp) --#define s3 0x08(%rsp) --#define s4 0x0c(%rsp) --#define m %rsi --#define h01 %xmm0 --#define h23 %xmm1 --#define h44 %xmm2 --#define t1 %xmm3 
--#define t2 %xmm4 --#define t3 %xmm5 --#define t4 %xmm6 --#define mask %xmm7 --#define d0 %r8 --#define d1 %r9 --#define d2 %r10 --#define d3 %r11 --#define d4 %r12 -- --ENTRY(poly1305_block_sse2) -- # %rdi: Accumulator h[5] -- # %rsi: 16 byte input block m -- # %rdx: Poly1305 key r[5] -- # %rcx: Block count -- -- # This single block variant tries to improve performance by doing two -- # multiplications in parallel using SSE instructions. There is quite -- # some quardword packing involved, hence the speedup is marginal. -- -- push %rbx -- push %r12 -- sub $0x10,%rsp -- -- # s1..s4 = r1..r4 * 5 -- mov r1,%eax -- lea (%eax,%eax,4),%eax -- mov %eax,s1 -- mov r2,%eax -- lea (%eax,%eax,4),%eax -- mov %eax,s2 -- mov r3,%eax -- lea (%eax,%eax,4),%eax -- mov %eax,s3 -- mov r4,%eax -- lea (%eax,%eax,4),%eax -- mov %eax,s4 -- -- movdqa ANMASK(%rip),mask -- --.Ldoblock: -- # h01 = [0, h1, 0, h0] -- # h23 = [0, h3, 0, h2] -- # h44 = [0, h4, 0, h4] -- movd h0,h01 -- movd h1,t1 -- movd h2,h23 -- movd h3,t2 -- movd h4,h44 -- punpcklqdq t1,h01 -- punpcklqdq t2,h23 -- punpcklqdq h44,h44 -- -- # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ] -- movd 0x00(m),t1 -- movd 0x03(m),t2 -- psrld $2,t2 -- punpcklqdq t2,t1 -- pand mask,t1 -- paddd t1,h01 -- # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ] -- movd 0x06(m),t1 -- movd 0x09(m),t2 -- psrld $4,t1 -- psrld $6,t2 -- punpcklqdq t2,t1 -- pand mask,t1 -- paddd t1,h23 -- # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ] -- mov 0x0c(m),%eax -- shr $8,%eax -- or $0x01000000,%eax -- movd %eax,t1 -- pshufd $0xc4,t1,t1 -- paddd t1,h44 -- -- # t1[0] = h0 * r0 + h2 * s3 -- # t1[1] = h1 * s4 + h3 * s2 -- movd r0,t1 -- movd s4,t2 -- punpcklqdq t2,t1 -- pmuludq h01,t1 -- movd s3,t2 -- movd s2,t3 -- punpcklqdq t3,t2 -- pmuludq h23,t2 -- paddq t2,t1 -- # t2[0] = h0 * r1 + h2 * s4 -- # t2[1] = h1 * r0 + h3 * s3 -- movd r1,t2 -- movd r0,t3 -- punpcklqdq t3,t2 -- pmuludq h01,t2 -- movd s4,t3 -- movd 
s3,t4 -- punpcklqdq t4,t3 -- pmuludq h23,t3 -- paddq t3,t2 -- # t3[0] = h4 * s1 -- # t3[1] = h4 * s2 -- movd s1,t3 -- movd s2,t4 -- punpcklqdq t4,t3 -- pmuludq h44,t3 -- # d0 = t1[0] + t1[1] + t3[0] -- # d1 = t2[0] + t2[1] + t3[1] -- movdqa t1,t4 -- punpcklqdq t2,t4 -- punpckhqdq t2,t1 -- paddq t4,t1 -- paddq t3,t1 -- movq t1,d0 -- psrldq $8,t1 -- movq t1,d1 -- -- # t1[0] = h0 * r2 + h2 * r0 -- # t1[1] = h1 * r1 + h3 * s4 -- movd r2,t1 -- movd r1,t2 -- punpcklqdq t2,t1 -- pmuludq h01,t1 -- movd r0,t2 -- movd s4,t3 -- punpcklqdq t3,t2 -- pmuludq h23,t2 -- paddq t2,t1 -- # t2[0] = h0 * r3 + h2 * r1 -- # t2[1] = h1 * r2 + h3 * r0 -- movd r3,t2 -- movd r2,t3 -- punpcklqdq t3,t2 -- pmuludq h01,t2 -- movd r1,t3 -- movd r0,t4 -- punpcklqdq t4,t3 -- pmuludq h23,t3 -- paddq t3,t2 -- # t3[0] = h4 * s3 -- # t3[1] = h4 * s4 -- movd s3,t3 -- movd s4,t4 -- punpcklqdq t4,t3 -- pmuludq h44,t3 -- # d2 = t1[0] + t1[1] + t3[0] -- # d3 = t2[0] + t2[1] + t3[1] -- movdqa t1,t4 -- punpcklqdq t2,t4 -- punpckhqdq t2,t1 -- paddq t4,t1 -- paddq t3,t1 -- movq t1,d2 -- psrldq $8,t1 -- movq t1,d3 -- -- # t1[0] = h0 * r4 + h2 * r2 -- # t1[1] = h1 * r3 + h3 * r1 -- movd r4,t1 -- movd r3,t2 -- punpcklqdq t2,t1 -- pmuludq h01,t1 -- movd r2,t2 -- movd r1,t3 -- punpcklqdq t3,t2 -- pmuludq h23,t2 -- paddq t2,t1 -- # t3[0] = h4 * r0 -- movd r0,t3 -- pmuludq h44,t3 -- # d4 = t1[0] + t1[1] + t3[0] -- movdqa t1,t4 -- psrldq $8,t4 -- paddq t4,t1 -- paddq t3,t1 -- movq t1,d4 -- -- # d1 += d0 >> 26 -- mov d0,%rax -- shr $26,%rax -- add %rax,d1 -- # h0 = d0 & 0x3ffffff -- mov d0,%rbx -- and $0x3ffffff,%ebx -- -- # d2 += d1 >> 26 -- mov d1,%rax -- shr $26,%rax -- add %rax,d2 -- # h1 = d1 & 0x3ffffff -- mov d1,%rax -- and $0x3ffffff,%eax -- mov %eax,h1 -- -- # d3 += d2 >> 26 -- mov d2,%rax -- shr $26,%rax -- add %rax,d3 -- # h2 = d2 & 0x3ffffff -- mov d2,%rax -- and $0x3ffffff,%eax -- mov %eax,h2 -- -- # d4 += d3 >> 26 -- mov d3,%rax -- shr $26,%rax -- add %rax,d4 -- # h3 = d3 & 0x3ffffff -- mov d3,%rax -- and 
$0x3ffffff,%eax -- mov %eax,h3 -- -- # h0 += (d4 >> 26) * 5 -- mov d4,%rax -- shr $26,%rax -- lea (%rax,%rax,4),%rax -- add %rax,%rbx -- # h4 = d4 & 0x3ffffff -- mov d4,%rax -- and $0x3ffffff,%eax -- mov %eax,h4 -- -- # h1 += h0 >> 26 -- mov %rbx,%rax -- shr $26,%rax -- add %eax,h1 -- # h0 = h0 & 0x3ffffff -- andl $0x3ffffff,%ebx -- mov %ebx,h0 -- -- add $0x10,m -- dec %rcx -- jnz .Ldoblock -- -- # Zeroing of key material -- mov %rcx,0x00(%rsp) -- mov %rcx,0x08(%rsp) -- -- add $0x10,%rsp -- pop %r12 -- pop %rbx -- ret --ENDPROC(poly1305_block_sse2) -- -- --#define u0 0x00(%r8) --#define u1 0x04(%r8) --#define u2 0x08(%r8) --#define u3 0x0c(%r8) --#define u4 0x10(%r8) --#define hc0 %xmm0 --#define hc1 %xmm1 --#define hc2 %xmm2 --#define hc3 %xmm5 --#define hc4 %xmm6 --#define ru0 %xmm7 --#define ru1 %xmm8 --#define ru2 %xmm9 --#define ru3 %xmm10 --#define ru4 %xmm11 --#define sv1 %xmm12 --#define sv2 %xmm13 --#define sv3 %xmm14 --#define sv4 %xmm15 --#undef d0 --#define d0 %r13 -- --ENTRY(poly1305_2block_sse2) -- # %rdi: Accumulator h[5] -- # %rsi: 16 byte input block m -- # %rdx: Poly1305 key r[5] -- # %rcx: Doubleblock count -- # %r8: Poly1305 derived key r^2 u[5] -- -- # This two-block variant further improves performance by using loop -- # unrolled block processing. 
This is more straight forward and does -- # less byte shuffling, but requires a second Poly1305 key r^2: -- # h = (h + m) * r => h = (h + m1) * r^2 + m2 * r -- -- push %rbx -- push %r12 -- push %r13 -- -- # combine r0,u0 -- movd u0,ru0 -- movd r0,t1 -- punpcklqdq t1,ru0 -- -- # combine r1,u1 and s1=r1*5,v1=u1*5 -- movd u1,ru1 -- movd r1,t1 -- punpcklqdq t1,ru1 -- movdqa ru1,sv1 -- pslld $2,sv1 -- paddd ru1,sv1 -- -- # combine r2,u2 and s2=r2*5,v2=u2*5 -- movd u2,ru2 -- movd r2,t1 -- punpcklqdq t1,ru2 -- movdqa ru2,sv2 -- pslld $2,sv2 -- paddd ru2,sv2 -- -- # combine r3,u3 and s3=r3*5,v3=u3*5 -- movd u3,ru3 -- movd r3,t1 -- punpcklqdq t1,ru3 -- movdqa ru3,sv3 -- pslld $2,sv3 -- paddd ru3,sv3 -- -- # combine r4,u4 and s4=r4*5,v4=u4*5 -- movd u4,ru4 -- movd r4,t1 -- punpcklqdq t1,ru4 -- movdqa ru4,sv4 -- pslld $2,sv4 -- paddd ru4,sv4 -- --.Ldoblock2: -- # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ] -- movd 0x00(m),hc0 -- movd 0x10(m),t1 -- punpcklqdq t1,hc0 -- pand ANMASK(%rip),hc0 -- movd h0,t1 -- paddd t1,hc0 -- # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ] -- movd 0x03(m),hc1 -- movd 0x13(m),t1 -- punpcklqdq t1,hc1 -- psrld $2,hc1 -- pand ANMASK(%rip),hc1 -- movd h1,t1 -- paddd t1,hc1 -- # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ] -- movd 0x06(m),hc2 -- movd 0x16(m),t1 -- punpcklqdq t1,hc2 -- psrld $4,hc2 -- pand ANMASK(%rip),hc2 -- movd h2,t1 -- paddd t1,hc2 -- # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ] -- movd 0x09(m),hc3 -- movd 0x19(m),t1 -- punpcklqdq t1,hc3 -- psrld $6,hc3 -- pand ANMASK(%rip),hc3 -- movd h3,t1 -- paddd t1,hc3 -- # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ] -- movd 0x0c(m),hc4 -- movd 0x1c(m),t1 -- punpcklqdq t1,hc4 -- psrld $8,hc4 -- por ORMASK(%rip),hc4 -- movd h4,t1 -- paddd t1,hc4 -- -- # t1 = [ hc0[1] * r0, hc0[0] * u0 ] -- movdqa ru0,t1 -- pmuludq hc0,t1 -- # t1 += [ hc1[1] * s4, hc1[0] * v4 ] -- movdqa sv4,t2 -- 
pmuludq hc1,t2 -- paddq t2,t1 -- # t1 += [ hc2[1] * s3, hc2[0] * v3 ] -- movdqa sv3,t2 -- pmuludq hc2,t2 -- paddq t2,t1 -- # t1 += [ hc3[1] * s2, hc3[0] * v2 ] -- movdqa sv2,t2 -- pmuludq hc3,t2 -- paddq t2,t1 -- # t1 += [ hc4[1] * s1, hc4[0] * v1 ] -- movdqa sv1,t2 -- pmuludq hc4,t2 -- paddq t2,t1 -- # d0 = t1[0] + t1[1] -- movdqa t1,t2 -- psrldq $8,t2 -- paddq t2,t1 -- movq t1,d0 -- -- # t1 = [ hc0[1] * r1, hc0[0] * u1 ] -- movdqa ru1,t1 -- pmuludq hc0,t1 -- # t1 += [ hc1[1] * r0, hc1[0] * u0 ] -- movdqa ru0,t2 -- pmuludq hc1,t2 -- paddq t2,t1 -- # t1 += [ hc2[1] * s4, hc2[0] * v4 ] -- movdqa sv4,t2 -- pmuludq hc2,t2 -- paddq t2,t1 -- # t1 += [ hc3[1] * s3, hc3[0] * v3 ] -- movdqa sv3,t2 -- pmuludq hc3,t2 -- paddq t2,t1 -- # t1 += [ hc4[1] * s2, hc4[0] * v2 ] -- movdqa sv2,t2 -- pmuludq hc4,t2 -- paddq t2,t1 -- # d1 = t1[0] + t1[1] -- movdqa t1,t2 -- psrldq $8,t2 -- paddq t2,t1 -- movq t1,d1 -- -- # t1 = [ hc0[1] * r2, hc0[0] * u2 ] -- movdqa ru2,t1 -- pmuludq hc0,t1 -- # t1 += [ hc1[1] * r1, hc1[0] * u1 ] -- movdqa ru1,t2 -- pmuludq hc1,t2 -- paddq t2,t1 -- # t1 += [ hc2[1] * r0, hc2[0] * u0 ] -- movdqa ru0,t2 -- pmuludq hc2,t2 -- paddq t2,t1 -- # t1 += [ hc3[1] * s4, hc3[0] * v4 ] -- movdqa sv4,t2 -- pmuludq hc3,t2 -- paddq t2,t1 -- # t1 += [ hc4[1] * s3, hc4[0] * v3 ] -- movdqa sv3,t2 -- pmuludq hc4,t2 -- paddq t2,t1 -- # d2 = t1[0] + t1[1] -- movdqa t1,t2 -- psrldq $8,t2 -- paddq t2,t1 -- movq t1,d2 -- -- # t1 = [ hc0[1] * r3, hc0[0] * u3 ] -- movdqa ru3,t1 -- pmuludq hc0,t1 -- # t1 += [ hc1[1] * r2, hc1[0] * u2 ] -- movdqa ru2,t2 -- pmuludq hc1,t2 -- paddq t2,t1 -- # t1 += [ hc2[1] * r1, hc2[0] * u1 ] -- movdqa ru1,t2 -- pmuludq hc2,t2 -- paddq t2,t1 -- # t1 += [ hc3[1] * r0, hc3[0] * u0 ] -- movdqa ru0,t2 -- pmuludq hc3,t2 -- paddq t2,t1 -- # t1 += [ hc4[1] * s4, hc4[0] * v4 ] -- movdqa sv4,t2 -- pmuludq hc4,t2 -- paddq t2,t1 -- # d3 = t1[0] + t1[1] -- movdqa t1,t2 -- psrldq $8,t2 -- paddq t2,t1 -- movq t1,d3 -- -- # t1 = [ hc0[1] * r4, hc0[0] * u4 ] -- 
movdqa ru4,t1 -- pmuludq hc0,t1 -- # t1 += [ hc1[1] * r3, hc1[0] * u3 ] -- movdqa ru3,t2 -- pmuludq hc1,t2 -- paddq t2,t1 -- # t1 += [ hc2[1] * r2, hc2[0] * u2 ] -- movdqa ru2,t2 -- pmuludq hc2,t2 -- paddq t2,t1 -- # t1 += [ hc3[1] * r1, hc3[0] * u1 ] -- movdqa ru1,t2 -- pmuludq hc3,t2 -- paddq t2,t1 -- # t1 += [ hc4[1] * r0, hc4[0] * u0 ] -- movdqa ru0,t2 -- pmuludq hc4,t2 -- paddq t2,t1 -- # d4 = t1[0] + t1[1] -- movdqa t1,t2 -- psrldq $8,t2 -- paddq t2,t1 -- movq t1,d4 -- -- # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> -- # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small -- # amount. Careful: we must not assume the carry bits 'd0 >> 26', -- # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit -- # integers. It's true in a single-block implementation, but not here. -- -- # d1 += d0 >> 26 -- mov d0,%rax -- shr $26,%rax -- add %rax,d1 -- # h0 = d0 & 0x3ffffff -- mov d0,%rbx -- and $0x3ffffff,%ebx -- -- # d2 += d1 >> 26 -- mov d1,%rax -- shr $26,%rax -- add %rax,d2 -- # h1 = d1 & 0x3ffffff -- mov d1,%rax -- and $0x3ffffff,%eax -- mov %eax,h1 -- -- # d3 += d2 >> 26 -- mov d2,%rax -- shr $26,%rax -- add %rax,d3 -- # h2 = d2 & 0x3ffffff -- mov d2,%rax -- and $0x3ffffff,%eax -- mov %eax,h2 -- -- # d4 += d3 >> 26 -- mov d3,%rax -- shr $26,%rax -- add %rax,d4 -- # h3 = d3 & 0x3ffffff -- mov d3,%rax -- and $0x3ffffff,%eax -- mov %eax,h3 -- -- # h0 += (d4 >> 26) * 5 -- mov d4,%rax -- shr $26,%rax -- lea (%rax,%rax,4),%rax -- add %rax,%rbx -- # h4 = d4 & 0x3ffffff -- mov d4,%rax -- and $0x3ffffff,%eax -- mov %eax,h4 -- -- # h1 += h0 >> 26 -- mov %rbx,%rax -- shr $26,%rax -- add %eax,h1 -- # h0 = h0 & 0x3ffffff -- andl $0x3ffffff,%ebx -- mov %ebx,h0 -- -- add $0x20,m -- dec %rcx -- jnz .Ldoblock2 -- -- pop %r13 -- pop %r12 -- pop %rbx -- ret --ENDPROC(poly1305_2block_sse2) ---- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl -+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl -@@ -1,11 +1,14 @@ --#! 
/usr/bin/env perl --# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved. -+#!/usr/bin/env perl -+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause - # --# Licensed under the OpenSSL license (the "License"). You may not use --# this file except in compliance with the License. You can obtain a copy --# in the file LICENSE in the source distribution or at --# https://www.openssl.org/source/license.html -- -+# Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. -+# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved. -+# -+# This code is taken from the OpenSSL project but the author, Andy Polyakov, -+# has relicensed it under the licenses specified in the SPDX header above. -+# The original headers, including the original license headers, are -+# included below for completeness. - # - # ==================================================================== - # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -@@ -32,7 +35,7 @@ - # Skylake-X system performance. Since we are likely to suppress - # AVX512F capability flag [at least on Skylake-X], conversion serves - # as kind of "investment protection". Note that next *lake processor, --# Cannolake, has AVX512IFMA code path to execute... -+# Cannonlake, has AVX512IFMA code path to execute... - # - # Numbers are cycles per processed byte with poly1305_blocks alone, - # measured with rdtsc at fixed clock frequency. 
-@@ -68,39 +71,114 @@ $output = shift; - if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - - $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -+$kernel=0; $kernel=1 if (!$flavour && !$output); - --$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; --( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or --( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or --die "can't locate x86_64-xlate.pl"; -- --if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` -- =~ /GNU assembler version ([2-9]\.[0-9]+)/) { -- $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26); -+if (!$kernel) { -+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+ ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -+ ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -+ die "can't locate x86_64-xlate.pl"; -+ -+ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; -+ *STDOUT=*OUT; -+ -+ if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` -+ =~ /GNU assembler version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25); -+ } -+ -+ if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && -+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { -+ $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12); -+ $avx += 1 if ($1==2.11 && $2>=8); -+ } -+ -+ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && -+ `ml64 2>&1` =~ /Version ([0-9]+)\./) { -+ $avx = ($1>=10) + ($1>=11); -+ } -+ -+ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { -+ $avx = ($2>=3.0) + ($2>3.0); -+ } -+} else { -+ $avx = 4; # The kernel uses ifdefs for this. 
- } - --if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && -- `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) { -- $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12); -- $avx += 2 if ($1==2.11 && $2>=8); -+sub declare_function() { -+ my ($name, $align, $nargs) = @_; -+ if($kernel) { -+ $code .= ".align $align\n"; -+ $code .= "ENTRY($name)\n"; -+ $code .= ".L$name:\n"; -+ } else { -+ $code .= ".globl $name\n"; -+ $code .= ".type $name,\@function,$nargs\n"; -+ $code .= ".align $align\n"; -+ $code .= "$name:\n"; -+ } - } - --if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && -- `ml64 2>&1` =~ /Version ([0-9]+)\./) { -- $avx = ($1>=10) + ($1>=12); -+sub end_function() { -+ my ($name) = @_; -+ if($kernel) { -+ $code .= "ENDPROC($name)\n"; -+ } else { -+ $code .= ".size $name,.-$name\n"; -+ } - } - --if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) { -- $avx = ($2>=3.0) + ($2>3.0); --} -+$code.=<<___ if $kernel; -+#include <linux/linkage.h> -+___ - --open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; --*STDOUT=*OUT; -+if ($avx) { -+$code.=<<___ if $kernel; -+.section .rodata -+___ -+$code.=<<___; -+.align 64 -+.Lconst: -+.Lmask24: -+.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 -+.L129: -+.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 -+.Lmask26: -+.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 -+.Lpermd_avx2: -+.long 2,2,2,3,2,0,2,1 -+.Lpermd_avx512: -+.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 -+ -+.L2_44_inp_permd: -+.long 0,1,1,2,2,3,7,7 -+.L2_44_inp_shift: -+.quad 0,12,24,64 -+.L2_44_mask: -+.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff -+.L2_44_shift_rgt: -+.quad 44,44,42,64 -+.L2_44_shift_lft: -+.quad 8,8,10,64 -+ -+.align 64 -+.Lx_mask44: -+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff -+.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff -+.Lx_mask42: -+.quad 
0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff -+.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff -+___ -+} -+$code.=<<___ if (!$kernel); -+.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" -+.align 16 -+___ - - my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx"); - my ($mac,$nonce)=($inp,$len); # *_emit arguments --my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13)); --my ($h0,$h1,$h2)=("%r14","%rbx","%rbp"); -+my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13"); -+my ($h0,$h1,$h2)=("%r14","%rbx","%r10"); - - sub poly1305_iteration { - # input: copy of $r1 in %rax, $h0-$h2, $r0-$r1 -@@ -155,19 +233,19 @@ ___ - - $code.=<<___; - .text -- -+___ -+$code.=<<___ if (!$kernel); - .extern OPENSSL_ia32cap_P - --.globl poly1305_init --.hidden poly1305_init --.globl poly1305_blocks --.hidden poly1305_blocks --.globl poly1305_emit --.hidden poly1305_emit -- --.type poly1305_init,\@function,3 --.align 32 --poly1305_init: -+.globl poly1305_init_x86_64 -+.hidden poly1305_init_x86_64 -+.globl poly1305_blocks_x86_64 -+.hidden poly1305_blocks_x86_64 -+.globl poly1305_emit_x86_64 -+.hidden poly1305_emit_x86_64 -+___ -+&declare_function("poly1305_init_x86_64", 32, 3); -+$code.=<<___; - xor %rax,%rax - mov %rax,0($ctx) # initialize hash value - mov %rax,8($ctx) -@@ -175,11 +253,12 @@ poly1305_init: - - cmp \$0,$inp - je .Lno_key -- -- lea poly1305_blocks(%rip),%r10 -- lea poly1305_emit(%rip),%r11 - ___ --$code.=<<___ if ($avx); -+$code.=<<___ if (!$kernel); -+ lea poly1305_blocks_x86_64(%rip),%r10 -+ lea poly1305_emit_x86_64(%rip),%r11 -+___ -+$code.=<<___ if (!$kernel && $avx); - mov OPENSSL_ia32cap_P+4(%rip),%r9 - lea poly1305_blocks_avx(%rip),%rax - lea poly1305_emit_avx(%rip),%rcx -@@ -187,12 +266,12 @@ $code.=<<___ if ($avx); - cmovc %rax,%r10 - cmovc %rcx,%r11 - ___ --$code.=<<___ if ($avx>1); -+$code.=<<___ if (!$kernel && $avx>1); - lea poly1305_blocks_avx2(%rip),%rax - bt \$`5+32`,%r9 # AVX2? 
- cmovc %rax,%r10 - ___ --$code.=<<___ if ($avx>3); -+$code.=<<___ if (!$kernel && $avx>3); - mov \$`(1<<31|1<<21|1<<16)`,%rax - shr \$32,%r9 - and %rax,%r9 -@@ -207,11 +286,11 @@ $code.=<<___; - mov %rax,24($ctx) - mov %rcx,32($ctx) - ___ --$code.=<<___ if ($flavour !~ /elf32/); -+$code.=<<___ if (!$kernel && $flavour !~ /elf32/); - mov %r10,0(%rdx) - mov %r11,8(%rdx) - ___ --$code.=<<___ if ($flavour =~ /elf32/); -+$code.=<<___ if (!$kernel && $flavour =~ /elf32/); - mov %r10d,0(%rdx) - mov %r11d,4(%rdx) - ___ -@@ -219,11 +298,11 @@ $code.=<<___; - mov \$1,%eax - .Lno_key: - ret --.size poly1305_init,.-poly1305_init -+___ -+&end_function("poly1305_init_x86_64"); - --.type poly1305_blocks,\@function,4 --.align 32 --poly1305_blocks: -+&declare_function("poly1305_blocks_x86_64", 32, 4); -+$code.=<<___; - .cfi_startproc - .Lblocks: - shr \$4,$len -@@ -231,8 +310,6 @@ poly1305_blocks: - - push %rbx - .cfi_push %rbx -- push %rbp --.cfi_push %rbp - push %r12 - .cfi_push %r12 - push %r13 -@@ -241,6 +318,8 @@ poly1305_blocks: - .cfi_push %r14 - push %r15 - .cfi_push %r15 -+ push $ctx -+.cfi_push $ctx - .Lblocks_body: - - mov $len,%r15 # reassign $len -@@ -265,26 +344,29 @@ poly1305_blocks: - lea 16($inp),$inp - adc $padbit,$h2 - ___ -+ - &poly1305_iteration(); -+ - $code.=<<___; - mov $r1,%rax - dec %r15 # len-=16 - jnz .Loop - -+ mov 0(%rsp),$ctx -+.cfi_restore $ctx -+ - mov $h0,0($ctx) # store hash value - mov $h1,8($ctx) - mov $h2,16($ctx) - -- mov 0(%rsp),%r15 -+ mov 8(%rsp),%r15 - .cfi_restore %r15 -- mov 8(%rsp),%r14 -+ mov 16(%rsp),%r14 - .cfi_restore %r14 -- mov 16(%rsp),%r13 -+ mov 24(%rsp),%r13 - .cfi_restore %r13 -- mov 24(%rsp),%r12 -+ mov 32(%rsp),%r12 - .cfi_restore %r12 -- mov 32(%rsp),%rbp --.cfi_restore %rbp - mov 40(%rsp),%rbx - .cfi_restore %rbx - lea 48(%rsp),%rsp -@@ -293,11 +375,11 @@ $code.=<<___; - .Lblocks_epilogue: - ret - .cfi_endproc --.size poly1305_blocks,.-poly1305_blocks -+___ -+&end_function("poly1305_blocks_x86_64"); - --.type 
poly1305_emit,\@function,3 --.align 32 --poly1305_emit: -+&declare_function("poly1305_emit_x86_64", 32, 3); -+$code.=<<___; - .Lemit: - mov 0($ctx),%r8 # load hash value - mov 8($ctx),%r9 -@@ -318,10 +400,14 @@ poly1305_emit: - mov %rcx,8($mac) - - ret --.size poly1305_emit,.-poly1305_emit - ___ -+&end_function("poly1305_emit_x86_64"); - if ($avx) { - -+if($kernel) { -+ $code .= "#ifdef CONFIG_AS_AVX\n"; -+} -+ - ######################################################################## - # Layout of opaque area is following. - # -@@ -342,15 +428,19 @@ $code.=<<___; - .type __poly1305_block,\@abi-omnipotent - .align 32 - __poly1305_block: -+ push $ctx - ___ - &poly1305_iteration(); - $code.=<<___; -+ pop $ctx - ret - .size __poly1305_block,.-__poly1305_block - - .type __poly1305_init_avx,\@abi-omnipotent - .align 32 - __poly1305_init_avx: -+ push %rbp -+ mov %rsp,%rbp - mov $r0,$h0 - mov $r1,$h1 - xor $h2,$h2 -@@ -507,12 +597,13 @@ __poly1305_init_avx: - mov $d1#d,`16*8+8-64`($ctx) - - lea -48-64($ctx),$ctx # size [de-]optimization -+ pop %rbp - ret - .size __poly1305_init_avx,.-__poly1305_init_avx -+___ - --.type poly1305_blocks_avx,\@function,4 --.align 32 --poly1305_blocks_avx: -+&declare_function("poly1305_blocks_avx", 32, 4); -+$code.=<<___; - .cfi_startproc - mov 20($ctx),%r8d # is_base2_26 - cmp \$128,$len -@@ -532,10 +623,11 @@ poly1305_blocks_avx: - test \$31,$len - jz .Leven_avx - -- push %rbx --.cfi_push %rbx - push %rbp - .cfi_push %rbp -+ mov %rsp,%rbp -+ push %rbx -+.cfi_push %rbx - push %r12 - .cfi_push %r12 - push %r13 -@@ -645,20 +737,18 @@ poly1305_blocks_avx: - mov $h2#d,16($ctx) - .align 16 - .Ldone_avx: -- mov 0(%rsp),%r15 -+ pop %r15 - .cfi_restore %r15 -- mov 8(%rsp),%r14 -+ pop %r14 - .cfi_restore %r14 -- mov 16(%rsp),%r13 -+ pop %r13 - .cfi_restore %r13 -- mov 24(%rsp),%r12 -+ pop %r12 - .cfi_restore %r12 -- mov 32(%rsp),%rbp --.cfi_restore %rbp -- mov 40(%rsp),%rbx -+ pop %rbx - .cfi_restore %rbx -- lea 48(%rsp),%rsp --.cfi_adjust_cfa_offset 
-48 -+ pop %rbp -+.cfi_restore %rbp - .Lno_data_avx: - .Lblocks_avx_epilogue: - ret -@@ -667,10 +757,11 @@ poly1305_blocks_avx: - .align 32 - .Lbase2_64_avx: - .cfi_startproc -- push %rbx --.cfi_push %rbx - push %rbp - .cfi_push %rbp -+ mov %rsp,%rbp -+ push %rbx -+.cfi_push %rbx - push %r12 - .cfi_push %r12 - push %r13 -@@ -736,22 +827,18 @@ poly1305_blocks_avx: - - .Lproceed_avx: - mov %r15,$len -- -- mov 0(%rsp),%r15 -+ pop %r15 - .cfi_restore %r15 -- mov 8(%rsp),%r14 -+ pop %r14 - .cfi_restore %r14 -- mov 16(%rsp),%r13 -+ pop %r13 - .cfi_restore %r13 -- mov 24(%rsp),%r12 -+ pop %r12 - .cfi_restore %r12 -- mov 32(%rsp),%rbp --.cfi_restore %rbp -- mov 40(%rsp),%rbx -+ pop %rbx - .cfi_restore %rbx -- lea 48(%rsp),%rax -- lea 48(%rsp),%rsp --.cfi_adjust_cfa_offset -48 -+ pop %rbp -+.cfi_restore %rbp - .Lbase2_64_avx_epilogue: - jmp .Ldo_avx - .cfi_endproc -@@ -768,8 +855,11 @@ poly1305_blocks_avx: - .Ldo_avx: - ___ - $code.=<<___ if (!$win64); -+ lea 8(%rsp),%r10 -+.cfi_def_cfa_register %r10 -+ and \$-32,%rsp -+ sub \$-8,%rsp - lea -0x58(%rsp),%r11 --.cfi_def_cfa %r11,0x60 - sub \$0x178,%rsp - ___ - $code.=<<___ if ($win64); -@@ -1361,18 +1451,18 @@ $code.=<<___ if ($win64); - .Ldo_avx_epilogue: - ___ - $code.=<<___ if (!$win64); -- lea 0x58(%r11),%rsp --.cfi_def_cfa %rsp,8 -+ lea -8(%r10),%rsp -+.cfi_def_cfa_register %rsp - ___ - $code.=<<___; - vzeroupper - ret - .cfi_endproc --.size poly1305_blocks_avx,.-poly1305_blocks_avx -+___ -+&end_function("poly1305_blocks_avx"); - --.type poly1305_emit_avx,\@function,3 --.align 32 --poly1305_emit_avx: -+&declare_function("poly1305_emit_avx", 32, 3); -+$code.=<<___; - cmpl \$0,20($ctx) # is_base2_26? 
- je .Lemit - -@@ -1423,41 +1513,51 @@ poly1305_emit_avx: - mov %rcx,8($mac) - - ret --.size poly1305_emit_avx,.-poly1305_emit_avx - ___ -+&end_function("poly1305_emit_avx"); -+ -+if ($kernel) { -+ $code .= "#endif\n"; -+} - - if ($avx>1) { -+ -+if ($kernel) { -+ $code .= "#ifdef CONFIG_AS_AVX2\n"; -+} -+ - my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = - map("%ymm$_",(0..15)); - my $S4=$MASK; - -+sub poly1305_blocks_avxN { -+ my ($avx512) = @_; -+ my $suffix = $avx512 ? "_avx512" : ""; - $code.=<<___; --.type poly1305_blocks_avx2,\@function,4 --.align 32 --poly1305_blocks_avx2: - .cfi_startproc - mov 20($ctx),%r8d # is_base2_26 - cmp \$128,$len -- jae .Lblocks_avx2 -+ jae .Lblocks_avx2$suffix - test %r8d,%r8d - jz .Lblocks - --.Lblocks_avx2: -+.Lblocks_avx2$suffix: - and \$-16,$len -- jz .Lno_data_avx2 -+ jz .Lno_data_avx2$suffix - - vzeroupper - - test %r8d,%r8d -- jz .Lbase2_64_avx2 -+ jz .Lbase2_64_avx2$suffix - - test \$63,$len -- jz .Leven_avx2 -+ jz .Leven_avx2$suffix - -- push %rbx --.cfi_push %rbx - push %rbp - .cfi_push %rbp -+ mov %rsp,%rbp -+ push %rbx -+.cfi_push %rbx - push %r12 - .cfi_push %r12 - push %r13 -@@ -1466,7 +1566,7 @@ poly1305_blocks_avx2: - .cfi_push %r14 - push %r15 - .cfi_push %r15 --.Lblocks_avx2_body: -+.Lblocks_avx2_body$suffix: - - mov $len,%r15 # reassign $len - -@@ -1513,7 +1613,7 @@ poly1305_blocks_avx2: - shr \$2,$s1 - add $r1,$s1 # s1 = r1 + (r1 >> 2) - --.Lbase2_26_pre_avx2: -+.Lbase2_26_pre_avx2$suffix: - add 0($inp),$h0 # accumulate input - adc 8($inp),$h1 - lea 16($inp),$inp -@@ -1524,10 +1624,10 @@ poly1305_blocks_avx2: - mov $r1,%rax - - test \$63,%r15 -- jnz .Lbase2_26_pre_avx2 -+ jnz .Lbase2_26_pre_avx2$suffix - - test $padbit,$padbit # if $padbit is zero, -- jz .Lstore_base2_64_avx2 # store hash in base 2^64 format -+ jz .Lstore_base2_64_avx2$suffix # store hash in base 2^64 format - - ################################# base 2^64 -> base 2^26 - mov $h0,%rax -@@ -1548,57 +1648,56 @@ 
poly1305_blocks_avx2: - or $r1,$h2 # h[4] - - test %r15,%r15 -- jz .Lstore_base2_26_avx2 -+ jz .Lstore_base2_26_avx2$suffix - - vmovd %rax#d,%x#$H0 - vmovd %rdx#d,%x#$H1 - vmovd $h0#d,%x#$H2 - vmovd $h1#d,%x#$H3 - vmovd $h2#d,%x#$H4 -- jmp .Lproceed_avx2 -+ jmp .Lproceed_avx2$suffix - - .align 32 --.Lstore_base2_64_avx2: -+.Lstore_base2_64_avx2$suffix: - mov $h0,0($ctx) - mov $h1,8($ctx) - mov $h2,16($ctx) # note that is_base2_26 is zeroed -- jmp .Ldone_avx2 -+ jmp .Ldone_avx2$suffix - - .align 16 --.Lstore_base2_26_avx2: -+.Lstore_base2_26_avx2$suffix: - mov %rax#d,0($ctx) # store hash value base 2^26 - mov %rdx#d,4($ctx) - mov $h0#d,8($ctx) - mov $h1#d,12($ctx) - mov $h2#d,16($ctx) - .align 16 --.Ldone_avx2: -- mov 0(%rsp),%r15 -+.Ldone_avx2$suffix: -+ pop %r15 - .cfi_restore %r15 -- mov 8(%rsp),%r14 -+ pop %r14 - .cfi_restore %r14 -- mov 16(%rsp),%r13 -+ pop %r13 - .cfi_restore %r13 -- mov 24(%rsp),%r12 -+ pop %r12 - .cfi_restore %r12 -- mov 32(%rsp),%rbp --.cfi_restore %rbp -- mov 40(%rsp),%rbx -+ pop %rbx - .cfi_restore %rbx -- lea 48(%rsp),%rsp --.cfi_adjust_cfa_offset -48 --.Lno_data_avx2: --.Lblocks_avx2_epilogue: -+ pop %rbp -+.cfi_restore %rbp -+.Lno_data_avx2$suffix: -+.Lblocks_avx2_epilogue$suffix: - ret - .cfi_endproc - - .align 32 --.Lbase2_64_avx2: -+.Lbase2_64_avx2$suffix: - .cfi_startproc -- push %rbx --.cfi_push %rbx - push %rbp - .cfi_push %rbp -+ mov %rsp,%rbp -+ push %rbx -+.cfi_push %rbx - push %r12 - .cfi_push %r12 - push %r13 -@@ -1607,7 +1706,7 @@ poly1305_blocks_avx2: - .cfi_push %r14 - push %r15 - .cfi_push %r15 --.Lbase2_64_avx2_body: -+.Lbase2_64_avx2_body$suffix: - - mov $len,%r15 # reassign $len - -@@ -1624,9 +1723,9 @@ poly1305_blocks_avx2: - add $r1,$s1 # s1 = r1 + (r1 >> 2) - - test \$63,$len -- jz .Linit_avx2 -+ jz .Linit_avx2$suffix - --.Lbase2_64_pre_avx2: -+.Lbase2_64_pre_avx2$suffix: - add 0($inp),$h0 # accumulate input - adc 8($inp),$h1 - lea 16($inp),$inp -@@ -1637,9 +1736,9 @@ poly1305_blocks_avx2: - mov $r1,%rax - - test 
\$63,%r15 -- jnz .Lbase2_64_pre_avx2 -+ jnz .Lbase2_64_pre_avx2$suffix - --.Linit_avx2: -+.Linit_avx2$suffix: - ################################# base 2^64 -> base 2^26 - mov $h0,%rax - mov $h0,%rdx -@@ -1667,69 +1766,77 @@ poly1305_blocks_avx2: - - call __poly1305_init_avx - --.Lproceed_avx2: -+.Lproceed_avx2$suffix: - mov %r15,$len # restore $len -- mov OPENSSL_ia32cap_P+8(%rip),%r10d -+___ -+$code.=<<___ if (!$kernel); -+ mov OPENSSL_ia32cap_P+8(%rip),%r9d - mov \$`(1<<31|1<<30|1<<16)`,%r11d -- -- mov 0(%rsp),%r15 -+___ -+$code.=<<___; -+ pop %r15 - .cfi_restore %r15 -- mov 8(%rsp),%r14 -+ pop %r14 - .cfi_restore %r14 -- mov 16(%rsp),%r13 -+ pop %r13 - .cfi_restore %r13 -- mov 24(%rsp),%r12 -+ pop %r12 - .cfi_restore %r12 -- mov 32(%rsp),%rbp --.cfi_restore %rbp -- mov 40(%rsp),%rbx -+ pop %rbx - .cfi_restore %rbx -- lea 48(%rsp),%rax -- lea 48(%rsp),%rsp --.cfi_adjust_cfa_offset -48 --.Lbase2_64_avx2_epilogue: -- jmp .Ldo_avx2 -+ pop %rbp -+.cfi_restore %rbp -+.Lbase2_64_avx2_epilogue$suffix: -+ jmp .Ldo_avx2$suffix - .cfi_endproc - - .align 32 --.Leven_avx2: -+.Leven_avx2$suffix: - .cfi_startproc -- mov OPENSSL_ia32cap_P+8(%rip),%r10d -+___ -+$code.=<<___ if (!$kernel); -+ mov OPENSSL_ia32cap_P+8(%rip),%r9d -+___ -+$code.=<<___; - vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26 - vmovd 4*1($ctx),%x#$H1 - vmovd 4*2($ctx),%x#$H2 - vmovd 4*3($ctx),%x#$H3 - vmovd 4*4($ctx),%x#$H4 - --.Ldo_avx2: -+.Ldo_avx2$suffix: - ___ --$code.=<<___ if ($avx>2); -+$code.=<<___ if (!$kernel && $avx>2); - cmp \$512,$len - jb .Lskip_avx512 -- and %r11d,%r10d -- test \$`1<<16`,%r10d # check for AVX512F -+ and %r11d,%r9d -+ test \$`1<<16`,%r9d # check for AVX512F - jnz .Lblocks_avx512 --.Lskip_avx512: -+.Lskip_avx512$suffix: -+___ -+$code.=<<___ if ($avx > 2 && $avx512 && $kernel); -+ cmp \$512,$len -+ jae .Lblocks_avx512 - ___ - $code.=<<___ if (!$win64); -- lea -8(%rsp),%r11 --.cfi_def_cfa %r11,16 -+ lea 8(%rsp),%r10 -+.cfi_def_cfa_register %r10 - sub \$0x128,%rsp - ___ - 
$code.=<<___ if ($win64); -- lea -0xf8(%rsp),%r11 -+ lea 8(%rsp),%r10 - sub \$0x1c8,%rsp -- vmovdqa %xmm6,0x50(%r11) -- vmovdqa %xmm7,0x60(%r11) -- vmovdqa %xmm8,0x70(%r11) -- vmovdqa %xmm9,0x80(%r11) -- vmovdqa %xmm10,0x90(%r11) -- vmovdqa %xmm11,0xa0(%r11) -- vmovdqa %xmm12,0xb0(%r11) -- vmovdqa %xmm13,0xc0(%r11) -- vmovdqa %xmm14,0xd0(%r11) -- vmovdqa %xmm15,0xe0(%r11) --.Ldo_avx2_body: -+ vmovdqa %xmm6,-0xb0(%r10) -+ vmovdqa %xmm7,-0xa0(%r10) -+ vmovdqa %xmm8,-0x90(%r10) -+ vmovdqa %xmm9,-0x80(%r10) -+ vmovdqa %xmm10,-0x70(%r10) -+ vmovdqa %xmm11,-0x60(%r10) -+ vmovdqa %xmm12,-0x50(%r10) -+ vmovdqa %xmm13,-0x40(%r10) -+ vmovdqa %xmm14,-0x30(%r10) -+ vmovdqa %xmm15,-0x20(%r10) -+.Ldo_avx2_body$suffix: - ___ - $code.=<<___; - lea .Lconst(%rip),%rcx -@@ -1794,11 +1901,11 @@ $code.=<<___; - - vpaddq $H2,$T2,$H2 # accumulate input - sub \$64,$len -- jz .Ltail_avx2 -- jmp .Loop_avx2 -+ jz .Ltail_avx2$suffix -+ jmp .Loop_avx2$suffix - - .align 32 --.Loop_avx2: -+.Loop_avx2$suffix: - ################################################################ - # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4 - # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3 -@@ -1946,10 +2053,10 @@ $code.=<<___; - vpor 32(%rcx),$T4,$T4 # padbit, yes, always - - sub \$64,$len -- jnz .Loop_avx2 -+ jnz .Loop_avx2$suffix - - .byte 0x66,0x90 --.Ltail_avx2: -+.Ltail_avx2$suffix: - ################################################################ - # while above multiplications were by r^4 in all lanes, in last - # iteration we multiply least significant lane by r^4 and most -@@ -2087,37 +2194,29 @@ $code.=<<___; - vmovd %x#$H4,`4*4-48-64`($ctx) - ___ - $code.=<<___ if ($win64); -- vmovdqa 0x50(%r11),%xmm6 -- vmovdqa 0x60(%r11),%xmm7 -- vmovdqa 0x70(%r11),%xmm8 -- vmovdqa 0x80(%r11),%xmm9 -- vmovdqa 0x90(%r11),%xmm10 -- vmovdqa 0xa0(%r11),%xmm11 -- vmovdqa 0xb0(%r11),%xmm12 -- vmovdqa 0xc0(%r11),%xmm13 -- vmovdqa 0xd0(%r11),%xmm14 -- vmovdqa 0xe0(%r11),%xmm15 -- lea 0xf8(%r11),%rsp --.Ldo_avx2_epilogue: -+ vmovdqa 
-0xb0(%r10),%xmm6 -+ vmovdqa -0xa0(%r10),%xmm7 -+ vmovdqa -0x90(%r10),%xmm8 -+ vmovdqa -0x80(%r10),%xmm9 -+ vmovdqa -0x70(%r10),%xmm10 -+ vmovdqa -0x60(%r10),%xmm11 -+ vmovdqa -0x50(%r10),%xmm12 -+ vmovdqa -0x40(%r10),%xmm13 -+ vmovdqa -0x30(%r10),%xmm14 -+ vmovdqa -0x20(%r10),%xmm15 -+ lea -8(%r10),%rsp -+.Ldo_avx2_epilogue$suffix: - ___ - $code.=<<___ if (!$win64); -- lea 8(%r11),%rsp --.cfi_def_cfa %rsp,8 -+ lea -8(%r10),%rsp -+.cfi_def_cfa_register %rsp - ___ - $code.=<<___; - vzeroupper - ret - .cfi_endproc --.size poly1305_blocks_avx2,.-poly1305_blocks_avx2 - ___ --####################################################################### --if ($avx>2) { --# On entry we have input length divisible by 64. But since inner loop --# processes 128 bytes per iteration, cases when length is not divisible --# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this --# reason stack layout is kept identical to poly1305_blocks_avx2. If not --# for this tail, we wouldn't have to even allocate stack frame... 
-- -+if($avx > 2 && $avx512) { - my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24)); - my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29)); - my $PADBIT="%zmm30"; -@@ -2128,32 +2227,29 @@ map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4)); - map(s/%y/%z/,($MASK)); - - $code.=<<___; --.type poly1305_blocks_avx512,\@function,4 --.align 32 --poly1305_blocks_avx512: - .cfi_startproc - .Lblocks_avx512: - mov \$15,%eax - kmovw %eax,%k2 - ___ - $code.=<<___ if (!$win64); -- lea -8(%rsp),%r11 --.cfi_def_cfa %r11,16 -+ lea 8(%rsp),%r10 -+.cfi_def_cfa_register %r10 - sub \$0x128,%rsp - ___ - $code.=<<___ if ($win64); -- lea -0xf8(%rsp),%r11 -+ lea 8(%rsp),%r10 - sub \$0x1c8,%rsp -- vmovdqa %xmm6,0x50(%r11) -- vmovdqa %xmm7,0x60(%r11) -- vmovdqa %xmm8,0x70(%r11) -- vmovdqa %xmm9,0x80(%r11) -- vmovdqa %xmm10,0x90(%r11) -- vmovdqa %xmm11,0xa0(%r11) -- vmovdqa %xmm12,0xb0(%r11) -- vmovdqa %xmm13,0xc0(%r11) -- vmovdqa %xmm14,0xd0(%r11) -- vmovdqa %xmm15,0xe0(%r11) -+ vmovdqa %xmm6,-0xb0(%r10) -+ vmovdqa %xmm7,-0xa0(%r10) -+ vmovdqa %xmm8,-0x90(%r10) -+ vmovdqa %xmm9,-0x80(%r10) -+ vmovdqa %xmm10,-0x70(%r10) -+ vmovdqa %xmm11,-0x60(%r10) -+ vmovdqa %xmm12,-0x50(%r10) -+ vmovdqa %xmm13,-0x40(%r10) -+ vmovdqa %xmm14,-0x30(%r10) -+ vmovdqa %xmm15,-0x20(%r10) - .Ldo_avx512_body: - ___ - $code.=<<___; -@@ -2679,7 +2775,7 @@ $code.=<<___; - - lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2 - add \$64,$len -- jnz .Ltail_avx2 -+ jnz .Ltail_avx2$suffix - - vpsubq $T2,$H2,$H2 # undo input accumulation - vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced -@@ -2690,29 +2786,61 @@ $code.=<<___; - vzeroall - ___ - $code.=<<___ if ($win64); -- movdqa 0x50(%r11),%xmm6 -- movdqa 0x60(%r11),%xmm7 -- movdqa 0x70(%r11),%xmm8 -- movdqa 0x80(%r11),%xmm9 -- movdqa 0x90(%r11),%xmm10 -- movdqa 0xa0(%r11),%xmm11 -- movdqa 0xb0(%r11),%xmm12 -- movdqa 0xc0(%r11),%xmm13 -- movdqa 0xd0(%r11),%xmm14 -- movdqa 0xe0(%r11),%xmm15 -- lea 0xf8(%r11),%rsp -+ movdqa -0xb0(%r10),%xmm6 -+ movdqa 
-0xa0(%r10),%xmm7 -+ movdqa -0x90(%r10),%xmm8 -+ movdqa -0x80(%r10),%xmm9 -+ movdqa -0x70(%r10),%xmm10 -+ movdqa -0x60(%r10),%xmm11 -+ movdqa -0x50(%r10),%xmm12 -+ movdqa -0x40(%r10),%xmm13 -+ movdqa -0x30(%r10),%xmm14 -+ movdqa -0x20(%r10),%xmm15 -+ lea -8(%r10),%rsp - .Ldo_avx512_epilogue: - ___ - $code.=<<___ if (!$win64); -- lea 8(%r11),%rsp --.cfi_def_cfa %rsp,8 -+ lea -8(%r10),%rsp -+.cfi_def_cfa_register %rsp - ___ - $code.=<<___; - ret - .cfi_endproc --.size poly1305_blocks_avx512,.-poly1305_blocks_avx512 - ___ --if ($avx>3) { -+ -+} -+ -+} -+ -+&declare_function("poly1305_blocks_avx2", 32, 4); -+poly1305_blocks_avxN(0); -+&end_function("poly1305_blocks_avx2"); -+ -+if($kernel) { -+ $code .= "#endif\n"; -+} -+ -+####################################################################### -+if ($avx>2) { -+# On entry we have input length divisible by 64. But since inner loop -+# processes 128 bytes per iteration, cases when length is not divisible -+# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this -+# reason stack layout is kept identical to poly1305_blocks_avx2. If not -+# for this tail, we wouldn't have to even allocate stack frame... -+ -+if($kernel) { -+ $code .= "#ifdef CONFIG_AS_AVX512\n"; -+} -+ -+&declare_function("poly1305_blocks_avx512", 32, 4); -+poly1305_blocks_avxN(1); -+&end_function("poly1305_blocks_avx512"); -+ -+if ($kernel) { -+ $code .= "#endif\n"; -+} -+ -+if (!$kernel && $avx>3) { - ######################################################################## - # VPMADD52 version using 2^44 radix. 
- # -@@ -3753,45 +3881,9 @@ poly1305_emit_base2_44: - .size poly1305_emit_base2_44,.-poly1305_emit_base2_44 - ___ - } } } --$code.=<<___; --.align 64 --.Lconst: --.Lmask24: --.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 --.L129: --.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0 --.Lmask26: --.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 --.Lpermd_avx2: --.long 2,2,2,3,2,0,2,1 --.Lpermd_avx512: --.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 -- --.L2_44_inp_permd: --.long 0,1,1,2,2,3,7,7 --.L2_44_inp_shift: --.quad 0,12,24,64 --.L2_44_mask: --.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff --.L2_44_shift_rgt: --.quad 44,44,42,64 --.L2_44_shift_lft: --.quad 8,8,10,64 -- --.align 64 --.Lx_mask44: --.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff --.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff --.Lx_mask42: --.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff --.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff --___ - } --$code.=<<___; --.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>" --.align 16 --___ - -+if (!$kernel) - { # chacha20-poly1305 helpers - my ($out,$inp,$otp,$len)=$win64 ? 
("%rcx","%rdx","%r8", "%r9") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx"); # Unix order -@@ -4038,17 +4130,17 @@ avx_handler: - - .section .pdata - .align 4 -- .rva .LSEH_begin_poly1305_init -- .rva .LSEH_end_poly1305_init -- .rva .LSEH_info_poly1305_init -- -- .rva .LSEH_begin_poly1305_blocks -- .rva .LSEH_end_poly1305_blocks -- .rva .LSEH_info_poly1305_blocks -- -- .rva .LSEH_begin_poly1305_emit -- .rva .LSEH_end_poly1305_emit -- .rva .LSEH_info_poly1305_emit -+ .rva .LSEH_begin_poly1305_init_x86_64 -+ .rva .LSEH_end_poly1305_init_x86_64 -+ .rva .LSEH_info_poly1305_init_x86_64 -+ -+ .rva .LSEH_begin_poly1305_blocks_x86_64 -+ .rva .LSEH_end_poly1305_blocks_x86_64 -+ .rva .LSEH_info_poly1305_blocks_x86_64 -+ -+ .rva .LSEH_begin_poly1305_emit_x86_64 -+ .rva .LSEH_end_poly1305_emit_x86_64 -+ .rva .LSEH_info_poly1305_emit_x86_64 - ___ - $code.=<<___ if ($avx); - .rva .LSEH_begin_poly1305_blocks_avx -@@ -4088,20 +4180,20 @@ ___ - $code.=<<___; - .section .xdata - .align 8 --.LSEH_info_poly1305_init: -+.LSEH_info_poly1305_init_x86_64: - .byte 9,0,0,0 - .rva se_handler -- .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init -+ .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 - --.LSEH_info_poly1305_blocks: -+.LSEH_info_poly1305_blocks_x86_64: - .byte 9,0,0,0 - .rva se_handler - .rva .Lblocks_body,.Lblocks_epilogue - --.LSEH_info_poly1305_emit: -+.LSEH_info_poly1305_emit_x86_64: - .byte 9,0,0,0 - .rva se_handler -- .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit -+ .rva .LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64 - ___ - $code.=<<___ if ($avx); - .LSEH_info_poly1305_blocks_avx_1: -@@ -4148,12 +4240,26 @@ $code.=<<___ if ($avx>2); - ___ - } - -+open SELF,$0; -+while(<SELF>) { -+ next if (/^#!/); -+ last if (!s/^#/\/\// and !/^$/); -+ print; -+} -+close SELF; -+ - foreach (split('\n',$code)) { - s/\`([^\`]*)\`/eval($1)/ge; - s/%r([a-z]+)#d/%e$1/g; - s/%r([0-9]+)#d/%r$1d/g; - s/%x#%[yz]/%x/g or s/%y#%z/%y/g or 
s/%z#%[yz]/%z/g; - -+ if ($kernel) { -+ s/(^\.type.*),[0-9]+$/\1/; -+ s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/; -+ next if /^\.cfi.*/; -+ } -+ - print $_,"\n"; - } - close STDOUT; ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -1,8 +1,6 @@ --// SPDX-License-Identifier: GPL-2.0-or-later -+// SPDX-License-Identifier: GPL-2.0 OR MIT - /* -- * Poly1305 authenticator algorithm, RFC7539, SIMD glue code -- * -- * Copyright (C) 2015 Martin Willi -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - */ - - #include <crypto/algapi.h> -@@ -13,279 +11,170 @@ - #include <linux/jump_label.h> - #include <linux/kernel.h> - #include <linux/module.h> -+#include <asm/intel-family.h> - #include <asm/simd.h> - --asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src, -- const u32 *r, unsigned int blocks); --asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r, -- unsigned int blocks, const u32 *u); --asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r, -- unsigned int blocks, const u32 *u); -+asmlinkage void poly1305_init_x86_64(void *ctx, -+ const u8 key[POLY1305_KEY_SIZE]); -+asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, -+ const size_t len, const u32 padbit); -+asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], -+ const u32 nonce[4]); -+asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], -+ const u32 nonce[4]); -+asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len, -+ const u32 padbit); -+asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len, -+ const u32 padbit); -+asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, -+ const size_t len, const u32 padbit); - --static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); - static __ro_after_init 
DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); - --static inline u64 mlt(u64 a, u64 b) --{ -- return a * b; --} -- --static inline u32 sr(u64 v, u_char n) --{ -- return v >> n; --} -- --static inline u32 and(u32 v, u32 mask) --{ -- return v & mask; --} -- --static void poly1305_simd_mult(u32 *a, const u32 *b) --{ -- u8 m[POLY1305_BLOCK_SIZE]; -- -- memset(m, 0, sizeof(m)); -- /* The poly1305 block function adds a hi-bit to the accumulator which -- * we don't need for key multiplication; compensate for it. */ -- a[4] -= 1 << 24; -- poly1305_block_sse2(a, m, b, 1); --} -- --static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key) --{ -- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ -- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff; -- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03; -- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff; -- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff; -- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff; --} -+struct poly1305_arch_internal { -+ union { -+ struct { -+ u32 h[5]; -+ u32 is_base2_26; -+ }; -+ u64 hs[3]; -+ }; -+ u64 r[2]; -+ u64 pad; -+ struct { u32 r2, r1, r4, r3; } rn[9]; -+}; - --static void poly1305_integer_blocks(struct poly1305_state *state, -- const struct poly1305_key *key, -- const void *src, -- unsigned int nblocks, u32 hibit) -+/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit -+ * the unfortunate situation of using AVX and then having to go back to scalar -+ * -- because the user is silly and has called the update function from two -+ * separate contexts -- then we need to convert back to the original base before -+ * proceeding. It is possible to reason that the initial reduction below is -+ * sufficient given the implementation invariants. 
However, for an avoidance of -+ * doubt and because this is not performance critical, we do the full reduction -+ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py -+ */ -+static void convert_to_base2_64(void *ctx) - { -- u32 r0, r1, r2, r3, r4; -- u32 s1, s2, s3, s4; -- u32 h0, h1, h2, h3, h4; -- u64 d0, d1, d2, d3, d4; -+ struct poly1305_arch_internal *state = ctx; -+ u32 cy; - -- if (!nblocks) -+ if (!state->is_base2_26) - return; - -- r0 = key->r[0]; -- r1 = key->r[1]; -- r2 = key->r[2]; -- r3 = key->r[3]; -- r4 = key->r[4]; -- -- s1 = r1 * 5; -- s2 = r2 * 5; -- s3 = r3 * 5; -- s4 = r4 * 5; -- -- h0 = state->h[0]; -- h1 = state->h[1]; -- h2 = state->h[2]; -- h3 = state->h[3]; -- h4 = state->h[4]; -- -- do { -- /* h += m[i] */ -- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; -- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; -- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; -- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; -- h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24); -- -- /* h *= r */ -- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + -- mlt(h3, s2) + mlt(h4, s1); -- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + -- mlt(h3, s3) + mlt(h4, s2); -- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + -- mlt(h3, s4) + mlt(h4, s3); -- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + -- mlt(h3, r0) + mlt(h4, s4); -- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + -- mlt(h3, r1) + mlt(h4, r0); -- -- /* (partial) h %= p */ -- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff); -- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff); -- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff); -- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff); -- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff); -- h1 += h0 >> 26; h0 = h0 & 0x3ffffff; -- -- src += POLY1305_BLOCK_SIZE; -- } while (--nblocks); -- -- state->h[0] = h0; -- state->h[1] = h1; -- state->h[2] = h2; -- state->h[3] = h3; -- state->h[4] = h4; --} -- --static void poly1305_integer_emit(const struct 
poly1305_state *state, void *dst) --{ -- u32 h0, h1, h2, h3, h4; -- u32 g0, g1, g2, g3, g4; -- u32 mask; -- -- /* fully carry h */ -- h0 = state->h[0]; -- h1 = state->h[1]; -- h2 = state->h[2]; -- h3 = state->h[3]; -- h4 = state->h[4]; -- -- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; -- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; -- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; -- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; -- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; -- -- /* compute h + -p */ -- g0 = h0 + 5; -- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; -- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; -- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; -- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; -- -- /* select h if h < p, or h + -p if h >= p */ -- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; -- g0 &= mask; -- g1 &= mask; -- g2 &= mask; -- g3 &= mask; -- g4 &= mask; -- mask = ~mask; -- h0 = (h0 & mask) | g0; -- h1 = (h1 & mask) | g1; -- h2 = (h2 & mask) | g2; -- h3 = (h3 & mask) | g3; -- h4 = (h4 & mask) | g4; -- -- /* h = h % (2^128) */ -- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0); -- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4); -- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8); -- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12); --} -- --void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key) --{ -- poly1305_integer_setkey(desc->opaque_r, key); -- desc->s[0] = get_unaligned_le32(key + 16); -- desc->s[1] = get_unaligned_le32(key + 20); -- desc->s[2] = get_unaligned_le32(key + 24); -- desc->s[3] = get_unaligned_le32(key + 28); -- poly1305_core_init(&desc->h); -- desc->buflen = 0; -- desc->sset = true; -- desc->rset = 1; --} --EXPORT_SYMBOL_GPL(poly1305_init_arch); -- --static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, -- const u8 *src, unsigned int srclen) --{ -- if (!dctx->sset) { -- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_integer_setkey(dctx->r, src); -- src += POLY1305_BLOCK_SIZE; -- srclen -= 
POLY1305_BLOCK_SIZE; -- dctx->rset = 1; -- } -- if (srclen >= POLY1305_BLOCK_SIZE) { -- dctx->s[0] = get_unaligned_le32(src + 0); -- dctx->s[1] = get_unaligned_le32(src + 4); -- dctx->s[2] = get_unaligned_le32(src + 8); -- dctx->s[3] = get_unaligned_le32(src + 12); -- src += POLY1305_BLOCK_SIZE; -- srclen -= POLY1305_BLOCK_SIZE; -- dctx->sset = true; -- } -+ cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; -+ cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; -+ cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; -+ cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; -+ state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; -+ state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); -+ state->hs[2] = state->h[4] >> 24; -+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) -+ cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); -+ state->hs[2] &= 3; -+ state->hs[0] += cy; -+ state->hs[1] += (cy = ULT(state->hs[0], cy)); -+ state->hs[2] += ULT(state->hs[1], cy); -+#undef ULT -+ state->is_base2_26 = 0; -+} -+ -+static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE]) -+{ -+ poly1305_init_x86_64(ctx, key); -+} -+ -+static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, -+ const u32 padbit) -+{ -+ struct poly1305_arch_internal *state = ctx; -+ -+ /* SIMD disables preemption, so relax after processing each page. 
*/ -+ BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || -+ PAGE_SIZE % POLY1305_BLOCK_SIZE); -+ -+ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || -+ (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || -+ !crypto_simd_usable()) { -+ convert_to_base2_64(ctx); -+ poly1305_blocks_x86_64(ctx, inp, len, padbit); -+ return; - } -- return srclen; --} - --static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx, -- const u8 *src, unsigned int srclen) --{ -- unsigned int datalen; -+ for (;;) { -+ const size_t bytes = min_t(size_t, len, PAGE_SIZE); - -- if (unlikely(!dctx->sset)) { -- datalen = crypto_poly1305_setdesckey(dctx, src, srclen); -- src += srclen - datalen; -- srclen = datalen; -- } -- if (srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src, -- srclen / POLY1305_BLOCK_SIZE, 1); -- srclen %= POLY1305_BLOCK_SIZE; -+ kernel_fpu_begin(); -+ if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) -+ poly1305_blocks_avx512(ctx, inp, bytes, padbit); -+ else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2)) -+ poly1305_blocks_avx2(ctx, inp, bytes, padbit); -+ else -+ poly1305_blocks_avx(ctx, inp, bytes, padbit); -+ kernel_fpu_end(); -+ len -= bytes; -+ if (!len) -+ break; -+ inp += bytes; - } -- return srclen; - } - --static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, -- const u8 *src, unsigned int srclen) --{ -- unsigned int blocks, datalen; -+static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], -+ const u32 nonce[4]) -+{ -+ struct poly1305_arch_internal *state = ctx; -+ -+ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || -+ !state->is_base2_26 || !crypto_simd_usable()) { -+ convert_to_base2_64(ctx); -+ poly1305_emit_x86_64(ctx, mac, nonce); -+ } else -+ poly1305_emit_avx(ctx, mac, nonce); -+} -+ -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 
*key) -+{ -+ poly1305_simd_init(&dctx->h, key); -+ dctx->s[0] = get_unaligned_le32(&key[16]); -+ dctx->s[1] = get_unaligned_le32(&key[20]); -+ dctx->s[2] = get_unaligned_le32(&key[24]); -+ dctx->s[3] = get_unaligned_le32(&key[28]); -+ dctx->buflen = 0; -+ dctx->sset = true; -+} -+EXPORT_SYMBOL(poly1305_init_arch); - -+static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx, -+ const u8 *inp, unsigned int len) -+{ -+ unsigned int acc = 0; - if (unlikely(!dctx->sset)) { -- datalen = crypto_poly1305_setdesckey(dctx, src, srclen); -- src += srclen - datalen; -- srclen = datalen; -- } -- -- if (IS_ENABLED(CONFIG_AS_AVX2) && -- static_branch_likely(&poly1305_use_avx2) && -- srclen >= POLY1305_BLOCK_SIZE * 4) { -- if (unlikely(dctx->rset < 4)) { -- if (dctx->rset < 2) { -- dctx->r[1] = dctx->r[0]; -- poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); -- } -- dctx->r[2] = dctx->r[1]; -- poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r); -- dctx->r[3] = dctx->r[2]; -- poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r); -- dctx->rset = 4; -+ if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) { -+ poly1305_simd_init(&dctx->h, inp); -+ inp += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ acc += POLY1305_BLOCK_SIZE; -+ dctx->rset = 1; - } -- blocks = srclen / (POLY1305_BLOCK_SIZE * 4); -- poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks, -- dctx->r[1].r); -- src += POLY1305_BLOCK_SIZE * 4 * blocks; -- srclen -= POLY1305_BLOCK_SIZE * 4 * blocks; -- } -- -- if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) { -- if (unlikely(dctx->rset < 2)) { -- dctx->r[1] = dctx->r[0]; -- poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r); -- dctx->rset = 2; -+ if (len >= POLY1305_BLOCK_SIZE) { -+ dctx->s[0] = get_unaligned_le32(&inp[0]); -+ dctx->s[1] = get_unaligned_le32(&inp[4]); -+ dctx->s[2] = get_unaligned_le32(&inp[8]); -+ dctx->s[3] = get_unaligned_le32(&inp[12]); -+ inp += POLY1305_BLOCK_SIZE; -+ len -= POLY1305_BLOCK_SIZE; -+ acc += POLY1305_BLOCK_SIZE; -+ dctx->sset 
= true; - } -- blocks = srclen / (POLY1305_BLOCK_SIZE * 2); -- poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r, -- blocks, dctx->r[1].r); -- src += POLY1305_BLOCK_SIZE * 2 * blocks; -- srclen -= POLY1305_BLOCK_SIZE * 2 * blocks; -- } -- if (srclen >= POLY1305_BLOCK_SIZE) { -- poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1); -- srclen -= POLY1305_BLOCK_SIZE; - } -- return srclen; -+ return acc; - } - - void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, - unsigned int srclen) - { -- unsigned int bytes; -+ unsigned int bytes, used; - - if (unlikely(dctx->buflen)) { - bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); -@@ -295,31 +184,19 @@ void poly1305_update_arch(struct poly130 - dctx->buflen += bytes; - - if (dctx->buflen == POLY1305_BLOCK_SIZE) { -- if (static_branch_likely(&poly1305_use_simd) && -- likely(crypto_simd_usable())) { -- kernel_fpu_begin(); -- poly1305_simd_blocks(dctx, dctx->buf, -- POLY1305_BLOCK_SIZE); -- kernel_fpu_end(); -- } else { -- poly1305_scalar_blocks(dctx, dctx->buf, -- POLY1305_BLOCK_SIZE); -- } -+ if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE))) -+ poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); - dctx->buflen = 0; - } - } - - if (likely(srclen >= POLY1305_BLOCK_SIZE)) { -- if (static_branch_likely(&poly1305_use_simd) && -- likely(crypto_simd_usable())) { -- kernel_fpu_begin(); -- bytes = poly1305_simd_blocks(dctx, src, srclen); -- kernel_fpu_end(); -- } else { -- bytes = poly1305_scalar_blocks(dctx, src, srclen); -- } -- src += srclen - bytes; -- srclen = bytes; -+ bytes = round_down(srclen, POLY1305_BLOCK_SIZE); -+ srclen -= bytes; -+ used = crypto_poly1305_setdctxkey(dctx, src, bytes); -+ if (likely(bytes - used)) -+ poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1); -+ src += bytes; - } - - if (unlikely(srclen)) { -@@ -329,31 +206,17 @@ void poly1305_update_arch(struct poly130 - } - EXPORT_SYMBOL(poly1305_update_arch); - --void 
poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst) -+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) - { -- __le32 digest[4]; -- u64 f = 0; -- -- if (unlikely(desc->buflen)) { -- desc->buf[desc->buflen++] = 1; -- memset(desc->buf + desc->buflen, 0, -- POLY1305_BLOCK_SIZE - desc->buflen); -- poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0); -+ if (unlikely(dctx->buflen)) { -+ dctx->buf[dctx->buflen++] = 1; -+ memset(dctx->buf + dctx->buflen, 0, -+ POLY1305_BLOCK_SIZE - dctx->buflen); -+ poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - -- poly1305_integer_emit(&desc->h, digest); -- -- /* mac = (h + s) % (2^128) */ -- f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0]; -- put_unaligned_le32(f, dst + 0); -- f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1]; -- put_unaligned_le32(f, dst + 4); -- f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2]; -- put_unaligned_le32(f, dst + 8); -- f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3]; -- put_unaligned_le32(f, dst + 12); -- -- *desc = (struct poly1305_desc_ctx){}; -+ poly1305_simd_emit(&dctx->h, dst, dctx->s); -+ *dctx = (struct poly1305_desc_ctx){}; - } - EXPORT_SYMBOL(poly1305_final_arch); - -@@ -361,38 +224,34 @@ static int crypto_poly1305_init(struct s - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - -- poly1305_core_init(&dctx->h); -- dctx->buflen = 0; -- dctx->rset = 0; -- dctx->sset = false; -- -+ *dctx = (struct poly1305_desc_ctx){}; - return 0; - } - --static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) -+static int crypto_poly1305_update(struct shash_desc *desc, -+ const u8 *src, unsigned int srclen) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - -- if (unlikely(!dctx->sset)) -- return -ENOKEY; -- -- poly1305_final_arch(dctx, dst); -+ poly1305_update_arch(dctx, src, srclen); - return 0; - } - --static int poly1305_simd_update(struct shash_desc *desc, -- const u8 *src, unsigned int srclen) 
-+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst) - { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); - -- poly1305_update_arch(dctx, src, srclen); -+ if (unlikely(!dctx->sset)) -+ return -ENOKEY; -+ -+ poly1305_final_arch(dctx, dst); - return 0; - } - - static struct shash_alg alg = { - .digestsize = POLY1305_DIGEST_SIZE, - .init = crypto_poly1305_init, -- .update = poly1305_simd_update, -+ .update = crypto_poly1305_update, - .final = crypto_poly1305_final, - .descsize = sizeof(struct poly1305_desc_ctx), - .base = { -@@ -406,17 +265,19 @@ static struct shash_alg alg = { - - static int __init poly1305_simd_mod_init(void) - { -- if (!boot_cpu_has(X86_FEATURE_XMM2)) -- return 0; -- -- static_branch_enable(&poly1305_use_simd); -- -- if (IS_ENABLED(CONFIG_AS_AVX2) && -- boot_cpu_has(X86_FEATURE_AVX) && -+ if (IS_ENABLED(CONFIG_AS_AVX) && boot_cpu_has(X86_FEATURE_AVX) && -+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) -+ static_branch_enable(&poly1305_use_avx); -+ if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - static_branch_enable(&poly1305_use_avx2); -- -+ if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && -+ boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && -+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && -+ /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ -+ boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X) -+ static_branch_enable(&poly1305_use_avx512); - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0; - } - -@@ -430,7 +291,7 @@ module_init(poly1305_simd_mod_init); - module_exit(poly1305_simd_mod_exit); - - MODULE_LICENSE("GPL"); --MODULE_AUTHOR("Martin Willi <martin@strongswan.org>"); -+MODULE_AUTHOR("Jason A. 
Donenfeld <Jason@zx2c4.com>"); - MODULE_DESCRIPTION("Poly1305 authenticator"); - MODULE_ALIAS_CRYPTO("poly1305"); - MODULE_ALIAS_CRYPTO("poly1305-simd"); ---- a/lib/crypto/Kconfig -+++ b/lib/crypto/Kconfig -@@ -90,7 +90,7 @@ config CRYPTO_LIB_DES - config CRYPTO_LIB_POLY1305_RSIZE - int - default 2 if MIPS -- default 4 if X86_64 -+ default 11 if X86_64 - default 9 if ARM || ARM64 - default 1 - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0044-crypto-arm-arm64-mips-poly1305-remove-redundant-non-.patch b/target/linux/generic/backport-5.4/080-wireguard-0044-crypto-arm-arm64-mips-poly1305-remove-redundant-non-.patch deleted file mode 100644 index b95b998880..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0044-crypto-arm-arm64-mips-poly1305-remove-redundant-non-.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 5 Jan 2020 22:40:49 -0500 -Subject: [PATCH] crypto: {arm,arm64,mips}/poly1305 - remove redundant - non-reduction from emit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 31899908a0d248b030b4464425b86c717e0007d4 upstream. - -This appears to be some kind of copy and paste error, and is actually -dead code. - -Pre: f = 0 ⇒ (f >> 32) = 0 - f = (f >> 32) + le32_to_cpu(digest[0]); -Post: 0 ≤ f < 2³² - put_unaligned_le32(f, dst); - -Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0 - f = (f >> 32) + le32_to_cpu(digest[1]); -Post: 0 ≤ f < 2³² - put_unaligned_le32(f, dst + 4); - -Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0 - f = (f >> 32) + le32_to_cpu(digest[2]); -Post: 0 ≤ f < 2³² - put_unaligned_le32(f, dst + 8); - -Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0 - f = (f >> 32) + le32_to_cpu(digest[3]); -Post: 0 ≤ f < 2³² - put_unaligned_le32(f, dst + 12); - -Therefore this sequence is redundant. And Andy's code appears to handle -misalignment acceptably. - -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -Tested-by: Ard Biesheuvel <ardb@kernel.org> -Reviewed-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/poly1305-glue.c | 18 ++---------------- - arch/arm64/crypto/poly1305-glue.c | 18 ++---------------- - arch/mips/crypto/poly1305-glue.c | 18 ++---------------- - 3 files changed, 6 insertions(+), 48 deletions(-) - ---- a/arch/arm/crypto/poly1305-glue.c -+++ b/arch/arm/crypto/poly1305-glue.c -@@ -20,7 +20,7 @@ - - void poly1305_init_arm(void *state, const u8 *key); - void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); --void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); -+void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); - - void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) - { -@@ -179,9 +179,6 @@ EXPORT_SYMBOL(poly1305_update_arch); - - void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) - { -- __le32 digest[4]; -- u64 f = 0; -- - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, -@@ -189,18 +186,7 @@ void poly1305_final_arch(struct poly1305 - poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - -- poly1305_emit_arm(&dctx->h, digest, dctx->s); -- -- /* mac = (h + s) % (2^128) */ -- f = (f >> 32) + le32_to_cpu(digest[0]); -- put_unaligned_le32(f, dst); -- f = (f >> 32) + le32_to_cpu(digest[1]); -- put_unaligned_le32(f, dst + 4); -- f = (f >> 32) + le32_to_cpu(digest[2]); -- put_unaligned_le32(f, dst + 8); -- f = (f >> 32) + le32_to_cpu(digest[3]); -- put_unaligned_le32(f, dst + 12); -- -+ poly1305_emit_arm(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; - } - EXPORT_SYMBOL(poly1305_final_arch); ---- a/arch/arm64/crypto/poly1305-glue.c -+++ b/arch/arm64/crypto/poly1305-glue.c -@@ -21,7 +21,7 @@ - asmlinkage void 
poly1305_init_arm64(void *state, const u8 *key); - asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); - asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); --asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce); -+asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); - - static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -@@ -162,9 +162,6 @@ EXPORT_SYMBOL(poly1305_update_arch); - - void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) - { -- __le32 digest[4]; -- u64 f = 0; -- - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, -@@ -172,18 +169,7 @@ void poly1305_final_arch(struct poly1305 - poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - -- poly1305_emit(&dctx->h, digest, dctx->s); -- -- /* mac = (h + s) % (2^128) */ -- f = (f >> 32) + le32_to_cpu(digest[0]); -- put_unaligned_le32(f, dst); -- f = (f >> 32) + le32_to_cpu(digest[1]); -- put_unaligned_le32(f, dst + 4); -- f = (f >> 32) + le32_to_cpu(digest[2]); -- put_unaligned_le32(f, dst + 8); -- f = (f >> 32) + le32_to_cpu(digest[3]); -- put_unaligned_le32(f, dst + 12); -- -+ poly1305_emit(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; - } - EXPORT_SYMBOL(poly1305_final_arch); ---- a/arch/mips/crypto/poly1305-glue.c -+++ b/arch/mips/crypto/poly1305-glue.c -@@ -15,7 +15,7 @@ - - asmlinkage void poly1305_init_mips(void *state, const u8 *key); - asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); --asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce); -+asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); - - void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) - { -@@ -134,9 +134,6 @@ EXPORT_SYMBOL(poly1305_update_arch); - - void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) - { -- 
__le32 digest[4]; -- u64 f = 0; -- - if (unlikely(dctx->buflen)) { - dctx->buf[dctx->buflen++] = 1; - memset(dctx->buf + dctx->buflen, 0, -@@ -144,18 +141,7 @@ void poly1305_final_arch(struct poly1305 - poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); - } - -- poly1305_emit_mips(&dctx->h, digest, dctx->s); -- -- /* mac = (h + s) % (2^128) */ -- f = (f >> 32) + le32_to_cpu(digest[0]); -- put_unaligned_le32(f, dst); -- f = (f >> 32) + le32_to_cpu(digest[1]); -- put_unaligned_le32(f, dst + 4); -- f = (f >> 32) + le32_to_cpu(digest[2]); -- put_unaligned_le32(f, dst + 8); -- f = (f >> 32) + le32_to_cpu(digest[3]); -- put_unaligned_le32(f, dst + 12); -- -+ poly1305_emit_mips(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; - } - EXPORT_SYMBOL(poly1305_final_arch); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0045-crypto-curve25519-Fix-selftest-build-error.patch b/target/linux/generic/backport-5.4/080-wireguard-0045-crypto-curve25519-Fix-selftest-build-error.patch deleted file mode 100644 index fa8d8fd6a9..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0045-crypto-curve25519-Fix-selftest-build-error.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Herbert Xu <herbert@gondor.apana.org.au> -Date: Wed, 8 Jan 2020 12:37:35 +0800 -Subject: [PATCH] crypto: curve25519 - Fix selftest build error - -commit a8bdf2c42ee4d1ee42af1f3601f85de94e70a421 upstream. 
- -If CRYPTO_CURVE25519 is y, CRYPTO_LIB_CURVE25519_GENERIC will be -y, but CRYPTO_LIB_CURVE25519 may be set to m, this causes build -errors: - -lib/crypto/curve25519-selftest.o: In function `curve25519': -curve25519-selftest.c:(.text.unlikely+0xc): undefined reference to `curve25519_arch' -lib/crypto/curve25519-selftest.o: In function `curve25519_selftest': -curve25519-selftest.c:(.init.text+0x17e): undefined reference to `curve25519_base_arch' - -This is because the curve25519 self-test code is being controlled -by the GENERIC option rather than the overall CURVE25519 option, -as is the case with blake2s. To recap, the GENERIC and ARCH options -for CURVE25519 are internal only and selected by users such as -the Crypto API, or the externally visible CURVE25519 option which -in turn is selected by wireguard. The self-test is specific to the -the external CURVE25519 option and should not be enabled by the -Crypto API. - -This patch fixes this by splitting the GENERIC module from the -CURVE25519 module with the latter now containing just the self-test. - -Reported-by: Hulk Robot <hulkci@huawei.com> -Fixes: aa127963f1ca ("crypto: lib/curve25519 - re-add selftests") -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - lib/crypto/Makefile | 9 ++++++--- - lib/crypto/curve25519-generic.c | 24 ++++++++++++++++++++++++ - lib/crypto/curve25519.c | 7 ------- - 3 files changed, 30 insertions(+), 10 deletions(-) - create mode 100644 lib/crypto/curve25519-generic.c - ---- a/lib/crypto/Makefile -+++ b/lib/crypto/Makefile -@@ -19,9 +19,12 @@ libblake2s-y += blake2s.o - obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o - libchacha20poly1305-y += chacha20poly1305.o - --obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o --libcurve25519-y := curve25519-fiat32.o --libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o -+obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o -+libcurve25519-generic-y := curve25519-fiat32.o -+libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o -+libcurve25519-generic-y += curve25519-generic.o -+ -+obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o - libcurve25519-y += curve25519.o - - obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o ---- /dev/null -+++ b/lib/crypto/curve25519-generic.c -@@ -0,0 +1,24 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This is an implementation of the Curve25519 ECDH algorithm, using either -+ * a 32-bit implementation or a 64-bit implementation with 128-bit integers, -+ * depending on what is supported by the target compiler. 
-+ * -+ * Information: https://cr.yp.to/ecdh.html -+ */ -+ -+#include <crypto/curve25519.h> -+#include <linux/module.h> -+ -+const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; -+const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; -+ -+EXPORT_SYMBOL(curve25519_null_point); -+EXPORT_SYMBOL(curve25519_base_point); -+EXPORT_SYMBOL(curve25519_generic); -+ -+MODULE_LICENSE("GPL v2"); -+MODULE_DESCRIPTION("Curve25519 scalar multiplication"); -+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); ---- a/lib/crypto/curve25519.c -+++ b/lib/crypto/curve25519.c -@@ -15,13 +15,6 @@ - - bool curve25519_selftest(void); - --const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; --const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; -- --EXPORT_SYMBOL(curve25519_null_point); --EXPORT_SYMBOL(curve25519_base_point); --EXPORT_SYMBOL(curve25519_generic); -- - static int __init mod_init(void) - { - if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) && diff --git a/target/linux/generic/backport-5.4/080-wireguard-0046-crypto-x86-poly1305-fix-.gitignore-typo.patch b/target/linux/generic/backport-5.4/080-wireguard-0046-crypto-x86-poly1305-fix-.gitignore-typo.patch deleted file mode 100644 index 27f0417ac3..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0046-crypto-x86-poly1305-fix-.gitignore-typo.patch +++ /dev/null @@ -1,23 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 16 Jan 2020 18:23:55 +0100 -Subject: [PATCH] crypto: x86/poly1305 - fix .gitignore typo - -commit 1f6868995326cc82102049e349d8dbd116bdb656 upstream. - -Admist the kbuild robot induced changes, the .gitignore file for the -generated file wasn't updated with the non-clashing filename. This -commit adjusts that. - -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/.gitignore | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/arch/x86/crypto/.gitignore -+++ b/arch/x86/crypto/.gitignore -@@ -1 +1 @@ --poly1305-x86_64.S -+poly1305-x86_64-cryptogams.S diff --git a/target/linux/generic/backport-5.4/080-wireguard-0047-crypto-chacha20poly1305-add-back-missing-test-vector.patch b/target/linux/generic/backport-5.4/080-wireguard-0047-crypto-chacha20poly1305-add-back-missing-test-vector.patch deleted file mode 100644 index eda969577a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0047-crypto-chacha20poly1305-add-back-missing-test-vector.patch +++ /dev/null @@ -1,1858 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 16 Jan 2020 21:26:34 +0100 -Subject: [PATCH] crypto: chacha20poly1305 - add back missing test vectors and - test chunking - -commit 72c7943792c9e7788ddd182337bcf8f650cf56f5 upstream. - -When this was originally ported, the 12-byte nonce vectors were left out -to keep things simple. I agree that we don't need nor want a library -interface for 12-byte nonces. But these test vectors were specially -crafted to look at issues in the underlying primitives and related -interactions. Therefore, we actually want to keep around all of the -test vectors, and simply have a helper function to test them with. - -Secondly, the sglist-based chunking code in the library interface is -rather complicated, so this adds a developer-only test for ensuring that -all the book keeping is correct, across a wide array of possibilities. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - lib/crypto/chacha20poly1305-selftest.c | 1712 +++++++++++++++++++++++- - 1 file changed, 1698 insertions(+), 14 deletions(-) - ---- a/lib/crypto/chacha20poly1305-selftest.c -+++ b/lib/crypto/chacha20poly1305-selftest.c -@@ -4,6 +4,7 @@ - */ - - #include <crypto/chacha20poly1305.h> -+#include <crypto/chacha.h> - #include <crypto/poly1305.h> - - #include <asm/unaligned.h> -@@ -1926,6 +1927,1104 @@ static const u8 enc_key012[] __initconst - 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64 - }; - -+/* wycheproof - rfc7539 */ -+static const u8 enc_input013[] __initconst = { -+ 0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61, -+ 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c, -+ 0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20, -+ 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73, -+ 0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39, -+ 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63, -+ 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66, -+ 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f, -+ 0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20, -+ 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20, -+ 0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75, -+ 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73, -+ 0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f, -+ 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69, -+ 0x74, 0x2e -+}; -+static const u8 enc_output013[] __initconst = { -+ 0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb, -+ 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2, -+ 0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe, -+ 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6, -+ 0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12, -+ 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b, -+ 0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29, -+ 0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36, -+ 0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c, -+ 0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58, -+ 0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94, -+ 0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 
0xbc, -+ 0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d, -+ 0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b, -+ 0x61, 0x16, 0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, -+ 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, -+ 0x06, 0x91 -+}; -+static const u8 enc_assoc013[] __initconst = { -+ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, -+ 0xc4, 0xc5, 0xc6, 0xc7 -+}; -+static const u8 enc_nonce013[] __initconst = { -+ 0x07, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43, -+ 0x44, 0x45, 0x46, 0x47 -+}; -+static const u8 enc_key013[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input014[] __initconst = { }; -+static const u8 enc_output014[] __initconst = { -+ 0x76, 0xac, 0xb3, 0x42, 0xcf, 0x31, 0x66, 0xa5, -+ 0xb6, 0x3c, 0x0c, 0x0e, 0xa1, 0x38, 0x3c, 0x8d -+}; -+static const u8 enc_assoc014[] __initconst = { }; -+static const u8 enc_nonce014[] __initconst = { -+ 0x4d, 0xa5, 0xbf, 0x8d, 0xfd, 0x58, 0x52, 0xc1, -+ 0xea, 0x12, 0x37, 0x9d -+}; -+static const u8 enc_key014[] __initconst = { -+ 0x80, 0xba, 0x31, 0x92, 0xc8, 0x03, 0xce, 0x96, -+ 0x5e, 0xa3, 0x71, 0xd5, 0xff, 0x07, 0x3c, 0xf0, -+ 0xf4, 0x3b, 0x6a, 0x2a, 0xb5, 0x76, 0xb2, 0x08, -+ 0x42, 0x6e, 0x11, 0x40, 0x9c, 0x09, 0xb9, 0xb0 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input015[] __initconst = { }; -+static const u8 enc_output015[] __initconst = { -+ 0x90, 0x6f, 0xa6, 0x28, 0x4b, 0x52, 0xf8, 0x7b, -+ 0x73, 0x59, 0xcb, 0xaa, 0x75, 0x63, 0xc7, 0x09 -+}; -+static const u8 enc_assoc015[] __initconst = { -+ 0xbd, 0x50, 0x67, 0x64, 0xf2, 0xd2, 0xc4, 0x10 -+}; -+static const u8 enc_nonce015[] __initconst = { -+ 0xa9, 0x2e, 0xf0, 0xac, 0x99, 0x1d, 0xd5, 0x16, -+ 0xa3, 0xc6, 0xf6, 0x89 -+}; -+static const u8 enc_key015[] __initconst = { -+ 0x7a, 0x4c, 0xd7, 0x59, 0x17, 0x2e, 0x02, 0xeb, -+ 0x20, 
0x4d, 0xb2, 0xc3, 0xf5, 0xc7, 0x46, 0x22, -+ 0x7d, 0xf5, 0x84, 0xfc, 0x13, 0x45, 0x19, 0x63, -+ 0x91, 0xdb, 0xb9, 0x57, 0x7a, 0x25, 0x07, 0x42 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input016[] __initconst = { -+ 0x2a -+}; -+static const u8 enc_output016[] __initconst = { -+ 0x3a, 0xca, 0xc2, 0x7d, 0xec, 0x09, 0x68, 0x80, -+ 0x1e, 0x9f, 0x6e, 0xde, 0xd6, 0x9d, 0x80, 0x75, -+ 0x22 -+}; -+static const u8 enc_assoc016[] __initconst = { }; -+static const u8 enc_nonce016[] __initconst = { -+ 0x99, 0xe2, 0x3e, 0xc4, 0x89, 0x85, 0xbc, 0xcd, -+ 0xee, 0xab, 0x60, 0xf1 -+}; -+static const u8 enc_key016[] __initconst = { -+ 0xcc, 0x56, 0xb6, 0x80, 0x55, 0x2e, 0xb7, 0x50, -+ 0x08, 0xf5, 0x48, 0x4b, 0x4c, 0xb8, 0x03, 0xfa, -+ 0x50, 0x63, 0xeb, 0xd6, 0xea, 0xb9, 0x1f, 0x6a, -+ 0xb6, 0xae, 0xf4, 0x91, 0x6a, 0x76, 0x62, 0x73 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input017[] __initconst = { -+ 0x51 -+}; -+static const u8 enc_output017[] __initconst = { -+ 0xc4, 0x16, 0x83, 0x10, 0xca, 0x45, 0xb1, 0xf7, -+ 0xc6, 0x6c, 0xad, 0x4e, 0x99, 0xe4, 0x3f, 0x72, -+ 0xb9 -+}; -+static const u8 enc_assoc017[] __initconst = { -+ 0x91, 0xca, 0x6c, 0x59, 0x2c, 0xbc, 0xca, 0x53 -+}; -+static const u8 enc_nonce017[] __initconst = { -+ 0xab, 0x0d, 0xca, 0x71, 0x6e, 0xe0, 0x51, 0xd2, -+ 0x78, 0x2f, 0x44, 0x03 -+}; -+static const u8 enc_key017[] __initconst = { -+ 0x46, 0xf0, 0x25, 0x49, 0x65, 0xf7, 0x69, 0xd5, -+ 0x2b, 0xdb, 0x4a, 0x70, 0xb4, 0x43, 0x19, 0x9f, -+ 0x8e, 0xf2, 0x07, 0x52, 0x0d, 0x12, 0x20, 0xc5, -+ 0x5e, 0x4b, 0x70, 0xf0, 0xfd, 0xa6, 0x20, 0xee -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input018[] __initconst = { -+ 0x5c, 0x60 -+}; -+static const u8 enc_output018[] __initconst = { -+ 0x4d, 0x13, 0x91, 0xe8, 0xb6, 0x1e, 0xfb, 0x39, -+ 0xc1, 0x22, 0x19, 0x54, 0x53, 0x07, 0x7b, 0x22, -+ 0xe5, 0xe2 -+}; -+static const u8 enc_assoc018[] __initconst = { }; -+static const u8 enc_nonce018[] __initconst = { -+ 0x46, 0x1a, 0xf1, 0x22, 0xe9, 0xf2, 
0xe0, 0x34, -+ 0x7e, 0x03, 0xf2, 0xdb -+}; -+static const u8 enc_key018[] __initconst = { -+ 0x2f, 0x7f, 0x7e, 0x4f, 0x59, 0x2b, 0xb3, 0x89, -+ 0x19, 0x49, 0x89, 0x74, 0x35, 0x07, 0xbf, 0x3e, -+ 0xe9, 0xcb, 0xde, 0x17, 0x86, 0xb6, 0x69, 0x5f, -+ 0xe6, 0xc0, 0x25, 0xfd, 0x9b, 0xa4, 0xc1, 0x00 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input019[] __initconst = { -+ 0xdd, 0xf2 -+}; -+static const u8 enc_output019[] __initconst = { -+ 0xb6, 0x0d, 0xea, 0xd0, 0xfd, 0x46, 0x97, 0xec, -+ 0x2e, 0x55, 0x58, 0x23, 0x77, 0x19, 0xd0, 0x24, -+ 0x37, 0xa2 -+}; -+static const u8 enc_assoc019[] __initconst = { -+ 0x88, 0x36, 0x4f, 0xc8, 0x06, 0x05, 0x18, 0xbf -+}; -+static const u8 enc_nonce019[] __initconst = { -+ 0x61, 0x54, 0x6b, 0xa5, 0xf1, 0x72, 0x05, 0x90, -+ 0xb6, 0x04, 0x0a, 0xc6 -+}; -+static const u8 enc_key019[] __initconst = { -+ 0xc8, 0x83, 0x3d, 0xce, 0x5e, 0xa9, 0xf2, 0x48, -+ 0xaa, 0x20, 0x30, 0xea, 0xcf, 0xe7, 0x2b, 0xff, -+ 0xe6, 0x9a, 0x62, 0x0c, 0xaf, 0x79, 0x33, 0x44, -+ 0xe5, 0x71, 0x8f, 0xe0, 0xd7, 0xab, 0x1a, 0x58 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input020[] __initconst = { -+ 0xab, 0x85, 0xe9, 0xc1, 0x57, 0x17, 0x31 -+}; -+static const u8 enc_output020[] __initconst = { -+ 0x5d, 0xfe, 0x34, 0x40, 0xdb, 0xb3, 0xc3, 0xed, -+ 0x7a, 0x43, 0x4e, 0x26, 0x02, 0xd3, 0x94, 0x28, -+ 0x1e, 0x0a, 0xfa, 0x9f, 0xb7, 0xaa, 0x42 -+}; -+static const u8 enc_assoc020[] __initconst = { }; -+static const u8 enc_nonce020[] __initconst = { -+ 0x3c, 0x4e, 0x65, 0x4d, 0x66, 0x3f, 0xa4, 0x59, -+ 0x6d, 0xc5, 0x5b, 0xb7 -+}; -+static const u8 enc_key020[] __initconst = { -+ 0x55, 0x56, 0x81, 0x58, 0xd3, 0xa6, 0x48, 0x3f, -+ 0x1f, 0x70, 0x21, 0xea, 0xb6, 0x9b, 0x70, 0x3f, -+ 0x61, 0x42, 0x51, 0xca, 0xdc, 0x1a, 0xf5, 0xd3, -+ 0x4a, 0x37, 0x4f, 0xdb, 0xfc, 0x5a, 0xda, 0xc7 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input021[] __initconst = { -+ 0x4e, 0xe5, 0xcd, 0xa2, 0x0d, 0x42, 0x90 -+}; -+static const u8 enc_output021[] __initconst = { 
-+ 0x4b, 0xd4, 0x72, 0x12, 0x94, 0x1c, 0xe3, 0x18, -+ 0x5f, 0x14, 0x08, 0xee, 0x7f, 0xbf, 0x18, 0xf5, -+ 0xab, 0xad, 0x6e, 0x22, 0x53, 0xa1, 0xba -+}; -+static const u8 enc_assoc021[] __initconst = { -+ 0x84, 0xe4, 0x6b, 0xe8, 0xc0, 0x91, 0x90, 0x53 -+}; -+static const u8 enc_nonce021[] __initconst = { -+ 0x58, 0x38, 0x93, 0x75, 0xc6, 0x9e, 0xe3, 0x98, -+ 0xde, 0x94, 0x83, 0x96 -+}; -+static const u8 enc_key021[] __initconst = { -+ 0xe3, 0xc0, 0x9e, 0x7f, 0xab, 0x1a, 0xef, 0xb5, -+ 0x16, 0xda, 0x6a, 0x33, 0x02, 0x2a, 0x1d, 0xd4, -+ 0xeb, 0x27, 0x2c, 0x80, 0xd5, 0x40, 0xc5, 0xda, -+ 0x52, 0xa7, 0x30, 0xf3, 0x4d, 0x84, 0x0d, 0x7f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input022[] __initconst = { -+ 0xbe, 0x33, 0x08, 0xf7, 0x2a, 0x2c, 0x6a, 0xed -+}; -+static const u8 enc_output022[] __initconst = { -+ 0x8e, 0x94, 0x39, 0xa5, 0x6e, 0xee, 0xc8, 0x17, -+ 0xfb, 0xe8, 0xa6, 0xed, 0x8f, 0xab, 0xb1, 0x93, -+ 0x75, 0x39, 0xdd, 0x6c, 0x00, 0xe9, 0x00, 0x21 -+}; -+static const u8 enc_assoc022[] __initconst = { }; -+static const u8 enc_nonce022[] __initconst = { -+ 0x4f, 0x07, 0xaf, 0xed, 0xfd, 0xc3, 0xb6, 0xc2, -+ 0x36, 0x18, 0x23, 0xd3 -+}; -+static const u8 enc_key022[] __initconst = { -+ 0x51, 0xe4, 0xbf, 0x2b, 0xad, 0x92, 0xb7, 0xaf, -+ 0xf1, 0xa4, 0xbc, 0x05, 0x55, 0x0b, 0xa8, 0x1d, -+ 0xf4, 0xb9, 0x6f, 0xab, 0xf4, 0x1c, 0x12, 0xc7, -+ 0xb0, 0x0e, 0x60, 0xe4, 0x8d, 0xb7, 0xe1, 0x52 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input023[] __initconst = { -+ 0xa4, 0xc9, 0xc2, 0x80, 0x1b, 0x71, 0xf7, 0xdf -+}; -+static const u8 enc_output023[] __initconst = { -+ 0xb9, 0xb9, 0x10, 0x43, 0x3a, 0xf0, 0x52, 0xb0, -+ 0x45, 0x30, 0xf5, 0x1a, 0xee, 0xe0, 0x24, 0xe0, -+ 0xa4, 0x45, 0xa6, 0x32, 0x8f, 0xa6, 0x7a, 0x18 -+}; -+static const u8 enc_assoc023[] __initconst = { -+ 0x66, 0xc0, 0xae, 0x70, 0x07, 0x6c, 0xb1, 0x4d -+}; -+static const u8 enc_nonce023[] __initconst = { -+ 0xb4, 0xea, 0x66, 0x6e, 0xe1, 0x19, 0x56, 0x33, -+ 0x66, 0x48, 0x4a, 0x78 -+}; 
-+static const u8 enc_key023[] __initconst = { -+ 0x11, 0x31, 0xc1, 0x41, 0x85, 0x77, 0xa0, 0x54, -+ 0xde, 0x7a, 0x4a, 0xc5, 0x51, 0x95, 0x0f, 0x1a, -+ 0x05, 0x3f, 0x9a, 0xe4, 0x6e, 0x5b, 0x75, 0xfe, -+ 0x4a, 0xbd, 0x56, 0x08, 0xd7, 0xcd, 0xda, 0xdd -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input024[] __initconst = { -+ 0x42, 0xba, 0xae, 0x59, 0x78, 0xfe, 0xaf, 0x5c, -+ 0x36, 0x8d, 0x14, 0xe0 -+}; -+static const u8 enc_output024[] __initconst = { -+ 0xff, 0x7d, 0xc2, 0x03, 0xb2, 0x6c, 0x46, 0x7a, -+ 0x6b, 0x50, 0xdb, 0x33, 0x57, 0x8c, 0x0f, 0x27, -+ 0x58, 0xc2, 0xe1, 0x4e, 0x36, 0xd4, 0xfc, 0x10, -+ 0x6d, 0xcb, 0x29, 0xb4 -+}; -+static const u8 enc_assoc024[] __initconst = { }; -+static const u8 enc_nonce024[] __initconst = { -+ 0x9a, 0x59, 0xfc, 0xe2, 0x6d, 0xf0, 0x00, 0x5e, -+ 0x07, 0x53, 0x86, 0x56 -+}; -+static const u8 enc_key024[] __initconst = { -+ 0x99, 0xb6, 0x2b, 0xd5, 0xaf, 0xbe, 0x3f, 0xb0, -+ 0x15, 0xbd, 0xe9, 0x3f, 0x0a, 0xbf, 0x48, 0x39, -+ 0x57, 0xa1, 0xc3, 0xeb, 0x3c, 0xa5, 0x9c, 0xb5, -+ 0x0b, 0x39, 0xf7, 0xf8, 0xa9, 0xcc, 0x51, 0xbe -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input025[] __initconst = { -+ 0xfd, 0xc8, 0x5b, 0x94, 0xa4, 0xb2, 0xa6, 0xb7, -+ 0x59, 0xb1, 0xa0, 0xda -+}; -+static const u8 enc_output025[] __initconst = { -+ 0x9f, 0x88, 0x16, 0xde, 0x09, 0x94, 0xe9, 0x38, -+ 0xd9, 0xe5, 0x3f, 0x95, 0xd0, 0x86, 0xfc, 0x6c, -+ 0x9d, 0x8f, 0xa9, 0x15, 0xfd, 0x84, 0x23, 0xa7, -+ 0xcf, 0x05, 0x07, 0x2f -+}; -+static const u8 enc_assoc025[] __initconst = { -+ 0xa5, 0x06, 0xe1, 0xa5, 0xc6, 0x90, 0x93, 0xf9 -+}; -+static const u8 enc_nonce025[] __initconst = { -+ 0x58, 0xdb, 0xd4, 0xad, 0x2c, 0x4a, 0xd3, 0x5d, -+ 0xd9, 0x06, 0xe9, 0xce -+}; -+static const u8 enc_key025[] __initconst = { -+ 0x85, 0xf3, 0x5b, 0x62, 0x82, 0xcf, 0xf4, 0x40, -+ 0xbc, 0x10, 0x20, 0xc8, 0x13, 0x6f, 0xf2, 0x70, -+ 0x31, 0x11, 0x0f, 0xa6, 0x3e, 0xc1, 0x6f, 0x1e, -+ 0x82, 0x51, 0x18, 0xb0, 0x06, 0xb9, 0x12, 0x57 -+}; -+ -+/* wycheproof - misc */ 
-+static const u8 enc_input026[] __initconst = { -+ 0x51, 0xf8, 0xc1, 0xf7, 0x31, 0xea, 0x14, 0xac, -+ 0xdb, 0x21, 0x0a, 0x6d, 0x97, 0x3e, 0x07 -+}; -+static const u8 enc_output026[] __initconst = { -+ 0x0b, 0x29, 0x63, 0x8e, 0x1f, 0xbd, 0xd6, 0xdf, -+ 0x53, 0x97, 0x0b, 0xe2, 0x21, 0x00, 0x42, 0x2a, -+ 0x91, 0x34, 0x08, 0x7d, 0x67, 0xa4, 0x6e, 0x79, -+ 0x17, 0x8d, 0x0a, 0x93, 0xf5, 0xe1, 0xd2 -+}; -+static const u8 enc_assoc026[] __initconst = { }; -+static const u8 enc_nonce026[] __initconst = { -+ 0x68, 0xab, 0x7f, 0xdb, 0xf6, 0x19, 0x01, 0xda, -+ 0xd4, 0x61, 0xd2, 0x3c -+}; -+static const u8 enc_key026[] __initconst = { -+ 0x67, 0x11, 0x96, 0x27, 0xbd, 0x98, 0x8e, 0xda, -+ 0x90, 0x62, 0x19, 0xe0, 0x8c, 0x0d, 0x0d, 0x77, -+ 0x9a, 0x07, 0xd2, 0x08, 0xce, 0x8a, 0x4f, 0xe0, -+ 0x70, 0x9a, 0xf7, 0x55, 0xee, 0xec, 0x6d, 0xcb -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input027[] __initconst = { -+ 0x97, 0x46, 0x9d, 0xa6, 0x67, 0xd6, 0x11, 0x0f, -+ 0x9c, 0xbd, 0xa1, 0xd1, 0xa2, 0x06, 0x73 -+}; -+static const u8 enc_output027[] __initconst = { -+ 0x32, 0xdb, 0x66, 0xc4, 0xa3, 0x81, 0x9d, 0x81, -+ 0x55, 0x74, 0x55, 0xe5, 0x98, 0x0f, 0xed, 0xfe, -+ 0xae, 0x30, 0xde, 0xc9, 0x4e, 0x6a, 0xd3, 0xa9, -+ 0xee, 0xa0, 0x6a, 0x0d, 0x70, 0x39, 0x17 -+}; -+static const u8 enc_assoc027[] __initconst = { -+ 0x64, 0x53, 0xa5, 0x33, 0x84, 0x63, 0x22, 0x12 -+}; -+static const u8 enc_nonce027[] __initconst = { -+ 0xd9, 0x5b, 0x32, 0x43, 0xaf, 0xae, 0xf7, 0x14, -+ 0xc5, 0x03, 0x5b, 0x6a -+}; -+static const u8 enc_key027[] __initconst = { -+ 0xe6, 0xf1, 0x11, 0x8d, 0x41, 0xe4, 0xb4, 0x3f, -+ 0xb5, 0x82, 0x21, 0xb7, 0xed, 0x79, 0x67, 0x38, -+ 0x34, 0xe0, 0xd8, 0xac, 0x5c, 0x4f, 0xa6, 0x0b, -+ 0xbc, 0x8b, 0xc4, 0x89, 0x3a, 0x58, 0x89, 0x4d -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input028[] __initconst = { -+ 0x54, 0x9b, 0x36, 0x5a, 0xf9, 0x13, 0xf3, 0xb0, -+ 0x81, 0x13, 0x1c, 0xcb, 0x6b, 0x82, 0x55, 0x88 -+}; -+static const u8 enc_output028[] __initconst = { -+ 
0xe9, 0x11, 0x0e, 0x9f, 0x56, 0xab, 0x3c, 0xa4, -+ 0x83, 0x50, 0x0c, 0xea, 0xba, 0xb6, 0x7a, 0x13, -+ 0x83, 0x6c, 0xca, 0xbf, 0x15, 0xa6, 0xa2, 0x2a, -+ 0x51, 0xc1, 0x07, 0x1c, 0xfa, 0x68, 0xfa, 0x0c -+}; -+static const u8 enc_assoc028[] __initconst = { }; -+static const u8 enc_nonce028[] __initconst = { -+ 0x2f, 0xcb, 0x1b, 0x38, 0xa9, 0x9e, 0x71, 0xb8, -+ 0x47, 0x40, 0xad, 0x9b -+}; -+static const u8 enc_key028[] __initconst = { -+ 0x59, 0xd4, 0xea, 0xfb, 0x4d, 0xe0, 0xcf, 0xc7, -+ 0xd3, 0xdb, 0x99, 0xa8, 0xf5, 0x4b, 0x15, 0xd7, -+ 0xb3, 0x9f, 0x0a, 0xcc, 0x8d, 0xa6, 0x97, 0x63, -+ 0xb0, 0x19, 0xc1, 0x69, 0x9f, 0x87, 0x67, 0x4a -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input029[] __initconst = { -+ 0x55, 0xa4, 0x65, 0x64, 0x4f, 0x5b, 0x65, 0x09, -+ 0x28, 0xcb, 0xee, 0x7c, 0x06, 0x32, 0x14, 0xd6 -+}; -+static const u8 enc_output029[] __initconst = { -+ 0xe4, 0xb1, 0x13, 0xcb, 0x77, 0x59, 0x45, 0xf3, -+ 0xd3, 0xa8, 0xae, 0x9e, 0xc1, 0x41, 0xc0, 0x0c, -+ 0x7c, 0x43, 0xf1, 0x6c, 0xe0, 0x96, 0xd0, 0xdc, -+ 0x27, 0xc9, 0x58, 0x49, 0xdc, 0x38, 0x3b, 0x7d -+}; -+static const u8 enc_assoc029[] __initconst = { -+ 0x03, 0x45, 0x85, 0x62, 0x1a, 0xf8, 0xd7, 0xff -+}; -+static const u8 enc_nonce029[] __initconst = { -+ 0x11, 0x8a, 0x69, 0x64, 0xc2, 0xd3, 0xe3, 0x80, -+ 0x07, 0x1f, 0x52, 0x66 -+}; -+static const u8 enc_key029[] __initconst = { -+ 0xb9, 0x07, 0xa4, 0x50, 0x75, 0x51, 0x3f, 0xe8, -+ 0xa8, 0x01, 0x9e, 0xde, 0xe3, 0xf2, 0x59, 0x14, -+ 0x87, 0xb2, 0xa0, 0x30, 0xb0, 0x3c, 0x6e, 0x1d, -+ 0x77, 0x1c, 0x86, 0x25, 0x71, 0xd2, 0xea, 0x1e -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input030[] __initconst = { -+ 0x3f, 0xf1, 0x51, 0x4b, 0x1c, 0x50, 0x39, 0x15, -+ 0x91, 0x8f, 0x0c, 0x0c, 0x31, 0x09, 0x4a, 0x6e, -+ 0x1f -+}; -+static const u8 enc_output030[] __initconst = { -+ 0x02, 0xcc, 0x3a, 0xcb, 0x5e, 0xe1, 0xfc, 0xdd, -+ 0x12, 0xa0, 0x3b, 0xb8, 0x57, 0x97, 0x64, 0x74, -+ 0xd3, 0xd8, 0x3b, 0x74, 0x63, 0xa2, 0xc3, 0x80, -+ 0x0f, 0xe9, 0x58, 0xc2, 
0x8e, 0xaa, 0x29, 0x08, -+ 0x13 -+}; -+static const u8 enc_assoc030[] __initconst = { }; -+static const u8 enc_nonce030[] __initconst = { -+ 0x45, 0xaa, 0xa3, 0xe5, 0xd1, 0x6d, 0x2d, 0x42, -+ 0xdc, 0x03, 0x44, 0x5d -+}; -+static const u8 enc_key030[] __initconst = { -+ 0x3b, 0x24, 0x58, 0xd8, 0x17, 0x6e, 0x16, 0x21, -+ 0xc0, 0xcc, 0x24, 0xc0, 0xc0, 0xe2, 0x4c, 0x1e, -+ 0x80, 0xd7, 0x2f, 0x7e, 0xe9, 0x14, 0x9a, 0x4b, -+ 0x16, 0x61, 0x76, 0x62, 0x96, 0x16, 0xd0, 0x11 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input031[] __initconst = { -+ 0x63, 0x85, 0x8c, 0xa3, 0xe2, 0xce, 0x69, 0x88, -+ 0x7b, 0x57, 0x8a, 0x3c, 0x16, 0x7b, 0x42, 0x1c, -+ 0x9c -+}; -+static const u8 enc_output031[] __initconst = { -+ 0x35, 0x76, 0x64, 0x88, 0xd2, 0xbc, 0x7c, 0x2b, -+ 0x8d, 0x17, 0xcb, 0xbb, 0x9a, 0xbf, 0xad, 0x9e, -+ 0x6d, 0x1f, 0x39, 0x1e, 0x65, 0x7b, 0x27, 0x38, -+ 0xdd, 0xa0, 0x84, 0x48, 0xcb, 0xa2, 0x81, 0x1c, -+ 0xeb -+}; -+static const u8 enc_assoc031[] __initconst = { -+ 0x9a, 0xaf, 0x29, 0x9e, 0xee, 0xa7, 0x8f, 0x79 -+}; -+static const u8 enc_nonce031[] __initconst = { -+ 0xf0, 0x38, 0x4f, 0xb8, 0x76, 0x12, 0x14, 0x10, -+ 0x63, 0x3d, 0x99, 0x3d -+}; -+static const u8 enc_key031[] __initconst = { -+ 0xf6, 0x0c, 0x6a, 0x1b, 0x62, 0x57, 0x25, 0xf7, -+ 0x6c, 0x70, 0x37, 0xb4, 0x8f, 0xe3, 0x57, 0x7f, -+ 0xa7, 0xf7, 0xb8, 0x7b, 0x1b, 0xd5, 0xa9, 0x82, -+ 0x17, 0x6d, 0x18, 0x23, 0x06, 0xff, 0xb8, 0x70 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input032[] __initconst = { -+ 0x10, 0xf1, 0xec, 0xf9, 0xc6, 0x05, 0x84, 0x66, -+ 0x5d, 0x9a, 0xe5, 0xef, 0xe2, 0x79, 0xe7, 0xf7, -+ 0x37, 0x7e, 0xea, 0x69, 0x16, 0xd2, 0xb1, 0x11 -+}; -+static const u8 enc_output032[] __initconst = { -+ 0x42, 0xf2, 0x6c, 0x56, 0xcb, 0x4b, 0xe2, 0x1d, -+ 0x9d, 0x8d, 0x0c, 0x80, 0xfc, 0x99, 0xdd, 0xe0, -+ 0x0d, 0x75, 0xf3, 0x80, 0x74, 0xbf, 0xe7, 0x64, -+ 0x54, 0xaa, 0x7e, 0x13, 0xd4, 0x8f, 0xff, 0x7d, -+ 0x75, 0x57, 0x03, 0x94, 0x57, 0x04, 0x0a, 0x3a -+}; -+static const u8 
enc_assoc032[] __initconst = { }; -+static const u8 enc_nonce032[] __initconst = { -+ 0xe6, 0xb1, 0xad, 0xf2, 0xfd, 0x58, 0xa8, 0x76, -+ 0x2c, 0x65, 0xf3, 0x1b -+}; -+static const u8 enc_key032[] __initconst = { -+ 0x02, 0x12, 0xa8, 0xde, 0x50, 0x07, 0xed, 0x87, -+ 0xb3, 0x3f, 0x1a, 0x70, 0x90, 0xb6, 0x11, 0x4f, -+ 0x9e, 0x08, 0xce, 0xfd, 0x96, 0x07, 0xf2, 0xc2, -+ 0x76, 0xbd, 0xcf, 0xdb, 0xc5, 0xce, 0x9c, 0xd7 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input033[] __initconst = { -+ 0x92, 0x22, 0xf9, 0x01, 0x8e, 0x54, 0xfd, 0x6d, -+ 0xe1, 0x20, 0x08, 0x06, 0xa9, 0xee, 0x8e, 0x4c, -+ 0xc9, 0x04, 0xd2, 0x9f, 0x25, 0xcb, 0xa1, 0x93 -+}; -+static const u8 enc_output033[] __initconst = { -+ 0x12, 0x30, 0x32, 0x43, 0x7b, 0x4b, 0xfd, 0x69, -+ 0x20, 0xe8, 0xf7, 0xe7, 0xe0, 0x08, 0x7a, 0xe4, -+ 0x88, 0x9e, 0xbe, 0x7a, 0x0a, 0xd0, 0xe9, 0x00, -+ 0x3c, 0xf6, 0x8f, 0x17, 0x95, 0x50, 0xda, 0x63, -+ 0xd3, 0xb9, 0x6c, 0x2d, 0x55, 0x41, 0x18, 0x65 -+}; -+static const u8 enc_assoc033[] __initconst = { -+ 0x3e, 0x8b, 0xc5, 0xad, 0xe1, 0x82, 0xff, 0x08 -+}; -+static const u8 enc_nonce033[] __initconst = { -+ 0x6b, 0x28, 0x2e, 0xbe, 0xcc, 0x54, 0x1b, 0xcd, -+ 0x78, 0x34, 0xed, 0x55 -+}; -+static const u8 enc_key033[] __initconst = { -+ 0xc5, 0xbc, 0x09, 0x56, 0x56, 0x46, 0xe7, 0xed, -+ 0xda, 0x95, 0x4f, 0x1f, 0x73, 0x92, 0x23, 0xda, -+ 0xda, 0x20, 0xb9, 0x5c, 0x44, 0xab, 0x03, 0x3d, -+ 0x0f, 0xae, 0x4b, 0x02, 0x83, 0xd1, 0x8b, 0xe3 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input034[] __initconst = { -+ 0xb0, 0x53, 0x99, 0x92, 0x86, 0xa2, 0x82, 0x4f, -+ 0x42, 0xcc, 0x8c, 0x20, 0x3a, 0xb2, 0x4e, 0x2c, -+ 0x97, 0xa6, 0x85, 0xad, 0xcc, 0x2a, 0xd3, 0x26, -+ 0x62, 0x55, 0x8e, 0x55, 0xa5, 0xc7, 0x29 -+}; -+static const u8 enc_output034[] __initconst = { -+ 0x45, 0xc7, 0xd6, 0xb5, 0x3a, 0xca, 0xd4, 0xab, -+ 0xb6, 0x88, 0x76, 0xa6, 0xe9, 0x6a, 0x48, 0xfb, -+ 0x59, 0x52, 0x4d, 0x2c, 0x92, 0xc9, 0xd8, 0xa1, -+ 0x89, 0xc9, 0xfd, 0x2d, 0xb9, 0x17, 0x46, 0x56, -+ 0x6d, 
0x3c, 0xa1, 0x0e, 0x31, 0x1b, 0x69, 0x5f, -+ 0x3e, 0xae, 0x15, 0x51, 0x65, 0x24, 0x93 -+}; -+static const u8 enc_assoc034[] __initconst = { }; -+static const u8 enc_nonce034[] __initconst = { -+ 0x04, 0xa9, 0xbe, 0x03, 0x50, 0x8a, 0x5f, 0x31, -+ 0x37, 0x1a, 0x6f, 0xd2 -+}; -+static const u8 enc_key034[] __initconst = { -+ 0x2e, 0xb5, 0x1c, 0x46, 0x9a, 0xa8, 0xeb, 0x9e, -+ 0x6c, 0x54, 0xa8, 0x34, 0x9b, 0xae, 0x50, 0xa2, -+ 0x0f, 0x0e, 0x38, 0x27, 0x11, 0xbb, 0xa1, 0x15, -+ 0x2c, 0x42, 0x4f, 0x03, 0xb6, 0x67, 0x1d, 0x71 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input035[] __initconst = { -+ 0xf4, 0x52, 0x06, 0xab, 0xc2, 0x55, 0x52, 0xb2, -+ 0xab, 0xc9, 0xab, 0x7f, 0xa2, 0x43, 0x03, 0x5f, -+ 0xed, 0xaa, 0xdd, 0xc3, 0xb2, 0x29, 0x39, 0x56, -+ 0xf1, 0xea, 0x6e, 0x71, 0x56, 0xe7, 0xeb -+}; -+static const u8 enc_output035[] __initconst = { -+ 0x46, 0xa8, 0x0c, 0x41, 0x87, 0x02, 0x47, 0x20, -+ 0x08, 0x46, 0x27, 0x58, 0x00, 0x80, 0xdd, 0xe5, -+ 0xa3, 0xf4, 0xa1, 0x10, 0x93, 0xa7, 0x07, 0x6e, -+ 0xd6, 0xf3, 0xd3, 0x26, 0xbc, 0x7b, 0x70, 0x53, -+ 0x4d, 0x4a, 0xa2, 0x83, 0x5a, 0x52, 0xe7, 0x2d, -+ 0x14, 0xdf, 0x0e, 0x4f, 0x47, 0xf2, 0x5f -+}; -+static const u8 enc_assoc035[] __initconst = { -+ 0x37, 0x46, 0x18, 0xa0, 0x6e, 0xa9, 0x8a, 0x48 -+}; -+static const u8 enc_nonce035[] __initconst = { -+ 0x47, 0x0a, 0x33, 0x9e, 0xcb, 0x32, 0x19, 0xb8, -+ 0xb8, 0x1a, 0x1f, 0x8b -+}; -+static const u8 enc_key035[] __initconst = { -+ 0x7f, 0x5b, 0x74, 0xc0, 0x7e, 0xd1, 0xb4, 0x0f, -+ 0xd1, 0x43, 0x58, 0xfe, 0x2f, 0xf2, 0xa7, 0x40, -+ 0xc1, 0x16, 0xc7, 0x70, 0x65, 0x10, 0xe6, 0xa4, -+ 0x37, 0xf1, 0x9e, 0xa4, 0x99, 0x11, 0xce, 0xc4 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input036[] __initconst = { -+ 0xb9, 0xc5, 0x54, 0xcb, 0xc3, 0x6a, 0xc1, 0x8a, -+ 0xe8, 0x97, 0xdf, 0x7b, 0xee, 0xca, 0xc1, 0xdb, -+ 0xeb, 0x4e, 0xaf, 0xa1, 0x56, 0xbb, 0x60, 0xce, -+ 0x2e, 0x5d, 0x48, 0xf0, 0x57, 0x15, 0xe6, 0x78 -+}; -+static const u8 enc_output036[] __initconst = { -+ 0xea, 
0x29, 0xaf, 0xa4, 0x9d, 0x36, 0xe8, 0x76, -+ 0x0f, 0x5f, 0xe1, 0x97, 0x23, 0xb9, 0x81, 0x1e, -+ 0xd5, 0xd5, 0x19, 0x93, 0x4a, 0x44, 0x0f, 0x50, -+ 0x81, 0xac, 0x43, 0x0b, 0x95, 0x3b, 0x0e, 0x21, -+ 0x22, 0x25, 0x41, 0xaf, 0x46, 0xb8, 0x65, 0x33, -+ 0xc6, 0xb6, 0x8d, 0x2f, 0xf1, 0x08, 0xa7, 0xea -+}; -+static const u8 enc_assoc036[] __initconst = { }; -+static const u8 enc_nonce036[] __initconst = { -+ 0x72, 0xcf, 0xd9, 0x0e, 0xf3, 0x02, 0x6c, 0xa2, -+ 0x2b, 0x7e, 0x6e, 0x6a -+}; -+static const u8 enc_key036[] __initconst = { -+ 0xe1, 0x73, 0x1d, 0x58, 0x54, 0xe1, 0xb7, 0x0c, -+ 0xb3, 0xff, 0xe8, 0xb7, 0x86, 0xa2, 0xb3, 0xeb, -+ 0xf0, 0x99, 0x43, 0x70, 0x95, 0x47, 0x57, 0xb9, -+ 0xdc, 0x8c, 0x7b, 0xc5, 0x35, 0x46, 0x34, 0xa3 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input037[] __initconst = { -+ 0x6b, 0x26, 0x04, 0x99, 0x6c, 0xd3, 0x0c, 0x14, -+ 0xa1, 0x3a, 0x52, 0x57, 0xed, 0x6c, 0xff, 0xd3, -+ 0xbc, 0x5e, 0x29, 0xd6, 0xb9, 0x7e, 0xb1, 0x79, -+ 0x9e, 0xb3, 0x35, 0xe2, 0x81, 0xea, 0x45, 0x1e -+}; -+static const u8 enc_output037[] __initconst = { -+ 0x6d, 0xad, 0x63, 0x78, 0x97, 0x54, 0x4d, 0x8b, -+ 0xf6, 0xbe, 0x95, 0x07, 0xed, 0x4d, 0x1b, 0xb2, -+ 0xe9, 0x54, 0xbc, 0x42, 0x7e, 0x5d, 0xe7, 0x29, -+ 0xda, 0xf5, 0x07, 0x62, 0x84, 0x6f, 0xf2, 0xf4, -+ 0x7b, 0x99, 0x7d, 0x93, 0xc9, 0x82, 0x18, 0x9d, -+ 0x70, 0x95, 0xdc, 0x79, 0x4c, 0x74, 0x62, 0x32 -+}; -+static const u8 enc_assoc037[] __initconst = { -+ 0x23, 0x33, 0xe5, 0xce, 0x0f, 0x93, 0xb0, 0x59 -+}; -+static const u8 enc_nonce037[] __initconst = { -+ 0x26, 0x28, 0x80, 0xd4, 0x75, 0xf3, 0xda, 0xc5, -+ 0x34, 0x0d, 0xd1, 0xb8 -+}; -+static const u8 enc_key037[] __initconst = { -+ 0x27, 0xd8, 0x60, 0x63, 0x1b, 0x04, 0x85, 0xa4, -+ 0x10, 0x70, 0x2f, 0xea, 0x61, 0xbc, 0x87, 0x3f, -+ 0x34, 0x42, 0x26, 0x0c, 0xad, 0xed, 0x4a, 0xbd, -+ 0xe2, 0x5b, 0x78, 0x6a, 0x2d, 0x97, 0xf1, 0x45 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input038[] __initconst = { -+ 0x97, 0x3d, 0x0c, 0x75, 0x38, 0x26, 0xba, 
0xe4, -+ 0x66, 0xcf, 0x9a, 0xbb, 0x34, 0x93, 0x15, 0x2e, -+ 0x9d, 0xe7, 0x81, 0x9e, 0x2b, 0xd0, 0xc7, 0x11, -+ 0x71, 0x34, 0x6b, 0x4d, 0x2c, 0xeb, 0xf8, 0x04, -+ 0x1a, 0xa3, 0xce, 0xdc, 0x0d, 0xfd, 0x7b, 0x46, -+ 0x7e, 0x26, 0x22, 0x8b, 0xc8, 0x6c, 0x9a -+}; -+static const u8 enc_output038[] __initconst = { -+ 0xfb, 0xa7, 0x8a, 0xe4, 0xf9, 0xd8, 0x08, 0xa6, -+ 0x2e, 0x3d, 0xa4, 0x0b, 0xe2, 0xcb, 0x77, 0x00, -+ 0xc3, 0x61, 0x3d, 0x9e, 0xb2, 0xc5, 0x29, 0xc6, -+ 0x52, 0xe7, 0x6a, 0x43, 0x2c, 0x65, 0x8d, 0x27, -+ 0x09, 0x5f, 0x0e, 0xb8, 0xf9, 0x40, 0xc3, 0x24, -+ 0x98, 0x1e, 0xa9, 0x35, 0xe5, 0x07, 0xf9, 0x8f, -+ 0x04, 0x69, 0x56, 0xdb, 0x3a, 0x51, 0x29, 0x08, -+ 0xbd, 0x7a, 0xfc, 0x8f, 0x2a, 0xb0, 0xa9 -+}; -+static const u8 enc_assoc038[] __initconst = { }; -+static const u8 enc_nonce038[] __initconst = { -+ 0xe7, 0x4a, 0x51, 0x5e, 0x7e, 0x21, 0x02, 0xb9, -+ 0x0b, 0xef, 0x55, 0xd2 -+}; -+static const u8 enc_key038[] __initconst = { -+ 0xcf, 0x0d, 0x40, 0xa4, 0x64, 0x4e, 0x5f, 0x51, -+ 0x81, 0x51, 0x65, 0xd5, 0x30, 0x1b, 0x22, 0x63, -+ 0x1f, 0x45, 0x44, 0xc4, 0x9a, 0x18, 0x78, 0xe3, -+ 0xa0, 0xa5, 0xe8, 0xe1, 0xaa, 0xe0, 0xf2, 0x64 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input039[] __initconst = { -+ 0xa9, 0x89, 0x95, 0x50, 0x4d, 0xf1, 0x6f, 0x74, -+ 0x8b, 0xfb, 0x77, 0x85, 0xff, 0x91, 0xee, 0xb3, -+ 0xb6, 0x60, 0xea, 0x9e, 0xd3, 0x45, 0x0c, 0x3d, -+ 0x5e, 0x7b, 0x0e, 0x79, 0xef, 0x65, 0x36, 0x59, -+ 0xa9, 0x97, 0x8d, 0x75, 0x54, 0x2e, 0xf9, 0x1c, -+ 0x45, 0x67, 0x62, 0x21, 0x56, 0x40, 0xb9 -+}; -+static const u8 enc_output039[] __initconst = { -+ 0xa1, 0xff, 0xed, 0x80, 0x76, 0x18, 0x29, 0xec, -+ 0xce, 0x24, 0x2e, 0x0e, 0x88, 0xb1, 0x38, 0x04, -+ 0x90, 0x16, 0xbc, 0xa0, 0x18, 0xda, 0x2b, 0x6e, -+ 0x19, 0x98, 0x6b, 0x3e, 0x31, 0x8c, 0xae, 0x8d, -+ 0x80, 0x61, 0x98, 0xfb, 0x4c, 0x52, 0x7c, 0xc3, -+ 0x93, 0x50, 0xeb, 0xdd, 0xea, 0xc5, 0x73, 0xc4, -+ 0xcb, 0xf0, 0xbe, 0xfd, 0xa0, 0xb7, 0x02, 0x42, -+ 0xc6, 0x40, 0xd7, 0xcd, 0x02, 0xd7, 0xa3 -+}; -+static 
const u8 enc_assoc039[] __initconst = { -+ 0xb3, 0xe4, 0x06, 0x46, 0x83, 0xb0, 0x2d, 0x84 -+}; -+static const u8 enc_nonce039[] __initconst = { -+ 0xd4, 0xd8, 0x07, 0x34, 0x16, 0x83, 0x82, 0x5b, -+ 0x31, 0xcd, 0x4d, 0x95 -+}; -+static const u8 enc_key039[] __initconst = { -+ 0x6c, 0xbf, 0xd7, 0x1c, 0x64, 0x5d, 0x18, 0x4c, -+ 0xf5, 0xd2, 0x3c, 0x40, 0x2b, 0xdb, 0x0d, 0x25, -+ 0xec, 0x54, 0x89, 0x8c, 0x8a, 0x02, 0x73, 0xd4, -+ 0x2e, 0xb5, 0xbe, 0x10, 0x9f, 0xdc, 0xb2, 0xac -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input040[] __initconst = { -+ 0xd0, 0x96, 0x80, 0x31, 0x81, 0xbe, 0xef, 0x9e, -+ 0x00, 0x8f, 0xf8, 0x5d, 0x5d, 0xdc, 0x38, 0xdd, -+ 0xac, 0xf0, 0xf0, 0x9e, 0xe5, 0xf7, 0xe0, 0x7f, -+ 0x1e, 0x40, 0x79, 0xcb, 0x64, 0xd0, 0xdc, 0x8f, -+ 0x5e, 0x67, 0x11, 0xcd, 0x49, 0x21, 0xa7, 0x88, -+ 0x7d, 0xe7, 0x6e, 0x26, 0x78, 0xfd, 0xc6, 0x76, -+ 0x18, 0xf1, 0x18, 0x55, 0x86, 0xbf, 0xea, 0x9d, -+ 0x4c, 0x68, 0x5d, 0x50, 0xe4, 0xbb, 0x9a, 0x82 -+}; -+static const u8 enc_output040[] __initconst = { -+ 0x9a, 0x4e, 0xf2, 0x2b, 0x18, 0x16, 0x77, 0xb5, -+ 0x75, 0x5c, 0x08, 0xf7, 0x47, 0xc0, 0xf8, 0xd8, -+ 0xe8, 0xd4, 0xc1, 0x8a, 0x9c, 0xc2, 0x40, 0x5c, -+ 0x12, 0xbb, 0x51, 0xbb, 0x18, 0x72, 0xc8, 0xe8, -+ 0xb8, 0x77, 0x67, 0x8b, 0xec, 0x44, 0x2c, 0xfc, -+ 0xbb, 0x0f, 0xf4, 0x64, 0xa6, 0x4b, 0x74, 0x33, -+ 0x2c, 0xf0, 0x72, 0x89, 0x8c, 0x7e, 0x0e, 0xdd, -+ 0xf6, 0x23, 0x2e, 0xa6, 0xe2, 0x7e, 0xfe, 0x50, -+ 0x9f, 0xf3, 0x42, 0x7a, 0x0f, 0x32, 0xfa, 0x56, -+ 0x6d, 0x9c, 0xa0, 0xa7, 0x8a, 0xef, 0xc0, 0x13 -+}; -+static const u8 enc_assoc040[] __initconst = { }; -+static const u8 enc_nonce040[] __initconst = { -+ 0xd6, 0x10, 0x40, 0xa3, 0x13, 0xed, 0x49, 0x28, -+ 0x23, 0xcc, 0x06, 0x5b -+}; -+static const u8 enc_key040[] __initconst = { -+ 0x5b, 0x1d, 0x10, 0x35, 0xc0, 0xb1, 0x7e, 0xe0, -+ 0xb0, 0x44, 0x47, 0x67, 0xf8, 0x0a, 0x25, 0xb8, -+ 0xc1, 0xb7, 0x41, 0xf4, 0xb5, 0x0a, 0x4d, 0x30, -+ 0x52, 0x22, 0x6b, 0xaa, 0x1c, 0x6f, 0xb7, 0x01 -+}; -+ -+/* wycheproof - misc 
*/ -+static const u8 enc_input041[] __initconst = { -+ 0x94, 0xee, 0x16, 0x6d, 0x6d, 0x6e, 0xcf, 0x88, -+ 0x32, 0x43, 0x71, 0x36, 0xb4, 0xae, 0x80, 0x5d, -+ 0x42, 0x88, 0x64, 0x35, 0x95, 0x86, 0xd9, 0x19, -+ 0x3a, 0x25, 0x01, 0x62, 0x93, 0xed, 0xba, 0x44, -+ 0x3c, 0x58, 0xe0, 0x7e, 0x7b, 0x71, 0x95, 0xec, -+ 0x5b, 0xd8, 0x45, 0x82, 0xa9, 0xd5, 0x6c, 0x8d, -+ 0x4a, 0x10, 0x8c, 0x7d, 0x7c, 0xe3, 0x4e, 0x6c, -+ 0x6f, 0x8e, 0xa1, 0xbe, 0xc0, 0x56, 0x73, 0x17 -+}; -+static const u8 enc_output041[] __initconst = { -+ 0x5f, 0xbb, 0xde, 0xcc, 0x34, 0xbe, 0x20, 0x16, -+ 0x14, 0xf6, 0x36, 0x03, 0x1e, 0xeb, 0x42, 0xf1, -+ 0xca, 0xce, 0x3c, 0x79, 0xa1, 0x2c, 0xff, 0xd8, -+ 0x71, 0xee, 0x8e, 0x73, 0x82, 0x0c, 0x82, 0x97, -+ 0x49, 0xf1, 0xab, 0xb4, 0x29, 0x43, 0x67, 0x84, -+ 0x9f, 0xb6, 0xc2, 0xaa, 0x56, 0xbd, 0xa8, 0xa3, -+ 0x07, 0x8f, 0x72, 0x3d, 0x7c, 0x1c, 0x85, 0x20, -+ 0x24, 0xb0, 0x17, 0xb5, 0x89, 0x73, 0xfb, 0x1e, -+ 0x09, 0x26, 0x3d, 0xa7, 0xb4, 0xcb, 0x92, 0x14, -+ 0x52, 0xf9, 0x7d, 0xca, 0x40, 0xf5, 0x80, 0xec -+}; -+static const u8 enc_assoc041[] __initconst = { -+ 0x71, 0x93, 0xf6, 0x23, 0x66, 0x33, 0x21, 0xa2 -+}; -+static const u8 enc_nonce041[] __initconst = { -+ 0xd3, 0x1c, 0x21, 0xab, 0xa1, 0x75, 0xb7, 0x0d, -+ 0xe4, 0xeb, 0xb1, 0x9c -+}; -+static const u8 enc_key041[] __initconst = { -+ 0x97, 0xd6, 0x35, 0xc4, 0xf4, 0x75, 0x74, 0xd9, -+ 0x99, 0x8a, 0x90, 0x87, 0x5d, 0xa1, 0xd3, 0xa2, -+ 0x84, 0xb7, 0x55, 0xb2, 0xd3, 0x92, 0x97, 0xa5, -+ 0x72, 0x52, 0x35, 0x19, 0x0e, 0x10, 0xa9, 0x7e -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input042[] __initconst = { -+ 0xb4, 0x29, 0xeb, 0x80, 0xfb, 0x8f, 0xe8, 0xba, -+ 0xed, 0xa0, 0xc8, 0x5b, 0x9c, 0x33, 0x34, 0x58, -+ 0xe7, 0xc2, 0x99, 0x2e, 0x55, 0x84, 0x75, 0x06, -+ 0x9d, 0x12, 0xd4, 0x5c, 0x22, 0x21, 0x75, 0x64, -+ 0x12, 0x15, 0x88, 0x03, 0x22, 0x97, 0xef, 0xf5, -+ 0x67, 0x83, 0x74, 0x2a, 0x5f, 0xc2, 0x2d, 0x74, -+ 0x10, 0xff, 0xb2, 0x9d, 0x66, 0x09, 0x86, 0x61, -+ 0xd7, 0x6f, 0x12, 0x6c, 0x3c, 0x27, 0x68, 
0x9e, -+ 0x43, 0xb3, 0x72, 0x67, 0xca, 0xc5, 0xa3, 0xa6, -+ 0xd3, 0xab, 0x49, 0xe3, 0x91, 0xda, 0x29, 0xcd, -+ 0x30, 0x54, 0xa5, 0x69, 0x2e, 0x28, 0x07, 0xe4, -+ 0xc3, 0xea, 0x46, 0xc8, 0x76, 0x1d, 0x50, 0xf5, -+ 0x92 -+}; -+static const u8 enc_output042[] __initconst = { -+ 0xd0, 0x10, 0x2f, 0x6c, 0x25, 0x8b, 0xf4, 0x97, -+ 0x42, 0xce, 0xc3, 0x4c, 0xf2, 0xd0, 0xfe, 0xdf, -+ 0x23, 0xd1, 0x05, 0xfb, 0x4c, 0x84, 0xcf, 0x98, -+ 0x51, 0x5e, 0x1b, 0xc9, 0xa6, 0x4f, 0x8a, 0xd5, -+ 0xbe, 0x8f, 0x07, 0x21, 0xbd, 0xe5, 0x06, 0x45, -+ 0xd0, 0x00, 0x83, 0xc3, 0xa2, 0x63, 0xa3, 0x10, -+ 0x53, 0xb7, 0x60, 0x24, 0x5f, 0x52, 0xae, 0x28, -+ 0x66, 0xa5, 0xec, 0x83, 0xb1, 0x9f, 0x61, 0xbe, -+ 0x1d, 0x30, 0xd5, 0xc5, 0xd9, 0xfe, 0xcc, 0x4c, -+ 0xbb, 0xe0, 0x8f, 0xd3, 0x85, 0x81, 0x3a, 0x2a, -+ 0xa3, 0x9a, 0x00, 0xff, 0x9c, 0x10, 0xf7, 0xf2, -+ 0x37, 0x02, 0xad, 0xd1, 0xe4, 0xb2, 0xff, 0xa3, -+ 0x1c, 0x41, 0x86, 0x5f, 0xc7, 0x1d, 0xe1, 0x2b, -+ 0x19, 0x61, 0x21, 0x27, 0xce, 0x49, 0x99, 0x3b, -+ 0xb0 -+}; -+static const u8 enc_assoc042[] __initconst = { }; -+static const u8 enc_nonce042[] __initconst = { -+ 0x17, 0xc8, 0x6a, 0x8a, 0xbb, 0xb7, 0xe0, 0x03, -+ 0xac, 0xde, 0x27, 0x99 -+}; -+static const u8 enc_key042[] __initconst = { -+ 0xfe, 0x6e, 0x55, 0xbd, 0xae, 0xd1, 0xf7, 0x28, -+ 0x4c, 0xa5, 0xfc, 0x0f, 0x8c, 0x5f, 0x2b, 0x8d, -+ 0xf5, 0x6d, 0xc0, 0xf4, 0x9e, 0x8c, 0xa6, 0x6a, -+ 0x41, 0x99, 0x5e, 0x78, 0x33, 0x51, 0xf9, 0x01 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input043[] __initconst = { -+ 0xce, 0xb5, 0x34, 0xce, 0x50, 0xdc, 0x23, 0xff, -+ 0x63, 0x8a, 0xce, 0x3e, 0xf6, 0x3a, 0xb2, 0xcc, -+ 0x29, 0x73, 0xee, 0xad, 0xa8, 0x07, 0x85, 0xfc, -+ 0x16, 0x5d, 0x06, 0xc2, 0xf5, 0x10, 0x0f, 0xf5, -+ 0xe8, 0xab, 0x28, 0x82, 0xc4, 0x75, 0xaf, 0xcd, -+ 0x05, 0xcc, 0xd4, 0x9f, 0x2e, 0x7d, 0x8f, 0x55, -+ 0xef, 0x3a, 0x72, 0xe3, 0xdc, 0x51, 0xd6, 0x85, -+ 0x2b, 0x8e, 0x6b, 0x9e, 0x7a, 0xec, 0xe5, 0x7b, -+ 0xe6, 0x55, 0x6b, 0x0b, 0x6d, 0x94, 0x13, 0xe3, -+ 0x3f, 0xc5, 0xfc, 
0x24, 0xa9, 0xa2, 0x05, 0xad, -+ 0x59, 0x57, 0x4b, 0xb3, 0x9d, 0x94, 0x4a, 0x92, -+ 0xdc, 0x47, 0x97, 0x0d, 0x84, 0xa6, 0xad, 0x31, -+ 0x76 -+}; -+static const u8 enc_output043[] __initconst = { -+ 0x75, 0x45, 0x39, 0x1b, 0x51, 0xde, 0x01, 0xd5, -+ 0xc5, 0x3d, 0xfa, 0xca, 0x77, 0x79, 0x09, 0x06, -+ 0x3e, 0x58, 0xed, 0xee, 0x4b, 0xb1, 0x22, 0x7e, -+ 0x71, 0x10, 0xac, 0x4d, 0x26, 0x20, 0xc2, 0xae, -+ 0xc2, 0xf8, 0x48, 0xf5, 0x6d, 0xee, 0xb0, 0x37, -+ 0xa8, 0xdc, 0xed, 0x75, 0xaf, 0xa8, 0xa6, 0xc8, -+ 0x90, 0xe2, 0xde, 0xe4, 0x2f, 0x95, 0x0b, 0xb3, -+ 0x3d, 0x9e, 0x24, 0x24, 0xd0, 0x8a, 0x50, 0x5d, -+ 0x89, 0x95, 0x63, 0x97, 0x3e, 0xd3, 0x88, 0x70, -+ 0xf3, 0xde, 0x6e, 0xe2, 0xad, 0xc7, 0xfe, 0x07, -+ 0x2c, 0x36, 0x6c, 0x14, 0xe2, 0xcf, 0x7c, 0xa6, -+ 0x2f, 0xb3, 0xd3, 0x6b, 0xee, 0x11, 0x68, 0x54, -+ 0x61, 0xb7, 0x0d, 0x44, 0xef, 0x8c, 0x66, 0xc5, -+ 0xc7, 0xbb, 0xf1, 0x0d, 0xca, 0xdd, 0x7f, 0xac, -+ 0xf6 -+}; -+static const u8 enc_assoc043[] __initconst = { -+ 0xa1, 0x1c, 0x40, 0xb6, 0x03, 0x76, 0x73, 0x30 -+}; -+static const u8 enc_nonce043[] __initconst = { -+ 0x46, 0x36, 0x2f, 0x45, 0xd6, 0x37, 0x9e, 0x63, -+ 0xe5, 0x22, 0x94, 0x60 -+}; -+static const u8 enc_key043[] __initconst = { -+ 0xaa, 0xbc, 0x06, 0x34, 0x74, 0xe6, 0x5c, 0x4c, -+ 0x3e, 0x9b, 0xdc, 0x48, 0x0d, 0xea, 0x97, 0xb4, -+ 0x51, 0x10, 0xc8, 0x61, 0x88, 0x46, 0xff, 0x6b, -+ 0x15, 0xbd, 0xd2, 0xa4, 0xa5, 0x68, 0x2c, 0x4e -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input044[] __initconst = { -+ 0xe5, 0xcc, 0xaa, 0x44, 0x1b, 0xc8, 0x14, 0x68, -+ 0x8f, 0x8f, 0x6e, 0x8f, 0x28, 0xb5, 0x00, 0xb2 -+}; -+static const u8 enc_output044[] __initconst = { -+ 0x7e, 0x72, 0xf5, 0xa1, 0x85, 0xaf, 0x16, 0xa6, -+ 0x11, 0x92, 0x1b, 0x43, 0x8f, 0x74, 0x9f, 0x0b, -+ 0x12, 0x42, 0xc6, 0x70, 0x73, 0x23, 0x34, 0x02, -+ 0x9a, 0xdf, 0xe1, 0xc5, 0x00, 0x16, 0x51, 0xe4 -+}; -+static const u8 enc_assoc044[] __initconst = { -+ 0x02 -+}; -+static const u8 enc_nonce044[] __initconst = { -+ 0x87, 0x34, 0x5f, 0x10, 0x55, 
0xfd, 0x9e, 0x21, -+ 0x02, 0xd5, 0x06, 0x56 -+}; -+static const u8 enc_key044[] __initconst = { -+ 0x7d, 0x00, 0xb4, 0x80, 0x95, 0xad, 0xfa, 0x32, -+ 0x72, 0x05, 0x06, 0x07, 0xb2, 0x64, 0x18, 0x50, -+ 0x02, 0xba, 0x99, 0x95, 0x7c, 0x49, 0x8b, 0xe0, -+ 0x22, 0x77, 0x0f, 0x2c, 0xe2, 0xf3, 0x14, 0x3c -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input045[] __initconst = { -+ 0x02, 0xcd, 0xe1, 0x68, 0xfb, 0xa3, 0xf5, 0x44, -+ 0xbb, 0xd0, 0x33, 0x2f, 0x7a, 0xde, 0xad, 0xa8 -+}; -+static const u8 enc_output045[] __initconst = { -+ 0x85, 0xf2, 0x9a, 0x71, 0x95, 0x57, 0xcd, 0xd1, -+ 0x4d, 0x1f, 0x8f, 0xff, 0xab, 0x6d, 0x9e, 0x60, -+ 0x73, 0x2c, 0xa3, 0x2b, 0xec, 0xd5, 0x15, 0xa1, -+ 0xed, 0x35, 0x3f, 0x54, 0x2e, 0x99, 0x98, 0x58 -+}; -+static const u8 enc_assoc045[] __initconst = { -+ 0xb6, 0x48 -+}; -+static const u8 enc_nonce045[] __initconst = { -+ 0x87, 0xa3, 0x16, 0x3e, 0xc0, 0x59, 0x8a, 0xd9, -+ 0x5b, 0x3a, 0xa7, 0x13 -+}; -+static const u8 enc_key045[] __initconst = { -+ 0x64, 0x32, 0x71, 0x7f, 0x1d, 0xb8, 0x5e, 0x41, -+ 0xac, 0x78, 0x36, 0xbc, 0xe2, 0x51, 0x85, 0xa0, -+ 0x80, 0xd5, 0x76, 0x2b, 0x9e, 0x2b, 0x18, 0x44, -+ 0x4b, 0x6e, 0xc7, 0x2c, 0x3b, 0xd8, 0xe4, 0xdc -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input046[] __initconst = { -+ 0x16, 0xdd, 0xd2, 0x3f, 0xf5, 0x3f, 0x3d, 0x23, -+ 0xc0, 0x63, 0x34, 0x48, 0x70, 0x40, 0xeb, 0x47 -+}; -+static const u8 enc_output046[] __initconst = { -+ 0xc1, 0xb2, 0x95, 0x93, 0x6d, 0x56, 0xfa, 0xda, -+ 0xc0, 0x3e, 0x5f, 0x74, 0x2b, 0xff, 0x73, 0xa1, -+ 0x39, 0xc4, 0x57, 0xdb, 0xab, 0x66, 0x38, 0x2b, -+ 0xab, 0xb3, 0xb5, 0x58, 0x00, 0xcd, 0xa5, 0xb8 -+}; -+static const u8 enc_assoc046[] __initconst = { -+ 0xbd, 0x4c, 0xd0, 0x2f, 0xc7, 0x50, 0x2b, 0xbd, -+ 0xbd, 0xf6, 0xc9, 0xa3, 0xcb, 0xe8, 0xf0 -+}; -+static const u8 enc_nonce046[] __initconst = { -+ 0x6f, 0x57, 0x3a, 0xa8, 0x6b, 0xaa, 0x49, 0x2b, -+ 0xa4, 0x65, 0x96, 0xdf -+}; -+static const u8 enc_key046[] __initconst = { -+ 0x8e, 0x34, 0xcf, 0x73, 
0xd2, 0x45, 0xa1, 0x08, -+ 0x2a, 0x92, 0x0b, 0x86, 0x36, 0x4e, 0xb8, 0x96, -+ 0xc4, 0x94, 0x64, 0x67, 0xbc, 0xb3, 0xd5, 0x89, -+ 0x29, 0xfc, 0xb3, 0x66, 0x90, 0xe6, 0x39, 0x4f -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input047[] __initconst = { -+ 0x62, 0x3b, 0x78, 0x50, 0xc3, 0x21, 0xe2, 0xcf, -+ 0x0c, 0x6f, 0xbc, 0xc8, 0xdf, 0xd1, 0xaf, 0xf2 -+}; -+static const u8 enc_output047[] __initconst = { -+ 0xc8, 0x4c, 0x9b, 0xb7, 0xc6, 0x1c, 0x1b, 0xcb, -+ 0x17, 0x77, 0x2a, 0x1c, 0x50, 0x0c, 0x50, 0x95, -+ 0xdb, 0xad, 0xf7, 0xa5, 0x13, 0x8c, 0xa0, 0x34, -+ 0x59, 0xa2, 0xcd, 0x65, 0x83, 0x1e, 0x09, 0x2f -+}; -+static const u8 enc_assoc047[] __initconst = { -+ 0x89, 0xcc, 0xe9, 0xfb, 0x47, 0x44, 0x1d, 0x07, -+ 0xe0, 0x24, 0x5a, 0x66, 0xfe, 0x8b, 0x77, 0x8b -+}; -+static const u8 enc_nonce047[] __initconst = { -+ 0x1a, 0x65, 0x18, 0xf0, 0x2e, 0xde, 0x1d, 0xa6, -+ 0x80, 0x92, 0x66, 0xd9 -+}; -+static const u8 enc_key047[] __initconst = { -+ 0xcb, 0x55, 0x75, 0xf5, 0xc7, 0xc4, 0x5c, 0x91, -+ 0xcf, 0x32, 0x0b, 0x13, 0x9f, 0xb5, 0x94, 0x23, -+ 0x75, 0x60, 0xd0, 0xa3, 0xe6, 0xf8, 0x65, 0xa6, -+ 0x7d, 0x4f, 0x63, 0x3f, 0x2c, 0x08, 0xf0, 0x16 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input048[] __initconst = { -+ 0x87, 0xb3, 0xa4, 0xd7, 0xb2, 0x6d, 0x8d, 0x32, -+ 0x03, 0xa0, 0xde, 0x1d, 0x64, 0xef, 0x82, 0xe3 -+}; -+static const u8 enc_output048[] __initconst = { -+ 0x94, 0xbc, 0x80, 0x62, 0x1e, 0xd1, 0xe7, 0x1b, -+ 0x1f, 0xd2, 0xb5, 0xc3, 0xa1, 0x5e, 0x35, 0x68, -+ 0x33, 0x35, 0x11, 0x86, 0x17, 0x96, 0x97, 0x84, -+ 0x01, 0x59, 0x8b, 0x96, 0x37, 0x22, 0xf5, 0xb3 -+}; -+static const u8 enc_assoc048[] __initconst = { -+ 0xd1, 0x9f, 0x2d, 0x98, 0x90, 0x95, 0xf7, 0xab, -+ 0x03, 0xa5, 0xfd, 0xe8, 0x44, 0x16, 0xe0, 0x0c, -+ 0x0e -+}; -+static const u8 enc_nonce048[] __initconst = { -+ 0x56, 0x4d, 0xee, 0x49, 0xab, 0x00, 0xd2, 0x40, -+ 0xfc, 0x10, 0x68, 0xc3 -+}; -+static const u8 enc_key048[] __initconst = { -+ 0xa5, 0x56, 0x9e, 0x72, 0x9a, 0x69, 0xb2, 0x4b, 
-+ 0xa6, 0xe0, 0xff, 0x15, 0xc4, 0x62, 0x78, 0x97, -+ 0x43, 0x68, 0x24, 0xc9, 0x41, 0xe9, 0xd0, 0x0b, -+ 0x2e, 0x93, 0xfd, 0xdc, 0x4b, 0xa7, 0x76, 0x57 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input049[] __initconst = { -+ 0xe6, 0x01, 0xb3, 0x85, 0x57, 0x79, 0x7d, 0xa2, -+ 0xf8, 0xa4, 0x10, 0x6a, 0x08, 0x9d, 0x1d, 0xa6 -+}; -+static const u8 enc_output049[] __initconst = { -+ 0x29, 0x9b, 0x5d, 0x3f, 0x3d, 0x03, 0xc0, 0x87, -+ 0x20, 0x9a, 0x16, 0xe2, 0x85, 0x14, 0x31, 0x11, -+ 0x4b, 0x45, 0x4e, 0xd1, 0x98, 0xde, 0x11, 0x7e, -+ 0x83, 0xec, 0x49, 0xfa, 0x8d, 0x85, 0x08, 0xd6 -+}; -+static const u8 enc_assoc049[] __initconst = { -+ 0x5e, 0x64, 0x70, 0xfa, 0xcd, 0x99, 0xc1, 0xd8, -+ 0x1e, 0x37, 0xcd, 0x44, 0x01, 0x5f, 0xe1, 0x94, -+ 0x80, 0xa2, 0xa4, 0xd3, 0x35, 0x2a, 0x4f, 0xf5, -+ 0x60, 0xc0, 0x64, 0x0f, 0xdb, 0xda -+}; -+static const u8 enc_nonce049[] __initconst = { -+ 0xdf, 0x87, 0x13, 0xe8, 0x7e, 0xc3, 0xdb, 0xcf, -+ 0xad, 0x14, 0xd5, 0x3e -+}; -+static const u8 enc_key049[] __initconst = { -+ 0x56, 0x20, 0x74, 0x65, 0xb4, 0xe4, 0x8e, 0x6d, -+ 0x04, 0x63, 0x0f, 0x4a, 0x42, 0xf3, 0x5c, 0xfc, -+ 0x16, 0x3a, 0xb2, 0x89, 0xc2, 0x2a, 0x2b, 0x47, -+ 0x84, 0xf6, 0xf9, 0x29, 0x03, 0x30, 0xbe, 0xe0 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input050[] __initconst = { -+ 0xdc, 0x9e, 0x9e, 0xaf, 0x11, 0xe3, 0x14, 0x18, -+ 0x2d, 0xf6, 0xa4, 0xeb, 0xa1, 0x7a, 0xec, 0x9c -+}; -+static const u8 enc_output050[] __initconst = { -+ 0x60, 0x5b, 0xbf, 0x90, 0xae, 0xb9, 0x74, 0xf6, -+ 0x60, 0x2b, 0xc7, 0x78, 0x05, 0x6f, 0x0d, 0xca, -+ 0x38, 0xea, 0x23, 0xd9, 0x90, 0x54, 0xb4, 0x6b, -+ 0x42, 0xff, 0xe0, 0x04, 0x12, 0x9d, 0x22, 0x04 -+}; -+static const u8 enc_assoc050[] __initconst = { -+ 0xba, 0x44, 0x6f, 0x6f, 0x9a, 0x0c, 0xed, 0x22, -+ 0x45, 0x0f, 0xeb, 0x10, 0x73, 0x7d, 0x90, 0x07, -+ 0xfd, 0x69, 0xab, 0xc1, 0x9b, 0x1d, 0x4d, 0x90, -+ 0x49, 0xa5, 0x55, 0x1e, 0x86, 0xec, 0x2b, 0x37 -+}; -+static const u8 enc_nonce050[] __initconst = { -+ 0x8d, 0xf4, 0xb1, 
0x5a, 0x88, 0x8c, 0x33, 0x28, -+ 0x6a, 0x7b, 0x76, 0x51 -+}; -+static const u8 enc_key050[] __initconst = { -+ 0x39, 0x37, 0x98, 0x6a, 0xf8, 0x6d, 0xaf, 0xc1, -+ 0xba, 0x0c, 0x46, 0x72, 0xd8, 0xab, 0xc4, 0x6c, -+ 0x20, 0x70, 0x62, 0x68, 0x2d, 0x9c, 0x26, 0x4a, -+ 0xb0, 0x6d, 0x6c, 0x58, 0x07, 0x20, 0x51, 0x30 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input051[] __initconst = { -+ 0x81, 0xce, 0x84, 0xed, 0xe9, 0xb3, 0x58, 0x59, -+ 0xcc, 0x8c, 0x49, 0xa8, 0xf6, 0xbe, 0x7d, 0xc6 -+}; -+static const u8 enc_output051[] __initconst = { -+ 0x7b, 0x7c, 0xe0, 0xd8, 0x24, 0x80, 0x9a, 0x70, -+ 0xde, 0x32, 0x56, 0x2c, 0xcf, 0x2c, 0x2b, 0xbd, -+ 0x15, 0xd4, 0x4a, 0x00, 0xce, 0x0d, 0x19, 0xb4, -+ 0x23, 0x1f, 0x92, 0x1e, 0x22, 0xbc, 0x0a, 0x43 -+}; -+static const u8 enc_assoc051[] __initconst = { -+ 0xd4, 0x1a, 0x82, 0x8d, 0x5e, 0x71, 0x82, 0x92, -+ 0x47, 0x02, 0x19, 0x05, 0x40, 0x2e, 0xa2, 0x57, -+ 0xdc, 0xcb, 0xc3, 0xb8, 0x0f, 0xcd, 0x56, 0x75, -+ 0x05, 0x6b, 0x68, 0xbb, 0x59, 0xe6, 0x2e, 0x88, -+ 0x73 -+}; -+static const u8 enc_nonce051[] __initconst = { -+ 0xbe, 0x40, 0xe5, 0xf1, 0xa1, 0x18, 0x17, 0xa0, -+ 0xa8, 0xfa, 0x89, 0x49 -+}; -+static const u8 enc_key051[] __initconst = { -+ 0x36, 0x37, 0x2a, 0xbc, 0xdb, 0x78, 0xe0, 0x27, -+ 0x96, 0x46, 0xac, 0x3d, 0x17, 0x6b, 0x96, 0x74, -+ 0xe9, 0x15, 0x4e, 0xec, 0xf0, 0xd5, 0x46, 0x9c, -+ 0x65, 0x1e, 0xc7, 0xe1, 0x6b, 0x4c, 0x11, 0x99 -+}; -+ -+/* wycheproof - misc */ -+static const u8 enc_input052[] __initconst = { -+ 0xa6, 0x67, 0x47, 0xc8, 0x9e, 0x85, 0x7a, 0xf3, -+ 0xa1, 0x8e, 0x2c, 0x79, 0x50, 0x00, 0x87, 0xed -+}; -+static const u8 enc_output052[] __initconst = { -+ 0xca, 0x82, 0xbf, 0xf3, 0xe2, 0xf3, 0x10, 0xcc, -+ 0xc9, 0x76, 0x67, 0x2c, 0x44, 0x15, 0xe6, 0x9b, -+ 0x57, 0x63, 0x8c, 0x62, 0xa5, 0xd8, 0x5d, 0xed, -+ 0x77, 0x4f, 0x91, 0x3c, 0x81, 0x3e, 0xa0, 0x32 -+}; -+static const u8 enc_assoc052[] __initconst = { -+ 0x3f, 0x2d, 0xd4, 0x9b, 0xbf, 0x09, 0xd6, 0x9a, -+ 0x78, 0xa3, 0xd8, 0x0e, 0xa2, 0x56, 0x66, 
0x14, -+ 0xfc, 0x37, 0x94, 0x74, 0x19, 0x6c, 0x1a, 0xae, -+ 0x84, 0x58, 0x3d, 0xa7, 0x3d, 0x7f, 0xf8, 0x5c, -+ 0x6f, 0x42, 0xca, 0x42, 0x05, 0x6a, 0x97, 0x92, -+ 0xcc, 0x1b, 0x9f, 0xb3, 0xc7, 0xd2, 0x61 -+}; -+static const u8 enc_nonce052[] __initconst = { -+ 0x84, 0xc8, 0x7d, 0xae, 0x4e, 0xee, 0x27, 0x73, -+ 0x0e, 0xc3, 0x5d, 0x12 -+}; -+static const u8 enc_key052[] __initconst = { -+ 0x9f, 0x14, 0x79, 0xed, 0x09, 0x7d, 0x7f, 0xe5, -+ 0x29, 0xc1, 0x1f, 0x2f, 0x5a, 0xdd, 0x9a, 0xaf, -+ 0xf4, 0xa1, 0xca, 0x0b, 0x68, 0x99, 0x7a, 0x2c, -+ 0xb7, 0xf7, 0x97, 0x49, 0xbd, 0x90, 0xaa, 0xf4 -+}; -+ - /* wycheproof - misc */ - static const u8 enc_input053[] __initconst = { - 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83, -@@ -2760,6 +3859,126 @@ static const u8 enc_key073[] __initconst - }; - - /* wycheproof - checking for int overflows */ -+static const u8 enc_input074[] __initconst = { -+ 0xd4, 0x50, 0x0b, 0xf0, 0x09, 0x49, 0x35, 0x51, -+ 0xc3, 0x80, 0xad, 0xf5, 0x2c, 0x57, 0x3a, 0x69, -+ 0xdf, 0x7e, 0x8b, 0x76, 0x24, 0x63, 0x33, 0x0f, -+ 0xac, 0xc1, 0x6a, 0x57, 0x26, 0xbe, 0x71, 0x90, -+ 0xc6, 0x3c, 0x5a, 0x1c, 0x92, 0x65, 0x84, 0xa0, -+ 0x96, 0x75, 0x68, 0x28, 0xdc, 0xdc, 0x64, 0xac, -+ 0xdf, 0x96, 0x3d, 0x93, 0x1b, 0xf1, 0xda, 0xe2, -+ 0x38, 0xf3, 0xf1, 0x57, 0x22, 0x4a, 0xc4, 0xb5, -+ 0x42, 0xd7, 0x85, 0xb0, 0xdd, 0x84, 0xdb, 0x6b, -+ 0xe3, 0xbc, 0x5a, 0x36, 0x63, 0xe8, 0x41, 0x49, -+ 0xff, 0xbe, 0xd0, 0x9e, 0x54, 0xf7, 0x8f, 0x16, -+ 0xa8, 0x22, 0x3b, 0x24, 0xcb, 0x01, 0x9f, 0x58, -+ 0xb2, 0x1b, 0x0e, 0x55, 0x1e, 0x7a, 0xa0, 0x73, -+ 0x27, 0x62, 0x95, 0x51, 0x37, 0x6c, 0xcb, 0xc3, -+ 0x93, 0x76, 0x71, 0xa0, 0x62, 0x9b, 0xd9, 0x5c, -+ 0x99, 0x15, 0xc7, 0x85, 0x55, 0x77, 0x1e, 0x7a -+}; -+static const u8 enc_output074[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x0b, 0x30, 0x0d, 0x8d, 0xa5, 0x6c, 0x21, 0x85, -+ 0x75, 0x52, 0x79, 0x55, 0x3c, 0x4c, 0x82, 0xca -+}; -+static const u8 enc_assoc074[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce074[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x00, 0x02, 0x50, 0x6e -+}; -+static const u8 enc_key074[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ -+static const u8 enc_input075[] __initconst = { -+ 0x7d, 0xe8, 0x7f, 0x67, 0x29, 0x94, 0x52, 0x75, -+ 0xd0, 0x65, 0x5d, 0xa4, 0xc7, 0xfd, 0xe4, 0x56, -+ 0x9e, 0x16, 0xf1, 0x11, 0xb5, 0xeb, 0x26, 0xc2, -+ 0x2d, 0x85, 0x9e, 0x3f, 0xf8, 0x22, 0xec, 0xed, -+ 0x3a, 0x6d, 0xd9, 0xa6, 0x0f, 0x22, 0x95, 0x7f, -+ 0x7b, 0x7c, 0x85, 0x7e, 0x88, 0x22, 0xeb, 0x9f, -+ 0xe0, 0xb8, 0xd7, 0x02, 0x21, 0x41, 0xf2, 0xd0, -+ 0xb4, 
0x8f, 0x4b, 0x56, 0x12, 0xd3, 0x22, 0xa8, -+ 0x8d, 0xd0, 0xfe, 0x0b, 0x4d, 0x91, 0x79, 0x32, -+ 0x4f, 0x7c, 0x6c, 0x9e, 0x99, 0x0e, 0xfb, 0xd8, -+ 0x0e, 0x5e, 0xd6, 0x77, 0x58, 0x26, 0x49, 0x8b, -+ 0x1e, 0xfe, 0x0f, 0x71, 0xa0, 0xf3, 0xec, 0x5b, -+ 0x29, 0xcb, 0x28, 0xc2, 0x54, 0x0a, 0x7d, 0xcd, -+ 0x51, 0xb7, 0xda, 0xae, 0xe0, 0xff, 0x4a, 0x7f, -+ 0x3a, 0xc1, 0xee, 0x54, 0xc2, 0x9e, 0xe4, 0xc1, -+ 0x70, 0xde, 0x40, 0x8f, 0x66, 0x69, 0x21, 0x94 -+}; -+static const u8 enc_output075[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xc5, 0x78, 0xe2, 0xaa, 0x44, 0xd3, 0x09, 0xb7, -+ 0xb6, 0xa5, 0x19, 0x3b, 0xdc, 0x61, 0x18, 0xf5 -+}; -+static const u8 enc_assoc075[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_nonce075[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x00, 
0x03, 0x18, 0xa5 -+}; -+static const u8 enc_key075[] __initconst = { -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, -+ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30 -+}; -+ -+/* wycheproof - checking for int overflows */ - static const u8 enc_input076[] __initconst = { - 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85, - 0xde, 0x22, 0xff, 0x5b, 0x8a, 0xdd, 0x95, 0x02, -@@ -3349,6 +4568,286 @@ static const u8 enc_key085[] __initconst - 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f - }; - -+/* wycheproof - special case tag */ -+static const u8 enc_input086[] __initconst = { -+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, -+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, -+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, -+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, -+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, -+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, -+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, -+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d -+}; -+static const u8 enc_output086[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -+}; -+static const u8 enc_assoc086[] __initconst = { -+ 0x85, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xa6, 0x90, 0x2f, 0xcb, 0xc8, 0x83, 0xbb, 0xc1, -+ 0x80, 0xb2, 0x56, 0xae, 0x34, 0xad, 0x7f, 0x00 -+}; -+static const u8 enc_nonce086[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 
0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b -+}; -+static const u8 enc_key086[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - special case tag */ -+static const u8 enc_input087[] __initconst = { -+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, -+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, -+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, -+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, -+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, -+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, -+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, -+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d -+}; -+static const u8 enc_output087[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_assoc087[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x24, 0x7e, 0x50, 0x64, 0x2a, 0x1c, 0x0a, 0x2f, -+ 0x8f, 0x77, 0x21, 0x96, 0x09, 0xdb, 0xa9, 0x58 -+}; -+static const u8 enc_nonce087[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b -+}; -+static const u8 enc_key087[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 
0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - special case tag */ -+static const u8 enc_input088[] __initconst = { -+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, -+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, -+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, -+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, -+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, -+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, -+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, -+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d -+}; -+static const u8 enc_output088[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+}; -+static const u8 enc_assoc088[] __initconst = { -+ 0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xd9, 0xe7, 0x2c, 0x06, 0x4a, 0xc8, 0x96, 0x1f, -+ 0x3f, 0xa5, 0x85, 0xe0, 0xe2, 0xab, 0xd6, 0x00 -+}; -+static const u8 enc_nonce088[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b -+}; -+static const u8 enc_key088[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - special case tag */ -+static const u8 enc_input089[] __initconst = { -+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, -+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, -+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, -+ 0xc0, 0xe2, 0x18, 
0xb7, 0x2c, 0x33, 0x90, 0xf2, -+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, -+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, -+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, -+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d -+}; -+static const u8 enc_output089[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, -+ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 -+}; -+static const u8 enc_assoc089[] __initconst = { -+ 0x65, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x95, 0xaf, 0x0f, 0x4d, 0x0b, 0x68, 0x6e, 0xae, -+ 0xcc, 0xca, 0x43, 0x07, 0xd5, 0x96, 0xf5, 0x02 -+}; -+static const u8 enc_nonce089[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b -+}; -+static const u8 enc_key089[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - special case tag */ -+static const u8 enc_input090[] __initconst = { -+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, -+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, -+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, -+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, -+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, -+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, -+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, -+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d -+}; -+static const u8 enc_output090[] __initconst = { -+ 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, -+ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f -+}; -+static const u8 enc_assoc090[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x85, 0x40, 0xb4, 0x64, 0x35, 0x77, 0x07, 0xbe, -+ 0x3a, 0x39, 0xd5, 0x5c, 0x34, 0xf8, 0xbc, 0xb3 -+}; -+static const u8 enc_nonce090[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b -+}; -+static const u8 enc_key090[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - special case tag */ -+static const u8 enc_input091[] __initconst = { -+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, -+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, -+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, -+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, -+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, -+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, -+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, -+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d -+}; -+static const u8 enc_output091[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, -+ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_assoc091[] __initconst = { -+ 0x4f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x66, 0x23, 0xd9, 0x90, 0xb8, 0x98, 0xd8, 0x30, -+ 0xd2, 0x12, 0xaf, 0x23, 0x83, 0x33, 0x07, 0x01 -+}; -+static const u8 enc_nonce091[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b -+}; -+static const u8 enc_key091[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ -+/* wycheproof - special case tag */ -+static const u8 enc_input092[] __initconst = { -+ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6, -+ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd, -+ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b, -+ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2, -+ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19, -+ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4, -+ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63, -+ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d -+}; -+static const u8 enc_output092[] __initconst = { -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, -+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -+}; -+static const u8 enc_assoc092[] __initconst = { -+ 0x83, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -+ 0x5f, 0x16, 0xd0, 0x9f, 0x17, 0x78, 0x72, 0x11, -+ 0xb7, 0xd4, 0x84, 0xe0, 0x24, 0xf8, 0x97, 0x01 -+}; -+static const u8 enc_nonce092[] __initconst = { -+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, -+ 0x08, 0x09, 0x0a, 0x0b -+}; -+static const u8 enc_key092[] __initconst = { -+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, -+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, -+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, -+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f -+}; -+ - /* wycheproof - edge case intermediate sums in poly1305 */ - static const u8 enc_input093[] __initconst = { - 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d, -@@ -4455,6 +5954,86 @@ chacha20poly1305_enc_vectors[] __initcon - sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) }, - { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012, - sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) }, -+ { enc_input013, enc_output013, enc_assoc013, enc_nonce013, enc_key013, -+ sizeof(enc_input013), sizeof(enc_assoc013), sizeof(enc_nonce013) }, -+ { enc_input014, enc_output014, enc_assoc014, enc_nonce014, enc_key014, -+ sizeof(enc_input014), sizeof(enc_assoc014), sizeof(enc_nonce014) }, -+ { enc_input015, enc_output015, enc_assoc015, enc_nonce015, enc_key015, -+ sizeof(enc_input015), sizeof(enc_assoc015), sizeof(enc_nonce015) }, -+ { enc_input016, enc_output016, enc_assoc016, enc_nonce016, enc_key016, -+ sizeof(enc_input016), sizeof(enc_assoc016), sizeof(enc_nonce016) }, -+ { enc_input017, enc_output017, enc_assoc017, enc_nonce017, enc_key017, -+ sizeof(enc_input017), sizeof(enc_assoc017), sizeof(enc_nonce017) }, -+ { enc_input018, enc_output018, enc_assoc018, enc_nonce018, enc_key018, -+ sizeof(enc_input018), sizeof(enc_assoc018), sizeof(enc_nonce018) }, -+ { enc_input019, enc_output019, enc_assoc019, enc_nonce019, enc_key019, -+ sizeof(enc_input019), 
sizeof(enc_assoc019), sizeof(enc_nonce019) }, -+ { enc_input020, enc_output020, enc_assoc020, enc_nonce020, enc_key020, -+ sizeof(enc_input020), sizeof(enc_assoc020), sizeof(enc_nonce020) }, -+ { enc_input021, enc_output021, enc_assoc021, enc_nonce021, enc_key021, -+ sizeof(enc_input021), sizeof(enc_assoc021), sizeof(enc_nonce021) }, -+ { enc_input022, enc_output022, enc_assoc022, enc_nonce022, enc_key022, -+ sizeof(enc_input022), sizeof(enc_assoc022), sizeof(enc_nonce022) }, -+ { enc_input023, enc_output023, enc_assoc023, enc_nonce023, enc_key023, -+ sizeof(enc_input023), sizeof(enc_assoc023), sizeof(enc_nonce023) }, -+ { enc_input024, enc_output024, enc_assoc024, enc_nonce024, enc_key024, -+ sizeof(enc_input024), sizeof(enc_assoc024), sizeof(enc_nonce024) }, -+ { enc_input025, enc_output025, enc_assoc025, enc_nonce025, enc_key025, -+ sizeof(enc_input025), sizeof(enc_assoc025), sizeof(enc_nonce025) }, -+ { enc_input026, enc_output026, enc_assoc026, enc_nonce026, enc_key026, -+ sizeof(enc_input026), sizeof(enc_assoc026), sizeof(enc_nonce026) }, -+ { enc_input027, enc_output027, enc_assoc027, enc_nonce027, enc_key027, -+ sizeof(enc_input027), sizeof(enc_assoc027), sizeof(enc_nonce027) }, -+ { enc_input028, enc_output028, enc_assoc028, enc_nonce028, enc_key028, -+ sizeof(enc_input028), sizeof(enc_assoc028), sizeof(enc_nonce028) }, -+ { enc_input029, enc_output029, enc_assoc029, enc_nonce029, enc_key029, -+ sizeof(enc_input029), sizeof(enc_assoc029), sizeof(enc_nonce029) }, -+ { enc_input030, enc_output030, enc_assoc030, enc_nonce030, enc_key030, -+ sizeof(enc_input030), sizeof(enc_assoc030), sizeof(enc_nonce030) }, -+ { enc_input031, enc_output031, enc_assoc031, enc_nonce031, enc_key031, -+ sizeof(enc_input031), sizeof(enc_assoc031), sizeof(enc_nonce031) }, -+ { enc_input032, enc_output032, enc_assoc032, enc_nonce032, enc_key032, -+ sizeof(enc_input032), sizeof(enc_assoc032), sizeof(enc_nonce032) }, -+ { enc_input033, enc_output033, enc_assoc033, enc_nonce033, 
enc_key033, -+ sizeof(enc_input033), sizeof(enc_assoc033), sizeof(enc_nonce033) }, -+ { enc_input034, enc_output034, enc_assoc034, enc_nonce034, enc_key034, -+ sizeof(enc_input034), sizeof(enc_assoc034), sizeof(enc_nonce034) }, -+ { enc_input035, enc_output035, enc_assoc035, enc_nonce035, enc_key035, -+ sizeof(enc_input035), sizeof(enc_assoc035), sizeof(enc_nonce035) }, -+ { enc_input036, enc_output036, enc_assoc036, enc_nonce036, enc_key036, -+ sizeof(enc_input036), sizeof(enc_assoc036), sizeof(enc_nonce036) }, -+ { enc_input037, enc_output037, enc_assoc037, enc_nonce037, enc_key037, -+ sizeof(enc_input037), sizeof(enc_assoc037), sizeof(enc_nonce037) }, -+ { enc_input038, enc_output038, enc_assoc038, enc_nonce038, enc_key038, -+ sizeof(enc_input038), sizeof(enc_assoc038), sizeof(enc_nonce038) }, -+ { enc_input039, enc_output039, enc_assoc039, enc_nonce039, enc_key039, -+ sizeof(enc_input039), sizeof(enc_assoc039), sizeof(enc_nonce039) }, -+ { enc_input040, enc_output040, enc_assoc040, enc_nonce040, enc_key040, -+ sizeof(enc_input040), sizeof(enc_assoc040), sizeof(enc_nonce040) }, -+ { enc_input041, enc_output041, enc_assoc041, enc_nonce041, enc_key041, -+ sizeof(enc_input041), sizeof(enc_assoc041), sizeof(enc_nonce041) }, -+ { enc_input042, enc_output042, enc_assoc042, enc_nonce042, enc_key042, -+ sizeof(enc_input042), sizeof(enc_assoc042), sizeof(enc_nonce042) }, -+ { enc_input043, enc_output043, enc_assoc043, enc_nonce043, enc_key043, -+ sizeof(enc_input043), sizeof(enc_assoc043), sizeof(enc_nonce043) }, -+ { enc_input044, enc_output044, enc_assoc044, enc_nonce044, enc_key044, -+ sizeof(enc_input044), sizeof(enc_assoc044), sizeof(enc_nonce044) }, -+ { enc_input045, enc_output045, enc_assoc045, enc_nonce045, enc_key045, -+ sizeof(enc_input045), sizeof(enc_assoc045), sizeof(enc_nonce045) }, -+ { enc_input046, enc_output046, enc_assoc046, enc_nonce046, enc_key046, -+ sizeof(enc_input046), sizeof(enc_assoc046), sizeof(enc_nonce046) }, -+ { enc_input047, 
enc_output047, enc_assoc047, enc_nonce047, enc_key047, -+ sizeof(enc_input047), sizeof(enc_assoc047), sizeof(enc_nonce047) }, -+ { enc_input048, enc_output048, enc_assoc048, enc_nonce048, enc_key048, -+ sizeof(enc_input048), sizeof(enc_assoc048), sizeof(enc_nonce048) }, -+ { enc_input049, enc_output049, enc_assoc049, enc_nonce049, enc_key049, -+ sizeof(enc_input049), sizeof(enc_assoc049), sizeof(enc_nonce049) }, -+ { enc_input050, enc_output050, enc_assoc050, enc_nonce050, enc_key050, -+ sizeof(enc_input050), sizeof(enc_assoc050), sizeof(enc_nonce050) }, -+ { enc_input051, enc_output051, enc_assoc051, enc_nonce051, enc_key051, -+ sizeof(enc_input051), sizeof(enc_assoc051), sizeof(enc_nonce051) }, -+ { enc_input052, enc_output052, enc_assoc052, enc_nonce052, enc_key052, -+ sizeof(enc_input052), sizeof(enc_assoc052), sizeof(enc_nonce052) }, - { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053, - sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) }, - { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054, -@@ -4497,6 +6076,10 @@ chacha20poly1305_enc_vectors[] __initcon - sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) }, - { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073, - sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) }, -+ { enc_input074, enc_output074, enc_assoc074, enc_nonce074, enc_key074, -+ sizeof(enc_input074), sizeof(enc_assoc074), sizeof(enc_nonce074) }, -+ { enc_input075, enc_output075, enc_assoc075, enc_nonce075, enc_key075, -+ sizeof(enc_input075), sizeof(enc_assoc075), sizeof(enc_nonce075) }, - { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076, - sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) }, - { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077, -@@ -4517,6 +6100,20 @@ chacha20poly1305_enc_vectors[] __initcon - sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) }, - { 
enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085, - sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) }, -+ { enc_input086, enc_output086, enc_assoc086, enc_nonce086, enc_key086, -+ sizeof(enc_input086), sizeof(enc_assoc086), sizeof(enc_nonce086) }, -+ { enc_input087, enc_output087, enc_assoc087, enc_nonce087, enc_key087, -+ sizeof(enc_input087), sizeof(enc_assoc087), sizeof(enc_nonce087) }, -+ { enc_input088, enc_output088, enc_assoc088, enc_nonce088, enc_key088, -+ sizeof(enc_input088), sizeof(enc_assoc088), sizeof(enc_nonce088) }, -+ { enc_input089, enc_output089, enc_assoc089, enc_nonce089, enc_key089, -+ sizeof(enc_input089), sizeof(enc_assoc089), sizeof(enc_nonce089) }, -+ { enc_input090, enc_output090, enc_assoc090, enc_nonce090, enc_key090, -+ sizeof(enc_input090), sizeof(enc_assoc090), sizeof(enc_nonce090) }, -+ { enc_input091, enc_output091, enc_assoc091, enc_nonce091, enc_key091, -+ sizeof(enc_input091), sizeof(enc_assoc091), sizeof(enc_nonce091) }, -+ { enc_input092, enc_output092, enc_assoc092, enc_nonce092, enc_key092, -+ sizeof(enc_input092), sizeof(enc_assoc092), sizeof(enc_nonce092) }, - { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093, - sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) }, - { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094, -@@ -7224,6 +8821,43 @@ xchacha20poly1305_dec_vectors[] __initco - sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) } - }; - -+/* This is for the selftests-only, since it is only useful for the purpose of -+ * testing the underlying primitives and interactions. 
-+ */ -+static void __init -+chacha20poly1305_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len, -+ const u8 *ad, const size_t ad_len, -+ const u8 nonce[12], -+ const u8 key[CHACHA20POLY1305_KEY_SIZE]) -+{ -+ const u8 *pad0 = page_address(ZERO_PAGE(0)); -+ struct poly1305_desc_ctx poly1305_state; -+ u32 chacha20_state[CHACHA_STATE_WORDS]; -+ union { -+ u8 block0[POLY1305_KEY_SIZE]; -+ __le64 lens[2]; -+ } b = {{ 0 }}; -+ u8 bottom_row[16] = { 0 }; -+ u32 le_key[8]; -+ int i; -+ -+ memcpy(&bottom_row[4], nonce, 12); -+ for (i = 0; i < 8; ++i) -+ le_key[i] = get_unaligned_le32(key + sizeof(le_key[i]) * i); -+ chacha_init(chacha20_state, le_key, bottom_row); -+ chacha20_crypt(chacha20_state, b.block0, b.block0, sizeof(b.block0)); -+ poly1305_init(&poly1305_state, b.block0); -+ poly1305_update(&poly1305_state, ad, ad_len); -+ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf); -+ chacha20_crypt(chacha20_state, dst, src, src_len); -+ poly1305_update(&poly1305_state, dst, src_len); -+ poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf); -+ b.lens[0] = cpu_to_le64(ad_len); -+ b.lens[1] = cpu_to_le64(src_len); -+ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens)); -+ poly1305_final(&poly1305_state, dst + src_len); -+} -+ - static void __init - chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len, - const u8 *ad, const size_t ad_len, -@@ -7233,6 +8867,9 @@ chacha20poly1305_selftest_encrypt(u8 *ds - if (nonce_len == 8) - chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, - get_unaligned_le64(nonce), key); -+ else if (nonce_len == 12) -+ chacha20poly1305_encrypt_bignonce(dst, src, src_len, ad, -+ ad_len, nonce, key); - else - BUG(); - } -@@ -7248,14 +8885,14 @@ decryption_success(bool func_ret, bool e - bool __init chacha20poly1305_selftest(void) - { - enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 }; -- size_t i; -- u8 *computed_output = NULL, *heap_src = NULL; -- struct scatterlist sg_src; -+ size_t i, 
j, k, total_len; -+ u8 *computed_output = NULL, *input = NULL; - bool success = true, ret; -+ struct scatterlist sg_src[3]; - -- heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); - computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); -- if (!heap_src || !computed_output) { -+ input = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL); -+ if (!computed_output || !input) { - pr_err("chacha20poly1305 self-test malloc: FAIL\n"); - success = false; - goto out; -@@ -7284,17 +8921,17 @@ bool __init chacha20poly1305_selftest(vo - for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { - if (chacha20poly1305_enc_vectors[i].nlen != 8) - continue; -- memcpy(heap_src, chacha20poly1305_enc_vectors[i].input, -+ memcpy(computed_output, chacha20poly1305_enc_vectors[i].input, - chacha20poly1305_enc_vectors[i].ilen); -- sg_init_one(&sg_src, heap_src, -+ sg_init_one(sg_src, computed_output, - chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE); -- chacha20poly1305_encrypt_sg_inplace(&sg_src, -+ ret = chacha20poly1305_encrypt_sg_inplace(sg_src, - chacha20poly1305_enc_vectors[i].ilen, - chacha20poly1305_enc_vectors[i].assoc, - chacha20poly1305_enc_vectors[i].alen, - get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce), - chacha20poly1305_enc_vectors[i].key); -- if (memcmp(heap_src, -+ if (!ret || memcmp(computed_output, - chacha20poly1305_enc_vectors[i].output, - chacha20poly1305_enc_vectors[i].ilen + - POLY1305_DIGEST_SIZE)) { -@@ -7326,11 +8963,11 @@ bool __init chacha20poly1305_selftest(vo - } - - for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { -- memcpy(heap_src, chacha20poly1305_dec_vectors[i].input, -+ memcpy(computed_output, chacha20poly1305_dec_vectors[i].input, - chacha20poly1305_dec_vectors[i].ilen); -- sg_init_one(&sg_src, heap_src, -+ sg_init_one(sg_src, computed_output, - chacha20poly1305_dec_vectors[i].ilen); -- ret = chacha20poly1305_decrypt_sg_inplace(&sg_src, -+ ret = chacha20poly1305_decrypt_sg_inplace(sg_src, - 
chacha20poly1305_dec_vectors[i].ilen, - chacha20poly1305_dec_vectors[i].assoc, - chacha20poly1305_dec_vectors[i].alen, -@@ -7338,7 +8975,7 @@ bool __init chacha20poly1305_selftest(vo - chacha20poly1305_dec_vectors[i].key); - if (!decryption_success(ret, - chacha20poly1305_dec_vectors[i].failure, -- memcmp(heap_src, chacha20poly1305_dec_vectors[i].output, -+ memcmp(computed_output, chacha20poly1305_dec_vectors[i].output, - chacha20poly1305_dec_vectors[i].ilen - - POLY1305_DIGEST_SIZE))) { - pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n", -@@ -7365,6 +9002,7 @@ bool __init chacha20poly1305_selftest(vo - success = false; - } - } -+ - for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) { - memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN); - ret = xchacha20poly1305_decrypt(computed_output, -@@ -7386,8 +9024,54 @@ bool __init chacha20poly1305_selftest(vo - } - } - -+ for (total_len = POLY1305_DIGEST_SIZE; IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST) -+ && total_len <= 1 << 10; ++total_len) { -+ for (i = 0; i <= total_len; ++i) { -+ for (j = i; j <= total_len; ++j) { -+ sg_init_table(sg_src, 3); -+ sg_set_buf(&sg_src[0], input, i); -+ sg_set_buf(&sg_src[1], input + i, j - i); -+ sg_set_buf(&sg_src[2], input + j, total_len - j); -+ memset(computed_output, 0, total_len); -+ memset(input, 0, total_len); -+ -+ if (!chacha20poly1305_encrypt_sg_inplace(sg_src, -+ total_len - POLY1305_DIGEST_SIZE, NULL, 0, -+ 0, enc_key001)) -+ goto chunkfail; -+ chacha20poly1305_encrypt(computed_output, -+ computed_output, -+ total_len - POLY1305_DIGEST_SIZE, NULL, 0, 0, -+ enc_key001); -+ if (memcmp(computed_output, input, total_len)) -+ goto chunkfail; -+ if (!chacha20poly1305_decrypt(computed_output, -+ input, total_len, NULL, 0, 0, enc_key001)) -+ goto chunkfail; -+ for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) { -+ if (computed_output[k]) -+ goto chunkfail; -+ } -+ if (!chacha20poly1305_decrypt_sg_inplace(sg_src, -+ total_len, NULL, 0, 0, 
enc_key001)) -+ goto chunkfail; -+ for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) { -+ if (input[k]) -+ goto chunkfail; -+ } -+ continue; -+ -+ chunkfail: -+ pr_err("chacha20poly1305 chunked self-test %zu/%zu/%zu: FAIL\n", -+ total_len, i, j); -+ success = false; -+ } -+ -+ } -+ } -+ - out: -- kfree(heap_src); - kfree(computed_output); -+ kfree(input); - return success; - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0048-crypto-x86-poly1305-emit-does-base-conversion-itself.patch b/target/linux/generic/backport-5.4/080-wireguard-0048-crypto-x86-poly1305-emit-does-base-conversion-itself.patch deleted file mode 100644 index 8209ca2898..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0048-crypto-x86-poly1305-emit-does-base-conversion-itself.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 17 Jan 2020 11:42:22 +0100 -Subject: [PATCH] crypto: x86/poly1305 - emit does base conversion itself - -commit f9e7fe32a792726186301423ff63a465d63386e1 upstream. - -The emit code does optional base conversion itself in assembly, so we -don't need to do that here. Also, neither one of these functions uses -simd instructions, so checking for that doesn't make sense either. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305_glue.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -123,13 +123,9 @@ static void poly1305_simd_blocks(void *c - static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]) - { -- struct poly1305_arch_internal *state = ctx; -- -- if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || -- !state->is_base2_26 || !crypto_simd_usable()) { -- convert_to_base2_64(ctx); -+ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx)) - poly1305_emit_x86_64(ctx, mac, nonce); -- } else -+ else - poly1305_emit_avx(ctx, mac, nonce); - } - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0049-crypto-arm-chacha-fix-build-failured-when-kernel-mod.patch b/target/linux/generic/backport-5.4/080-wireguard-0049-crypto-arm-chacha-fix-build-failured-when-kernel-mod.patch deleted file mode 100644 index 354f584315..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0049-crypto-arm-chacha-fix-build-failured-when-kernel-mod.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 17 Jan 2020 17:43:18 +0100 -Subject: [PATCH] crypto: arm/chacha - fix build failured when kernel mode NEON - is disabled - -commit 0bc81767c5bd9d005fae1099fb39eb3688370cb1 upstream. - -When the ARM accelerated ChaCha driver is built as part of a configuration -that has kernel mode NEON disabled, we expect the compiler to propagate -the build time constant expression IS_ENABLED(CONFIG_KERNEL_MODE_NEON) in -a way that eliminates all the cross-object references to the actual NEON -routines, which allows the chacha-neon-core.o object to be omitted from -the build entirely. 
- -Unfortunately, this fails to work as expected in some cases, and we may -end up with a build error such as - - chacha-glue.c:(.text+0xc0): undefined reference to `chacha_4block_xor_neon' - -caused by the fact that chacha_doneon() has not been eliminated from the -object code, even though it will never be called in practice. - -Let's fix this by adding some IS_ENABLED(CONFIG_KERNEL_MODE_NEON) tests -that are not strictly needed from a logical point of view, but should -help the compiler infer that the NEON code paths are unreachable in -those cases. - -Fixes: b36d8c09e710c71f ("crypto: arm/chacha - remove dependency on generic ...") -Reported-by: Russell King <linux@armlinux.org.uk> -Cc: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-glue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/arch/arm/crypto/chacha-glue.c -+++ b/arch/arm/crypto/chacha-glue.c -@@ -115,7 +115,7 @@ static int chacha_stream_xor(struct skci - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - -- if (!neon) { -+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { - chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, - nbytes, state, ctx->nrounds); - state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); -@@ -159,7 +159,7 @@ static int do_xchacha(struct skcipher_re - - chacha_init_generic(state, ctx->key, req->iv); - -- if (!neon) { -+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { - hchacha_block_arm(state, subctx.key, ctx->nrounds); - } else { - kernel_neon_begin(); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0050-crypto-Kconfig-allow-tests-to-be-disabled-when-manag.patch b/target/linux/generic/backport-5.4/080-wireguard-0050-crypto-Kconfig-allow-tests-to-be-disabled-when-manag.patch deleted file mode 100644 index c52bf0a2a7..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0050-crypto-Kconfig-allow-tests-to-be-disabled-when-manag.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 17 Jan 2020 12:01:36 +0100 -Subject: [PATCH] crypto: Kconfig - allow tests to be disabled when manager is - disabled - -commit 2343d1529aff8b552589f622c23932035ed7a05d upstream. - -The library code uses CRYPTO_MANAGER_DISABLE_TESTS to conditionalize its -tests, but the library code can also exist without CRYPTO_MANAGER. That -means on minimal configs, the test code winds up being built with no way -to disable it. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - crypto/Kconfig | 4 ---- - 1 file changed, 4 deletions(-) - ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -136,8 +136,6 @@ config CRYPTO_USER - Userspace configuration for cryptographic instantiations such as - cbc(aes). - --if CRYPTO_MANAGER2 -- - config CRYPTO_MANAGER_DISABLE_TESTS - bool "Disable run-time self tests" - default y -@@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS - This is intended for developer use only, as these tests take much - longer to run than the normal self tests. - --endif # if CRYPTO_MANAGER2 -- - config CRYPTO_GF128MUL - tristate - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0051-crypto-chacha20poly1305-prevent-integer-overflow-on-.patch b/target/linux/generic/backport-5.4/080-wireguard-0051-crypto-chacha20poly1305-prevent-integer-overflow-on-.patch deleted file mode 100644 index 1ed49e5b6c..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0051-crypto-chacha20poly1305-prevent-integer-overflow-on-.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Thu, 6 Feb 2020 12:42:01 +0100 -Subject: [PATCH] crypto: chacha20poly1305 - prevent integer overflow on large - input - -commit c9cc0517bba9f0213f1e55172feceb99e5512daf upstream. - -This code assigns src_len (size_t) to sl (int), which causes problems -when src_len is very large. Probably nobody in the kernel should be -passing this much data to chacha20poly1305 all in one go anyway, so I -don't think we need to change the algorithm or introduce larger types -or anything. But we should at least error out early in this case and -print a warning so that we get reports if this does happen and can look -into why anybody is possibly passing it that much data or if they're -accidentally passing -1 or similar. - -Fixes: d95312a3ccc0 ("crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine") -Cc: Ard Biesheuvel <ardb@kernel.org> -Cc: stable@vger.kernel.org # 5.5+ -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - lib/crypto/chacha20poly1305.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/lib/crypto/chacha20poly1305.c -+++ b/lib/crypto/chacha20poly1305.c -@@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(s - __le64 lens[2]; - } b __aligned(16); - -+ if (WARN_ON(src_len > INT_MAX)) -+ return false; -+ - chacha_load_key(b.k, key); - - b.iv[0] = 0; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0052-crypto-x86-curve25519-support-assemblers-with-no-adx.patch b/target/linux/generic/backport-5.4/080-wireguard-0052-crypto-x86-curve25519-support-assemblers-with-no-adx.patch deleted file mode 100644 index cd507b1e44..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0052-crypto-x86-curve25519-support-assemblers-with-no-adx.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 1 Mar 2020 22:52:35 +0800 -Subject: [PATCH] crypto: x86/curve25519 - support assemblers with no adx - support - -commit 1579f1bc3b753d17a44de3457d5c6f4a5b14c752 upstream. - -Some older version of GAS do not support the ADX instructions, similarly -to how they also don't support AVX and such. This commit adds the same -build-time detection mechanisms we use for AVX and others for ADX, and -then makes sure that the curve25519 library dispatcher calls the right -functions. - -Reported-by: Willy Tarreau <w@1wt.eu> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/Makefile | 5 +++-- - arch/x86/crypto/Makefile | 7 ++++++- - include/crypto/curve25519.h | 6 ++++-- - 3 files changed, 13 insertions(+), 5 deletions(-) - ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -198,9 +198,10 @@ avx2_instr :=$(call as-instr,vpbroadcast - avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1) - sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1) - sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1) -+adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1) - --KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) --KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) -+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) -+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) - - KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) - ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgath - avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no) - sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no) - sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) -+adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no) - - obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o - -@@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) - - obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o - obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o --obj-$(CONFIG_CRYPTO_CURVE25519_X86) 
+= curve25519-x86_64.o -+ -+# These modules require the assembler to support ADX. -+ifeq ($(adx_supported),yes) -+ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o -+endif - - # These modules require assembler to support AVX. - ifeq ($(avx_supported),yes) ---- a/include/crypto/curve25519.h -+++ b/include/crypto/curve25519.h -@@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic - const u8 secret[CURVE25519_KEY_SIZE], - const u8 basepoint[CURVE25519_KEY_SIZE]) - { -- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) && -+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX))) - curve25519_arch(mypublic, secret, basepoint); - else - curve25519_generic(mypublic, secret, basepoint); -@@ -49,7 +50,8 @@ __must_check curve25519_generate_public( - CURVE25519_KEY_SIZE))) - return false; - -- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) -+ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) && -+ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX))) - curve25519_base_arch(pub, secret); - else - curve25519_generic(pub, secret, curve25519_base_point); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0053-crypto-arm64-chacha-correctly-walk-through-blocks.patch b/target/linux/generic/backport-5.4/080-wireguard-0053-crypto-arm64-chacha-correctly-walk-through-blocks.patch deleted file mode 100644 index 823a908373..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0053-crypto-arm64-chacha-correctly-walk-through-blocks.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 18 Mar 2020 20:27:32 -0600 -Subject: [PATCH] crypto: arm64/chacha - correctly walk through blocks - -commit c8cfcb78c65877313cda7bcbace624d3dbd1f3b3 upstream. 
- -Prior, passing in chunks of 2, 3, or 4, followed by any additional -chunks would result in the chacha state counter getting out of sync, -resulting in incorrect encryption/decryption, which is a pretty nasty -crypto vuln: "why do images look weird on webpages?" WireGuard users -never experienced this prior, because we have always, out of tree, used -a different crypto library, until the recent Frankenzinc addition. This -commit fixes the issue by advancing the pointers and state counter by -the actual size processed. It also fixes up a bug in the (optional, -costly) stride test that prevented it from running on arm64. - -Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function") -Reported-and-tested-by: Emil Renner Berthing <kernel@esmil.dk> -Cc: Ard Biesheuvel <ardb@kernel.org> -Cc: stable@vger.kernel.org # v5.5+ -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Reviewed-by: Eric Biggers <ebiggers@google.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/arm64/crypto/chacha-neon-glue.c | 8 ++++---- - lib/crypto/chacha20poly1305-selftest.c | 11 ++++++++--- - 2 files changed, 12 insertions(+), 7 deletions(-) - ---- a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8 - break; - } - chacha_4block_xor_neon(state, dst, src, nrounds, l); -- bytes -= CHACHA_BLOCK_SIZE * 5; -- src += CHACHA_BLOCK_SIZE * 5; -- dst += CHACHA_BLOCK_SIZE * 5; -- state[12] += 5; -+ bytes -= l; -+ src += l; -+ dst += l; -+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); - } - } - ---- a/lib/crypto/chacha20poly1305-selftest.c -+++ b/lib/crypto/chacha20poly1305-selftest.c -@@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(vo - && total_len <= 1 << 10; ++total_len) { - for (i = 0; i <= total_len; ++i) { - for (j = i; j <= total_len; ++j) { -+ k = 0; - sg_init_table(sg_src, 3); -- sg_set_buf(&sg_src[0], input, i); -- sg_set_buf(&sg_src[1], input + i, j - i); -- sg_set_buf(&sg_src[2], input + j, total_len - j); -+ if (i) -+ sg_set_buf(&sg_src[k++], input, i); -+ if (j - i) -+ sg_set_buf(&sg_src[k++], input + i, j - i); -+ if (total_len - j) -+ sg_set_buf(&sg_src[k++], input + j, total_len - j); -+ sg_init_marker(sg_src, k); - memset(computed_output, 0, total_len); - memset(input, 0, total_len); - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0054-crypto-x86-curve25519-replace-with-formally-verified.patch b/target/linux/generic/backport-5.4/080-wireguard-0054-crypto-x86-curve25519-replace-with-formally-verified.patch deleted file mode 100644 index 938d700da2..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0054-crypto-x86-curve25519-replace-with-formally-verified.patch +++ /dev/null @@ -1,3765 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Mon, 20 Jan 2020 18:18:15 +0100 -Subject: [PATCH] crypto: x86/curve25519 - replace with formally verified - implementation - -commit 07b586fe06625b0b610dc3d3a969c51913d143d4 upstream. - -This comes from INRIA's HACL*/Vale. It implements the same algorithm and -implementation strategy as the code it replaces, only this code has been -formally verified, sans the base point multiplication, which uses code -similar to prior, only it uses the formally verified field arithmetic -alongside reproducible ladder generation steps. This doesn't have a -pure-bmi2 version, which means haswell no longer benefits, but the -increased (doubled) code complexity is not worth it for a single -generation of chips that's already old. - -Performance-wise, this is around 1% slower on older microarchitectures, -and slightly faster on newer microarchitectures, mainly 10nm ones or -backports of 10nm to 14nm. This implementation is "everest" below: - -Xeon E5-2680 v4 (Broadwell) - - armfazh: 133340 cycles per call - everest: 133436 cycles per call - -Xeon Gold 5120 (Sky Lake Server) - - armfazh: 112636 cycles per call - everest: 113906 cycles per call - -Core i5-6300U (Sky Lake Client) - - armfazh: 116810 cycles per call - everest: 117916 cycles per call - -Core i7-7600U (Kaby Lake) - - armfazh: 119523 cycles per call - everest: 119040 cycles per call - -Core i7-8750H (Coffee Lake) - - armfazh: 113914 cycles per call - everest: 113650 cycles per call - -Core i9-9880H (Coffee Lake Refresh) - - armfazh: 112616 cycles per call - everest: 114082 cycles per call - -Core i3-8121U (Cannon Lake) - - armfazh: 113202 cycles per call - everest: 111382 cycles per call - -Core i7-8265U (Whiskey Lake) - - armfazh: 127307 cycles per call - everest: 127697 cycles per call - -Core i7-8550U (Kaby Lake Refresh) - - armfazh: 127522 cycles per call - everest: 127083 cycles per call - -Xeon Platinum 8275CL (Cascade Lake) - - armfazh: 114380 cycles per call - everest: 114656 
cycles per call - -Achieving these kinds of results with formally verified code is quite -remarkable, especially considering that performance is favorable for -newer chips. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/curve25519-x86_64.c | 3546 ++++++++++----------------- - 1 file changed, 1292 insertions(+), 2254 deletions(-) - ---- a/arch/x86/crypto/curve25519-x86_64.c -+++ b/arch/x86/crypto/curve25519-x86_64.c -@@ -1,8 +1,7 @@ --// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause -+// SPDX-License-Identifier: GPL-2.0 OR MIT - /* -- * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved. -- * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -- * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. -+ * Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - */ - - #include <crypto/curve25519.h> -@@ -16,2337 +15,1378 @@ - #include <asm/cpufeature.h> - #include <asm/processor.h> - --static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); --static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); -- --enum { NUM_WORDS_ELTFP25519 = 4 }; --typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; --typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; -- --#define mul_eltfp25519_1w_adx(c, a, b) do { \ -- mul_256x256_integer_adx(m.buffer, a, b); \ -- red_eltfp25519_1w_adx(c, m.buffer); \ --} while (0) -- --#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ -- mul_256x256_integer_bmi2(m.buffer, a, b); \ -- red_eltfp25519_1w_bmi2(c, m.buffer); \ --} while (0) -- --#define sqr_eltfp25519_1w_adx(a) do { \ -- sqr_256x256_integer_adx(m.buffer, a); \ -- red_eltfp25519_1w_adx(a, m.buffer); \ --} while (0) -- --#define sqr_eltfp25519_1w_bmi2(a) do { \ -- sqr_256x256_integer_bmi2(m.buffer, a); \ -- red_eltfp25519_1w_bmi2(a, m.buffer); \ --} while (0) -- --#define mul_eltfp25519_2w_adx(c, a, b) do { \ -- mul2_256x256_integer_adx(m.buffer, a, b); \ -- red_eltfp25519_2w_adx(c, m.buffer); \ --} while (0) -- --#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ -- mul2_256x256_integer_bmi2(m.buffer, a, b); \ -- red_eltfp25519_2w_bmi2(c, m.buffer); \ --} while (0) -- --#define sqr_eltfp25519_2w_adx(a) do { \ -- sqr2_256x256_integer_adx(m.buffer, a); \ -- red_eltfp25519_2w_adx(a, m.buffer); \ --} while (0) -- --#define sqr_eltfp25519_2w_bmi2(a) do { \ -- sqr2_256x256_integer_bmi2(m.buffer, a); \ -- red_eltfp25519_2w_bmi2(a, m.buffer); \ --} while (0) -- --#define sqrn_eltfp25519_1w_adx(a, times) do { \ -- int ____counter = (times); \ -- while (____counter-- > 0) \ -- sqr_eltfp25519_1w_adx(a); \ --} while (0) -- --#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ -- int ____counter = (times); \ -- while (____counter-- > 0) \ -- 
sqr_eltfp25519_1w_bmi2(a); \ --} while (0) -- --#define copy_eltfp25519_1w(C, A) do { \ -- (C)[0] = (A)[0]; \ -- (C)[1] = (A)[1]; \ -- (C)[2] = (A)[2]; \ -- (C)[3] = (A)[3]; \ --} while (0) -- --#define setzero_eltfp25519_1w(C) do { \ -- (C)[0] = 0; \ -- (C)[1] = 0; \ -- (C)[2] = 0; \ -- (C)[3] = 0; \ --} while (0) -- --__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { -- /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, -- 0xffffffffffffffffUL, 0x5fffffffffffffffUL, -- /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, -- 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, -- /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, -- 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, -- /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, -- 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, -- /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, -- 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, -- /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, -- 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, -- /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, -- 0xc1c20d06231f7614UL, 0x2938218da274f972UL, -- /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, -- 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, -- /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, -- 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, -- /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, -- 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, -- /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, -- 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, -- /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, -- 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL, -- /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL, -- 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, -- /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, -- 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, -- /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, -- 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, -- /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, -- 
0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, -- /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, -- 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, -- /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, -- 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, -- /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, -- 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, -- /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, -- 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, -- /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, -- 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, -- /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, -- 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, -- /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, -- 0x23758739f630a257UL, 0x295a407a01a78580UL, -- /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, -- 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, -- /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, -- 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, -- /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, -- 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, -- /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, -- 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, -- /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, -- 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, -- /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, -- 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, -- /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, -- 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, -- /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, -- 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, -- /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, -- 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, -- /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, -- 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, -- /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, -- 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, -- /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL, -- 0xa4b561d6cf3d6305UL, 
0x743629bde8fb777eUL, -- /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, -- 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, -- /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, -- 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, -- /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, -- 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, -- /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, -- 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, -- /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, -- 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, -- /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, -- 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, -- /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, -- 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, -- /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, -- 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, -- /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, -- 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, -- /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, -- 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, -- /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, -- 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, -- /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, -- 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL, -- /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, -- 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, -- /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, -- 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, -- /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, -- 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, -- /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, -- 0xc189218075e91436UL, 0x6d9284169b3b8484UL, -- /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, -- 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL, -- /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, -- 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, -- /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, -- 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, -- /* 55 */ 
0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, -- 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, -- /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, -- 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, -- /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, -- 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, -- /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, -- 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, -- /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, -- 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, -- /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, -- 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, -- /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, -- 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, -- /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, -- 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, -- /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, -- 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, -- /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, -- 0x25232973322dbef4UL, 0x445dc4758c17f770UL, -- /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, -- 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, -- /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, -- 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, -- /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, -- 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, -- /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, -- 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, -- /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, -- 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, -- /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, -- 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, -- /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, -- 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, -- /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, -- 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, -- /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, -- 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL, -- /* 74 */ 0xaa66c14277110b8cUL, 
0x1ae0391610a23390UL, -- 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, -- /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, -- 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, -- /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, -- 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, -- /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, -- 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, -- /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, -- 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, -- /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, -- 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, -- /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, -- 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, -- /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, -- 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, -- /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, -- 0x894d1d855ae52359UL, 0x68e122157b743d69UL, -- /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, -- 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, -- /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, -- 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, -- /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, -- 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, -- /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, -- 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, -- /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, -- 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, -- /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, -- 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, -- /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, -- 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, -- /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, -- 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, -- /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, -- 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, -- /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL, -- 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, -- /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, -- 
0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, -- /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, -- 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, -- /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, -- 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, -- /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, -- 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, -- /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, -- 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, -- /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, -- 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, -- /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, -- 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, -- /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, -- 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, -- /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, -- 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, -- /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, -- 0x4a497962066e6043UL, 0x705b3aab41355b44UL, -- /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, -- 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, -- /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, -- 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, -- /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, -- 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, -- /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, -- 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, -- /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, -- 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, -- /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, -- 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, -- /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, -- 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, -- /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, -- 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, -- /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, -- 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, -- /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL, -- 0xb3321bd16dd80b43UL, 
0x48c14f600c5fbe8eUL, -- /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, -- 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, -- /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, -- 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, -- /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, -- 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, -- /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, -- 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, -- /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, -- 0x508e862f121692fcUL, 0x3a81907fa093c291UL, -- /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, -- 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, -- /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, -- 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, -- /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, -- 0xe488de11d761e352UL, 0x0e878a01a085545cUL, -- /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, -- 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, -- /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, -- 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, -- /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, -- 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, -- /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, -- 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, -- /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, -- 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, -- /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, -- 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, -- /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL, -- 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, -- /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, -- 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, -- /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, -- 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, -- /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, -- 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, -- /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, -- 0x904659bb686e3772UL, 0x7215c371746ba8c8UL, 
-- /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, -- 0x266fd5809208f294UL, 0x5c847085619a26b9UL, -- /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, -- 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, -- /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, -- 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, -- /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, -- 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, -- /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, -- 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, -- /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, -- 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, -- /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, -- 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, -- /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, -- 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, -- /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, -- 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, -- /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, -- 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, -- /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, -- 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, -- /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, -- 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, -- /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, -- 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, -- /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, -- 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, -- /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, -- 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, -- /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, -- 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, -- /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, -- 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, -- /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, -- 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, -- /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, -- 0x52d17436309d4253UL, 0x356f97e13efae576UL, -- /* 151 */ 
0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, -- 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, -- /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, -- 0x66124c6f97bda770UL, 0x0f81a0290654124aUL, -- /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, -- 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL, -- /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, -- 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, -- /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, -- 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, -- /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, -- 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, -- /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, -- 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, -- /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, -- 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, -- /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, -- 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, -- /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, -- 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, -- /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, -- 0x497d723f802e88e1UL, 0x30684dea602f408dUL, -- /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, -- 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, -- /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, -- 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, -- /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, -- 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, -- /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, -- 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, -- /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, -- 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, -- /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, -- 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, -- /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, -- 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, -- /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, -- 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL, -- /* 170 */ 0x485ca03e928a0db2UL, 
0x10fc01a5742857e7UL, -- 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, -- /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, -- 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, -- /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, -- 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, -- /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, -- 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, -- /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, -- 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, -- /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, -- 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, -- /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, -- 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, -- /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, -- 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, -- /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, -- 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, -- /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, -- 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, -- /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, -- 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, -- /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, -- 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, -- /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, -- 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, -- /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, -- 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, -- /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, -- 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, -- /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, -- 0x81004b71e33cc191UL, 0x44e6be345122803cUL, -- /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, -- 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, -- /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL, -- 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, -- /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, -- 0x12bc8d6915783712UL, 0x498194c0fc620abbUL, -- /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, 
-- 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, -- /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL, -- 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, -- /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, -- 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, -- /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, -- 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, -- /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, -- 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, -- /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, -- 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, -- /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, -- 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, -- /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, -- 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL, -- /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, -- 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, -- /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, -- 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, -- /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, -- 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, -- /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, -- 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, -- /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, -- 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, -- /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, -- 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, -- /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, -- 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, -- /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, -- 0x33979624f0e917beUL, 0x2c018dc527356b30UL, -- /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, -- 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, -- /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, -- 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, -- /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, -- 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, -- /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, -- 
0x345ead5e972d091eUL, 0x18c8df11a83103baUL, -- /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, -- 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, -- /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, -- 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, -- /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, -- 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, -- /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, -- 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, -- /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, -- 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, -- /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, -- 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, -- /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, -- 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, -- /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, -- 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, -- /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, -- 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, -- /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, -- 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, -- /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, -- 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, -- /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, -- 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, -- /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, -- 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, -- /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, -- 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, -- /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL, -- 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, -- /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, -- 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, -- /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, -- 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, -- /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, -- 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, -- /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL, -- 0x56d90319c9f94964UL, 
0x1ca941e7ac9ff5c4UL, -- /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, -- 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, -- /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, -- 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, -- /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, -- 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, -- /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, -- 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, -- /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, -- 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, -- /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, -- 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, -- /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, -- 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, -- /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, -- 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, -- /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, -- 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, -- /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, -- 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, -- /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, -- 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, -- /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, -- 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, -- /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, -- 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, -- /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, -- 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, -- /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, -- 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, -- /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, -- 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, -- /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, -- 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, -- /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, -- 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, -- /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, -- 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, 
-- /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, -- 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, -- /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, -- 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, -- /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, -- 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, -- /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, -- 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, -- /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, -- 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, -- /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, -- 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL --}; -- --/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] -- * a is two 256-bit integers: a0[0:3] and a1[4:7] -- * b is two 256-bit integers: b0[0:3] and b1[4:7] -- */ --static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, -- const u64 *const b) --{ -- asm volatile( -- "xorl %%r14d, %%r14d ;" -- "movq (%1), %%rdx; " /* A[0] */ -- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ -- "xorl %%r10d, %%r10d ;" -- "movq %%r8, (%0) ;" -- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ -- "adox %%r10, %%r15 ;" -- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ -- "adox %%r8, %%rax ;" -- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ -- "adox %%r10, %%rbx ;" -- /******************************************/ -- "adox %%r14, %%rcx ;" -- -- "movq 8(%1), %%rdx; " /* A[1] */ -- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -- "adox %%r15, %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -- "adox %%r10, %%r9 ;" -- "adcx %%r9, %%rax ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ -- "adox %%r8, %%r11 ;" -- "adcx %%r11, %%rbx ;" -- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ -- "adox %%r10, %%r13 ;" -- "adcx %%r13, %%rcx ;" -- /******************************************/ -- "adox %%r14, %%r15 ;" -- "adcx %%r14, %%r15 ;" -- -- "movq 16(%1), %%rdx; " /* A[2] */ -- "xorl %%r10d, %%r10d ;" -- "mulx (%2), 
%%r8, %%r9; " /* A[2]*B[0] */ -- "adox %%rax, %%r8 ;" -- "movq %%r8, 16(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -- "adox %%r10, %%r9 ;" -- "adcx %%r9, %%rbx ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ -- "adox %%r8, %%r11 ;" -- "adcx %%r11, %%rcx ;" -- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ -- "adox %%r10, %%r13 ;" -- "adcx %%r13, %%r15 ;" -- /******************************************/ -- "adox %%r14, %%rax ;" -- "adcx %%r14, %%rax ;" -- -- "movq 24(%1), %%rdx; " /* A[3] */ -- "xorl %%r10d, %%r10d ;" -- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -- "adox %%rbx, %%r8 ;" -- "movq %%r8, 24(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -- "adox %%r10, %%r9 ;" -- "adcx %%r9, %%rcx ;" -- "movq %%rcx, 32(%0) ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ -- "adox %%r8, %%r11 ;" -- "adcx %%r11, %%r15 ;" -- "movq %%r15, 40(%0) ;" -- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ -- "adox %%r10, %%r13 ;" -- "adcx %%r13, %%rax ;" -- "movq %%rax, 48(%0) ;" -- /******************************************/ -- "adox %%r14, %%rbx ;" -- "adcx %%r14, %%rbx ;" -- "movq %%rbx, 56(%0) ;" -- -- "movq 32(%1), %%rdx; " /* C[0] */ -- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ -- "xorl %%r10d, %%r10d ;" -- "movq %%r8, 64(%0);" -- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ -- "adox %%r10, %%r15 ;" -- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ -- "adox %%r8, %%rax ;" -- "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ -- "adox %%r10, %%rbx ;" -- /******************************************/ -- "adox %%r14, %%rcx ;" -- -- "movq 40(%1), %%rdx; " /* C[1] */ -- "xorl %%r10d, %%r10d ;" -- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ -- "adox %%r15, %%r8 ;" -- "movq %%r8, 72(%0);" -- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ -- "adox %%r10, %%r9 ;" -- "adcx %%r9, %%rax ;" -- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ -- "adox %%r8, %%r11 ;" -- "adcx %%r11, %%rbx ;" -- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ -- "adox %%r10, %%r13 ;" -- "adcx 
%%r13, %%rcx ;" -- /******************************************/ -- "adox %%r14, %%r15 ;" -- "adcx %%r14, %%r15 ;" -- -- "movq 48(%1), %%rdx; " /* C[2] */ -- "xorl %%r10d, %%r10d ;" -- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ -- "adox %%rax, %%r8 ;" -- "movq %%r8, 80(%0);" -- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ -- "adox %%r10, %%r9 ;" -- "adcx %%r9, %%rbx ;" -- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ -- "adox %%r8, %%r11 ;" -- "adcx %%r11, %%rcx ;" -- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ -- "adox %%r10, %%r13 ;" -- "adcx %%r13, %%r15 ;" -- /******************************************/ -- "adox %%r14, %%rax ;" -- "adcx %%r14, %%rax ;" -- -- "movq 56(%1), %%rdx; " /* C[3] */ -- "xorl %%r10d, %%r10d ;" -- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ -- "adox %%rbx, %%r8 ;" -- "movq %%r8, 88(%0);" -- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ -- "adox %%r10, %%r9 ;" -- "adcx %%r9, %%rcx ;" -- "movq %%rcx, 96(%0) ;" -- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ -- "adox %%r8, %%r11 ;" -- "adcx %%r11, %%r15 ;" -- "movq %%r15, 104(%0) ;" -- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ -- "adox %%r10, %%r13 ;" -- "adcx %%r13, %%rax ;" -- "movq %%rax, 112(%0) ;" -- /******************************************/ -- "adox %%r14, %%rbx ;" -- "adcx %%r14, %%rbx ;" -- "movq %%rbx, 120(%0) ;" -- : -- : "r"(c), "r"(a), "r"(b) -- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -- "%r10", "%r11", "%r13", "%r14", "%r15"); --} -- --static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, -- const u64 *const b) -+static __always_inline u64 eq_mask(u64 a, u64 b) - { -- asm volatile( -- "movq (%1), %%rdx; " /* A[0] */ -- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ -- "movq %%r8, (%0) ;" -- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ -- "addq %%r10, %%r15 ;" -- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ -- "adcq %%r8, %%rax ;" -- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ -- "adcq %%r10, %%rbx ;" -- 
/******************************************/ -- "adcq $0, %%rcx ;" -- -- "movq 8(%1), %%rdx; " /* A[1] */ -- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -- "addq %%r15, %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%r15 ;" -- -- "addq %%r9, %%rax ;" -- "adcq %%r11, %%rbx ;" -- "adcq %%r13, %%rcx ;" -- "adcq $0, %%r15 ;" -- -- "movq 16(%1), %%rdx; " /* A[2] */ -- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ -- "addq %%rax, %%r8 ;" -- "movq %%r8, 16(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%rax ;" -- -- "addq %%r9, %%rbx ;" -- "adcq %%r11, %%rcx ;" -- "adcq %%r13, %%r15 ;" -- "adcq $0, %%rax ;" -- -- "movq 24(%1), %%rdx; " /* A[3] */ -- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -- "addq %%rbx, %%r8 ;" -- "movq %%r8, 24(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%rbx ;" -- -- "addq %%r9, %%rcx ;" -- "movq %%rcx, 32(%0) ;" -- "adcq %%r11, %%r15 ;" -- "movq %%r15, 40(%0) ;" -- "adcq %%r13, %%rax ;" -- "movq %%rax, 48(%0) ;" -- "adcq $0, %%rbx ;" -- "movq %%rbx, 56(%0) ;" -- -- "movq 32(%1), %%rdx; " /* C[0] */ -- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ -- "movq %%r8, 64(%0) ;" -- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ -- "addq %%r10, %%r15 ;" -- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ -- "adcq %%r8, %%rax ;" -- "mulx 
56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ -- "adcq %%r10, %%rbx ;" -- /******************************************/ -- "adcq $0, %%rcx ;" -- -- "movq 40(%1), %%rdx; " /* C[1] */ -- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ -- "addq %%r15, %%r8 ;" -- "movq %%r8, 72(%0) ;" -- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%r15 ;" -- -- "addq %%r9, %%rax ;" -- "adcq %%r11, %%rbx ;" -- "adcq %%r13, %%rcx ;" -- "adcq $0, %%r15 ;" -- -- "movq 48(%1), %%rdx; " /* C[2] */ -- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ -- "addq %%rax, %%r8 ;" -- "movq %%r8, 80(%0) ;" -- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%rax ;" -- -- "addq %%r9, %%rbx ;" -- "adcq %%r11, %%rcx ;" -- "adcq %%r13, %%r15 ;" -- "adcq $0, %%rax ;" -- -- "movq 56(%1), %%rdx; " /* C[3] */ -- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ -- "addq %%rbx, %%r8 ;" -- "movq %%r8, 88(%0) ;" -- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%rbx ;" -- -- "addq %%r9, %%rcx ;" -- "movq %%rcx, 96(%0) ;" -- "adcq %%r11, %%r15 ;" -- "movq %%r15, 104(%0) ;" -- "adcq %%r13, %%rax ;" -- "movq %%rax, 112(%0) ;" -- "adcq $0, %%rbx ;" -- "movq %%rbx, 120(%0) ;" -- : -- : "r"(c), "r"(a), "r"(b) -- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -- "%r10", "%r11", "%r13", "%r15"); -+ u64 x = a ^ b; -+ u64 minus_x = ~x + (u64)1U; 
-+ u64 x_or_minus_x = x | minus_x; -+ u64 xnx = x_or_minus_x >> (u32)63U; -+ return xnx - (u64)1U; - } - --static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) -+static __always_inline u64 gte_mask(u64 a, u64 b) - { -- asm volatile( -- "movq (%1), %%rdx ;" /* A[0] */ -- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ -- "xorl %%r15d, %%r15d;" -- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ -- "adcx %%r14, %%r9 ;" -- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ -- "adcx %%rax, %%r10 ;" -- "movq 24(%1), %%rdx ;" /* A[3] */ -- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ -- "adcx %%rcx, %%r11 ;" -- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ -- "adcx %%rax, %%rbx ;" -- "movq 8(%1), %%rdx ;" /* A[1] */ -- "adcx %%r15, %%r13 ;" -- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ -- "movq $0, %%r14 ;" -- /******************************************/ -- "adcx %%r15, %%r14 ;" -- -- "xorl %%r15d, %%r15d;" -- "adox %%rax, %%r10 ;" -- "adcx %%r8, %%r8 ;" -- "adox %%rcx, %%r11 ;" -- "adcx %%r9, %%r9 ;" -- "adox %%r15, %%rbx ;" -- "adcx %%r10, %%r10 ;" -- "adox %%r15, %%r13 ;" -- "adcx %%r11, %%r11 ;" -- "adox %%r15, %%r14 ;" -- "adcx %%rbx, %%rbx ;" -- "adcx %%r13, %%r13 ;" -- "adcx %%r14, %%r14 ;" -- -- "movq (%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ -- /*******************/ -- "movq %%rax, 0(%0) ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "movq 8(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ -- "adcq %%rax, %%r9 ;" -- "movq %%r9, 16(%0) ;" -- "adcq %%rcx, %%r10 ;" -- "movq %%r10, 24(%0) ;" -- "movq 16(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ -- "adcq %%rax, %%r11 ;" -- "movq %%r11, 32(%0) ;" -- "adcq %%rcx, %%rbx ;" -- "movq %%rbx, 40(%0) ;" -- "movq 24(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ -- "adcq %%rax, %%r13 ;" -- "movq %%r13, 48(%0) ;" -- "adcq %%rcx, %%r14 ;" -- "movq %%r14, 56(%0) ;" -- -- -- "movq 32(%1), %%rdx ;" /* B[0] */ -- "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ -- 
"xorl %%r15d, %%r15d;" -- "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ -- "adcx %%r14, %%r9 ;" -- "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ -- "adcx %%rax, %%r10 ;" -- "movq 56(%1), %%rdx ;" /* B[3] */ -- "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ -- "adcx %%rcx, %%r11 ;" -- "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ -- "adcx %%rax, %%rbx ;" -- "movq 40(%1), %%rdx ;" /* B[1] */ -- "adcx %%r15, %%r13 ;" -- "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ -- "movq $0, %%r14 ;" -- /******************************************/ -- "adcx %%r15, %%r14 ;" -- -- "xorl %%r15d, %%r15d;" -- "adox %%rax, %%r10 ;" -- "adcx %%r8, %%r8 ;" -- "adox %%rcx, %%r11 ;" -- "adcx %%r9, %%r9 ;" -- "adox %%r15, %%rbx ;" -- "adcx %%r10, %%r10 ;" -- "adox %%r15, %%r13 ;" -- "adcx %%r11, %%r11 ;" -- "adox %%r15, %%r14 ;" -- "adcx %%rbx, %%rbx ;" -- "adcx %%r13, %%r13 ;" -- "adcx %%r14, %%r14 ;" -- -- "movq 32(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ -- /*******************/ -- "movq %%rax, 64(%0) ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 72(%0) ;" -- "movq 40(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ -- "adcq %%rax, %%r9 ;" -- "movq %%r9, 80(%0) ;" -- "adcq %%rcx, %%r10 ;" -- "movq %%r10, 88(%0) ;" -- "movq 48(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */ -- "adcq %%rax, %%r11 ;" -- "movq %%r11, 96(%0) ;" -- "adcq %%rcx, %%rbx ;" -- "movq %%rbx, 104(%0) ;" -- "movq 56(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ -- "adcq %%rax, %%r13 ;" -- "movq %%r13, 112(%0) ;" -- "adcq %%rcx, %%r14 ;" -- "movq %%r14, 120(%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -- "%r10", "%r11", "%r13", "%r14", "%r15"); -+ u64 x = a; -+ u64 y = b; -+ u64 x_xor_y = x ^ y; -+ u64 x_sub_y = x - y; -+ u64 x_sub_y_xor_y = x_sub_y ^ y; -+ u64 q = x_xor_y | x_sub_y_xor_y; -+ u64 x_xor_q = x ^ q; -+ u64 x_xor_q_ = x_xor_q >> (u32)63U; -+ return x_xor_q_ - (u64)1U; - } - --static void 
sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) -+/* Computes the addition of four-element f1 with value in f2 -+ * and returns the carry (if any) */ -+static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) - { -- asm volatile( -- "movq 8(%1), %%rdx ;" /* A[1] */ -- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ -- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ -- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ -- -- "movq 16(%1), %%rdx ;" /* A[2] */ -- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ -- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ -- -- "addq %%rax, %%r9 ;" -- "adcq %%rdx, %%r10 ;" -- "adcq %%rcx, %%r11 ;" -- "adcq %%r14, %%r15 ;" -- "adcq $0, %%r13 ;" -- "movq $0, %%r14 ;" -- "adcq $0, %%r14 ;" -- -- "movq (%1), %%rdx ;" /* A[0] */ -- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ -- -- "addq %%rax, %%r10 ;" -- "adcq %%rcx, %%r11 ;" -- "adcq $0, %%r15 ;" -- "adcq $0, %%r13 ;" -- "adcq $0, %%r14 ;" -- -- "shldq $1, %%r13, %%r14 ;" -- "shldq $1, %%r15, %%r13 ;" -- "shldq $1, %%r11, %%r15 ;" -- "shldq $1, %%r10, %%r11 ;" -- "shldq $1, %%r9, %%r10 ;" -- "shldq $1, %%r8, %%r9 ;" -- "shlq $1, %%r8 ;" -- -- /*******************/ -- "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ -- /*******************/ -- "movq %%rax, 0(%0) ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "movq 8(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ -- "adcq %%rax, %%r9 ;" -- "movq %%r9, 16(%0) ;" -- "adcq %%rcx, %%r10 ;" -- "movq %%r10, 24(%0) ;" -- "movq 16(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ -- "adcq %%rax, %%r11 ;" -- "movq %%r11, 32(%0) ;" -- "adcq %%rcx, %%r15 ;" -- "movq %%r15, 40(%0) ;" -- "movq 24(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ -- "adcq %%rax, %%r13 ;" -- "movq %%r13, 48(%0) ;" -- "adcq %%rcx, %%r14 ;" -- "movq %%r14, 56(%0) ;" -- -- "movq 40(%1), %%rdx ;" /* B[1] */ -- "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ -- "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ -- "mulx 56(%1), %%rcx, %%r14 ;" /* 
B[3]*B[1] */ -- -- "movq 48(%1), %%rdx ;" /* B[2] */ -- "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ -- "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ -- -- "addq %%rax, %%r9 ;" -- "adcq %%rdx, %%r10 ;" -- "adcq %%rcx, %%r11 ;" -- "adcq %%r14, %%r15 ;" -- "adcq $0, %%r13 ;" -- "movq $0, %%r14 ;" -- "adcq $0, %%r14 ;" -- -- "movq 32(%1), %%rdx ;" /* B[0] */ -- "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ -- -- "addq %%rax, %%r10 ;" -- "adcq %%rcx, %%r11 ;" -- "adcq $0, %%r15 ;" -- "adcq $0, %%r13 ;" -- "adcq $0, %%r14 ;" -- -- "shldq $1, %%r13, %%r14 ;" -- "shldq $1, %%r15, %%r13 ;" -- "shldq $1, %%r11, %%r15 ;" -- "shldq $1, %%r10, %%r11 ;" -- "shldq $1, %%r9, %%r10 ;" -- "shldq $1, %%r8, %%r9 ;" -- "shlq $1, %%r8 ;" -- -- /*******************/ -- "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */ -- /*******************/ -- "movq %%rax, 64(%0) ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 72(%0) ;" -- "movq 40(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ -- "adcq %%rax, %%r9 ;" -- "movq %%r9, 80(%0) ;" -- "adcq %%rcx, %%r10 ;" -- "movq %%r10, 88(%0) ;" -- "movq 48(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ -- "adcq %%rax, %%r11 ;" -- "movq %%r11, 96(%0) ;" -- "adcq %%rcx, %%r15 ;" -- "movq %%r15, 104(%0) ;" -- "movq 56(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ -- "adcq %%rax, %%r13 ;" -- "movq %%r13, 112(%0) ;" -- "adcq %%rcx, %%r14 ;" -- "movq %%r14, 120(%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -- "%r11", "%r13", "%r14", "%r15"); --} -+ u64 carry_r; - --static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) --{ - asm volatile( -- "movl $38, %%edx; " /* 2*c = 38 = 2^256 */ -- "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ -- "xorl %%ebx, %%ebx ;" -- "adox (%1), %%r8 ;" -- "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ -- "adcx %%r10, %%r9 ;" -- "adox 8(%1), %%r9 ;" -- "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ -- "adcx %%r11, %%r10 ;" -- "adox 16(%1), %%r10 ;" -- 
"mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ -- "adcx %%rax, %%r11 ;" -- "adox 24(%1), %%r11 ;" -- /***************************************/ -- "adcx %%rbx, %%rcx ;" -- "adox %%rbx, %%rcx ;" -- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ -- "adcx %%rcx, %%r8 ;" -- "adcx %%rbx, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "adcx %%rbx, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "adcx %%rbx, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%edx, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, (%0) ;" -- -- "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ -- "xorl %%ebx, %%ebx ;" -- "adox 64(%1), %%r8 ;" -- "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ -- "adcx %%r10, %%r9 ;" -- "adox 72(%1), %%r9 ;" -- "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ -- "adcx %%r11, %%r10 ;" -- "adox 80(%1), %%r10 ;" -- "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ -- "adcx %%rax, %%r11 ;" -- "adox 88(%1), %%r11 ;" -- /****************************************/ -- "adcx %%rbx, %%rcx ;" -- "adox %%rbx, %%rcx ;" -- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ -- "adcx %%rcx, %%r8 ;" -- "adcx %%rbx, %%r9 ;" -- "movq %%r9, 40(%0) ;" -- "adcx %%rbx, %%r10 ;" -- "movq %%r10, 48(%0) ;" -- "adcx %%rbx, %%r11 ;" -- "movq %%r11, 56(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%edx, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 32(%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -- "%r10", "%r11"); --} -+ /* Clear registers to propagate the carry bit */ -+ " xor %%r8, %%r8;" -+ " xor %%r9, %%r9;" -+ " xor %%r10, %%r10;" -+ " xor %%r11, %%r11;" -+ " xor %1, %1;" -+ -+ /* Begin addition chain */ -+ " addq 0(%3), %0;" -+ " movq %0, 0(%2);" -+ " adcxq 8(%3), %%r8;" -+ " movq %%r8, 8(%2);" -+ " adcxq 16(%3), %%r9;" -+ " movq %%r9, 16(%2);" -+ " adcxq 24(%3), %%r10;" -+ " movq %%r10, 24(%2);" -+ -+ /* Return the carry bit in a register */ -+ " adcx %%r11, %1;" -+ : "+&r" (f2), "=&r" (carry_r) -+ : "r" (out), "r" (f1) -+ : "%r8", "%r9", "%r10", "%r11", "memory", "cc" 
-+ ); - --static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) --{ -- asm volatile( -- "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */ -- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ -- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ -- "addq %%r10, %%r9 ;" -- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ -- "adcq %%r11, %%r10 ;" -- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ -- "adcq %%rax, %%r11 ;" -- /***************************************/ -- "adcq $0, %%rcx ;" -- "addq (%1), %%r8 ;" -- "adcq 8(%1), %%r9 ;" -- "adcq 16(%1), %%r10 ;" -- "adcq 24(%1), %%r11 ;" -- "adcq $0, %%rcx ;" -- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ -- "addq %%rcx, %%r8 ;" -- "adcq $0, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "adcq $0, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "adcq $0, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%edx, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, (%0) ;" -- -- "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */ -- "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ -- "addq %%r10, %%r9 ;" -- "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ -- "adcq %%r11, %%r10 ;" -- "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ -- "adcq %%rax, %%r11 ;" -- /****************************************/ -- "adcq $0, %%rcx ;" -- "addq 64(%1), %%r8 ;" -- "adcq 72(%1), %%r9 ;" -- "adcq 80(%1), %%r10 ;" -- "adcq 88(%1), %%r11 ;" -- "adcq $0, %%rcx ;" -- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ -- "addq %%rcx, %%r8 ;" -- "adcq $0, %%r9 ;" -- "movq %%r9, 40(%0) ;" -- "adcq $0, %%r10 ;" -- "movq %%r10, 48(%0) ;" -- "adcq $0, %%r11 ;" -- "movq %%r11, 56(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%edx, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 32(%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -- "%r11"); -+ return carry_r; - } - --static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, -- const u64 *const b) -+/* Computes the field addition of two field elements */ -+static inline void fadd(u64 *out, const u64 *f1, const u64 
*f2) - { - asm volatile( -- "movq (%1), %%rdx; " /* A[0] */ -- "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ -- "xorl %%r10d, %%r10d ;" -- "movq %%r8, (%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ -- "adox %%r9, %%r10 ;" -- "movq %%r10, 8(%0) ;" -- "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ -- "adox %%r11, %%r15 ;" -- "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ -- "adox %%r13, %%r14 ;" -- "movq $0, %%rax ;" -- /******************************************/ -- "adox %%rdx, %%rax ;" -- -- "movq 8(%1), %%rdx; " /* A[1] */ -- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -- "xorl %%r10d, %%r10d ;" -- "adcx 8(%0), %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -- "adox %%r9, %%r10 ;" -- "adcx %%r15, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ -- "adox %%r11, %%r15 ;" -- "adcx %%r14, %%r15 ;" -- "movq $0, %%r8 ;" -- "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ -- "adox %%r13, %%r14 ;" -- "adcx %%rax, %%r14 ;" -- "movq $0, %%rax ;" -- /******************************************/ -- "adox %%rdx, %%rax ;" -- "adcx %%r8, %%rax ;" -- -- "movq 16(%1), %%rdx; " /* A[2] */ -- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ -- "xorl %%r10d, %%r10d ;" -- "adcx 16(%0), %%r8 ;" -- "movq %%r8, 16(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -- "adox %%r9, %%r10 ;" -- "adcx %%r15, %%r10 ;" -- "movq %%r10, 24(%0) ;" -- "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ -- "adox %%r11, %%r15 ;" -- "adcx %%r14, %%r15 ;" -- "movq $0, %%r8 ;" -- "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ -- "adox %%r13, %%r14 ;" -- "adcx %%rax, %%r14 ;" -- "movq $0, %%rax ;" -- /******************************************/ -- "adox %%rdx, %%rax ;" -- "adcx %%r8, %%rax ;" -- -- "movq 24(%1), %%rdx; " /* A[3] */ -- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -- "xorl %%r10d, %%r10d ;" -- "adcx 24(%0), %%r8 ;" -- "movq %%r8, 24(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -- "adox %%r9, %%r10 ;" -- "adcx %%r15, %%r10 ;" -- 
"movq %%r10, 32(%0) ;" -- "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ -- "adox %%r11, %%r15 ;" -- "adcx %%r14, %%r15 ;" -- "movq %%r15, 40(%0) ;" -- "movq $0, %%r8 ;" -- "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ -- "adox %%r13, %%r14 ;" -- "adcx %%rax, %%r14 ;" -- "movq %%r14, 48(%0) ;" -- "movq $0, %%rax ;" -- /******************************************/ -- "adox %%rdx, %%rax ;" -- "adcx %%r8, %%rax ;" -- "movq %%rax, 56(%0) ;" -- : -- : "r"(c), "r"(a), "r"(b) -- : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", -- "%r13", "%r14", "%r15"); -+ /* Compute the raw addition of f1 + f2 */ -+ " movq 0(%0), %%r8;" -+ " addq 0(%2), %%r8;" -+ " movq 8(%0), %%r9;" -+ " adcxq 8(%2), %%r9;" -+ " movq 16(%0), %%r10;" -+ " adcxq 16(%2), %%r10;" -+ " movq 24(%0), %%r11;" -+ " adcxq 24(%2), %%r11;" -+ -+ /* Wrap the result back into the field */ -+ -+ /* Step 1: Compute carry*38 */ -+ " mov $0, %%rax;" -+ " mov $38, %0;" -+ " cmovc %0, %%rax;" -+ -+ /* Step 2: Add carry*38 to the original sum */ -+ " xor %%rcx, %%rcx;" -+ " add %%rax, %%r8;" -+ " adcx %%rcx, %%r9;" -+ " movq %%r9, 8(%1);" -+ " adcx %%rcx, %%r10;" -+ " movq %%r10, 16(%1);" -+ " adcx %%rcx, %%r11;" -+ " movq %%r11, 24(%1);" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %0, %%rax;" -+ " add %%rax, %%r8;" -+ " movq %%r8, 0(%1);" -+ : "+&r" (f2) -+ : "r" (out), "r" (f1) -+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" -+ ); - } - --static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, -- const u64 *const b) -+/* Computes the field substraction of two field elements */ -+static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) - { - asm volatile( -- "movq (%1), %%rdx; " /* A[0] */ -- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ -- "movq %%r8, (%0) ;" -- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ -- "addq %%r10, %%r15 ;" -- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ -- "adcq %%r8, %%rax 
;" -- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ -- "adcq %%r10, %%rbx ;" -- /******************************************/ -- "adcq $0, %%rcx ;" -- -- "movq 8(%1), %%rdx; " /* A[1] */ -- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ -- "addq %%r15, %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%r15 ;" -- -- "addq %%r9, %%rax ;" -- "adcq %%r11, %%rbx ;" -- "adcq %%r13, %%rcx ;" -- "adcq $0, %%r15 ;" -- -- "movq 16(%1), %%rdx; " /* A[2] */ -- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ -- "addq %%rax, %%r8 ;" -- "movq %%r8, 16(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%rax ;" -- -- "addq %%r9, %%rbx ;" -- "adcq %%r11, %%rcx ;" -- "adcq %%r13, %%r15 ;" -- "adcq $0, %%rax ;" -- -- "movq 24(%1), %%rdx; " /* A[3] */ -- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ -- "addq %%rbx, %%r8 ;" -- "movq %%r8, 24(%0) ;" -- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ -- "adcq %%r10, %%r9 ;" -- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ -- "adcq %%r8, %%r11 ;" -- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ -- "adcq %%r10, %%r13 ;" -- /******************************************/ -- "adcq $0, %%rbx ;" -- -- "addq %%r9, %%rcx ;" -- "movq %%rcx, 32(%0) ;" -- "adcq %%r11, %%r15 ;" -- "movq %%r15, 40(%0) ;" -- "adcq %%r13, %%rax ;" -- "movq %%rax, 48(%0) ;" -- "adcq $0, %%rbx ;" -- "movq %%rbx, 56(%0) ;" -- : -- : "r"(c), "r"(a), "r"(b) -- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -- "%r10", "%r11", "%r13", "%r15"); -+ /* Compute the raw substraction of f1-f2 */ -+ " 
movq 0(%1), %%r8;" -+ " subq 0(%2), %%r8;" -+ " movq 8(%1), %%r9;" -+ " sbbq 8(%2), %%r9;" -+ " movq 16(%1), %%r10;" -+ " sbbq 16(%2), %%r10;" -+ " movq 24(%1), %%r11;" -+ " sbbq 24(%2), %%r11;" -+ -+ /* Wrap the result back into the field */ -+ -+ /* Step 1: Compute carry*38 */ -+ " mov $0, %%rax;" -+ " mov $38, %%rcx;" -+ " cmovc %%rcx, %%rax;" -+ -+ /* Step 2: Substract carry*38 from the original difference */ -+ " sub %%rax, %%r8;" -+ " sbb $0, %%r9;" -+ " sbb $0, %%r10;" -+ " sbb $0, %%r11;" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %%rcx, %%rax;" -+ " sub %%rax, %%r8;" -+ -+ /* Store the result */ -+ " movq %%r8, 0(%0);" -+ " movq %%r9, 8(%0);" -+ " movq %%r10, 16(%0);" -+ " movq %%r11, 24(%0);" -+ : -+ : "r" (out), "r" (f1), "r" (f2) -+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" -+ ); - } - --static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) -+/* Computes a field multiplication: out <- f1 * f2 -+ * Uses the 8-element buffer tmp for intermediate results */ -+static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) - { - asm volatile( -- "movq (%1), %%rdx ;" /* A[0] */ -- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ -- "xorl %%r15d, %%r15d;" -- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ -- "adcx %%r14, %%r9 ;" -- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ -- "adcx %%rax, %%r10 ;" -- "movq 24(%1), %%rdx ;" /* A[3] */ -- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ -- "adcx %%rcx, %%r11 ;" -- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ -- "adcx %%rax, %%rbx ;" -- "movq 8(%1), %%rdx ;" /* A[1] */ -- "adcx %%r15, %%r13 ;" -- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ -- "movq $0, %%r14 ;" -- /******************************************/ -- "adcx %%r15, %%r14 ;" -- -- "xorl %%r15d, %%r15d;" -- "adox %%rax, %%r10 ;" -- "adcx %%r8, %%r8 ;" -- "adox %%rcx, %%r11 ;" -- "adcx %%r9, %%r9 ;" -- "adox %%r15, %%rbx ;" -- "adcx %%r10, 
%%r10 ;" -- "adox %%r15, %%r13 ;" -- "adcx %%r11, %%r11 ;" -- "adox %%r15, %%r14 ;" -- "adcx %%rbx, %%rbx ;" -- "adcx %%r13, %%r13 ;" -- "adcx %%r14, %%r14 ;" -- -- "movq (%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ -- /*******************/ -- "movq %%rax, 0(%0) ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "movq 8(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ -- "adcq %%rax, %%r9 ;" -- "movq %%r9, 16(%0) ;" -- "adcq %%rcx, %%r10 ;" -- "movq %%r10, 24(%0) ;" -- "movq 16(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ -- "adcq %%rax, %%r11 ;" -- "movq %%r11, 32(%0) ;" -- "adcq %%rcx, %%rbx ;" -- "movq %%rbx, 40(%0) ;" -- "movq 24(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ -- "adcq %%rax, %%r13 ;" -- "movq %%r13, 48(%0) ;" -- "adcq %%rcx, %%r14 ;" -- "movq %%r14, 56(%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -- "%r10", "%r11", "%r13", "%r14", "%r15"); --} -+ /* Compute the raw multiplication: tmp <- src1 * src2 */ - --static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) --{ -- asm volatile( -- "movq 8(%1), %%rdx ;" /* A[1] */ -- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ -- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ -- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ -- -- "movq 16(%1), %%rdx ;" /* A[2] */ -- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ -- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ -- -- "addq %%rax, %%r9 ;" -- "adcq %%rdx, %%r10 ;" -- "adcq %%rcx, %%r11 ;" -- "adcq %%r14, %%r15 ;" -- "adcq $0, %%r13 ;" -- "movq $0, %%r14 ;" -- "adcq $0, %%r14 ;" -- -- "movq (%1), %%rdx ;" /* A[0] */ -- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ -- -- "addq %%rax, %%r10 ;" -- "adcq %%rcx, %%r11 ;" -- "adcq $0, %%r15 ;" -- "adcq $0, %%r13 ;" -- "adcq $0, %%r14 ;" -- -- "shldq $1, %%r13, %%r14 ;" -- "shldq $1, %%r15, %%r13 ;" -- "shldq $1, %%r11, %%r15 ;" -- "shldq $1, %%r10, %%r11 ;" -- "shldq $1, %%r9, %%r10 ;" -- "shldq $1, 
%%r8, %%r9 ;" -- "shlq $1, %%r8 ;" -- -- /*******************/ -- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ -- /*******************/ -- "movq %%rax, 0(%0) ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, 8(%0) ;" -- "movq 8(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ -- "adcq %%rax, %%r9 ;" -- "movq %%r9, 16(%0) ;" -- "adcq %%rcx, %%r10 ;" -- "movq %%r10, 24(%0) ;" -- "movq 16(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ -- "adcq %%rax, %%r11 ;" -- "movq %%r11, 32(%0) ;" -- "adcq %%rcx, %%r15 ;" -- "movq %%r15, 40(%0) ;" -- "movq 24(%1), %%rdx ;" -- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ -- "adcq %%rax, %%r13 ;" -- "movq %%r13, 48(%0) ;" -- "adcq %%rcx, %%r14 ;" -- "movq %%r14, 56(%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -- "%r11", "%r13", "%r14", "%r15"); -+ /* Compute src1[0] * src2 */ -+ " movq 0(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" -+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" -+ /* Compute src1[1] * src2 */ -+ " movq 8(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" -+ /* Compute src1[2] * src2 */ -+ " movq 16(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox 
%%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" -+ /* Compute src1[3] * src2 */ -+ " movq 24(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" -+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" -+ /* Line up pointers */ -+ " mov %0, %1;" -+ " mov %2, %0;" -+ -+ /* Wrap the result back into the field */ -+ -+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ -+ " mov $38, %%rdx;" -+ " mulxq 32(%1), %%r8, %%r13;" -+ " xor %3, %3;" -+ " adoxq 0(%1), %%r8;" -+ " mulxq 40(%1), %%r9, %%r12;" -+ " adcx %%r13, %%r9;" -+ " adoxq 8(%1), %%r9;" -+ " mulxq 48(%1), %%r10, %%r13;" -+ " adcx %%r12, %%r10;" -+ " adoxq 16(%1), %%r10;" -+ " mulxq 56(%1), %%r11, %%rax;" -+ " adcx %%r13, %%r11;" -+ " adoxq 24(%1), %%r11;" -+ " adcx %3, %%rax;" -+ " adox %3, %%rax;" -+ " imul %%rdx, %%rax;" -+ -+ /* Step 2: Fold the carry back into dst */ -+ " add %%rax, %%r8;" -+ " adcx %3, %%r9;" -+ " movq %%r9, 8(%0);" -+ " adcx %3, %%r10;" -+ " movq %%r10, 16(%0);" -+ " adcx %3, %%r11;" -+ " movq %%r11, 24(%0);" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %%rdx, %%rax;" -+ " add %%rax, %%r8;" -+ " movq %%r8, 0(%0);" -+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) -+ : -+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" -+ ); - } - --static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) -+/* Computes two field 
multiplications: -+ * out[0] <- f1[0] * f2[0] -+ * out[1] <- f1[1] * f2[1] -+ * Uses the 16-element buffer tmp for intermediate results. */ -+static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) - { - asm volatile( -- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ -- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ -- "xorl %%ebx, %%ebx ;" -- "adox (%1), %%r8 ;" -- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ -- "adcx %%r10, %%r9 ;" -- "adox 8(%1), %%r9 ;" -- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ -- "adcx %%r11, %%r10 ;" -- "adox 16(%1), %%r10 ;" -- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ -- "adcx %%rax, %%r11 ;" -- "adox 24(%1), %%r11 ;" -- /***************************************/ -- "adcx %%rbx, %%rcx ;" -- "adox %%rbx, %%rcx ;" -- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ -- "adcx %%rcx, %%r8 ;" -- "adcx %%rbx, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "adcx %%rbx, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "adcx %%rbx, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%edx, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, (%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", -- "%r10", "%r11"); --} -+ /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ - --static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) --{ -- asm volatile( -- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ -- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ -- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ -- "addq %%r10, %%r9 ;" -- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ -- "adcq %%r11, %%r10 ;" -- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ -- "adcq %%rax, %%r11 ;" -- /***************************************/ -- "adcq $0, %%rcx ;" -- "addq (%1), %%r8 ;" -- "adcq 8(%1), %%r9 ;" -- "adcq 16(%1), %%r10 ;" -- "adcq 24(%1), %%r11 ;" -- "adcq $0, %%rcx ;" -- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ -- "addq %%rcx, %%r8 ;" -- "adcq $0, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "adcq $0, %%r10 ;" -- "movq %%r10, 16(%0) 
;" -- "adcq $0, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%edx, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, (%0) ;" -- : -- : "r"(c), "r"(a) -- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -- "%r11"); -+ /* Compute src1[0] * src2 */ -+ " movq 0(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" -+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" -+ /* Compute src1[1] * src2 */ -+ " movq 8(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" -+ /* Compute src1[2] * src2 */ -+ " movq 16(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" -+ /* Compute src1[3] * src2 */ -+ " movq 24(%1), %%rdx;" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" -+ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" -+ " mulxq 24(%3), %%r14, %%rdx;" " 
adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" -+ -+ /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ -+ -+ /* Compute src1[0] * src2 */ -+ " movq 32(%1), %%rdx;" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" -+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" -+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" -+ /* Compute src1[1] * src2 */ -+ " movq 40(%1), %%rdx;" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);" -+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" -+ /* Compute src1[2] * src2 */ -+ " movq 48(%1), %%rdx;" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);" -+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" -+ /* Compute src1[3] * src2 */ -+ " movq 56(%1), %%rdx;" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);" -+ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;" -+ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, 
%%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" -+ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" -+ /* Line up pointers */ -+ " mov %0, %1;" -+ " mov %2, %0;" -+ -+ /* Wrap the results back into the field */ -+ -+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ -+ " mov $38, %%rdx;" -+ " mulxq 32(%1), %%r8, %%r13;" -+ " xor %3, %3;" -+ " adoxq 0(%1), %%r8;" -+ " mulxq 40(%1), %%r9, %%r12;" -+ " adcx %%r13, %%r9;" -+ " adoxq 8(%1), %%r9;" -+ " mulxq 48(%1), %%r10, %%r13;" -+ " adcx %%r12, %%r10;" -+ " adoxq 16(%1), %%r10;" -+ " mulxq 56(%1), %%r11, %%rax;" -+ " adcx %%r13, %%r11;" -+ " adoxq 24(%1), %%r11;" -+ " adcx %3, %%rax;" -+ " adox %3, %%rax;" -+ " imul %%rdx, %%rax;" -+ -+ /* Step 2: Fold the carry back into dst */ -+ " add %%rax, %%r8;" -+ " adcx %3, %%r9;" -+ " movq %%r9, 8(%0);" -+ " adcx %3, %%r10;" -+ " movq %%r10, 16(%0);" -+ " adcx %3, %%r11;" -+ " movq %%r11, 24(%0);" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %%rdx, %%rax;" -+ " add %%rax, %%r8;" -+ " movq %%r8, 0(%0);" -+ -+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ -+ " mov $38, %%rdx;" -+ " mulxq 96(%1), %%r8, %%r13;" -+ " xor %3, %3;" -+ " adoxq 64(%1), %%r8;" -+ " mulxq 104(%1), %%r9, %%r12;" -+ " adcx %%r13, %%r9;" -+ " adoxq 72(%1), %%r9;" -+ " mulxq 112(%1), %%r10, %%r13;" -+ " adcx %%r12, %%r10;" -+ " adoxq 80(%1), %%r10;" -+ " mulxq 120(%1), %%r11, %%rax;" -+ " adcx %%r13, %%r11;" -+ " adoxq 88(%1), %%r11;" -+ " adcx %3, %%rax;" -+ " adox %3, %%rax;" -+ " imul %%rdx, %%rax;" -+ -+ /* Step 2: Fold the carry back into dst */ -+ " add %%rax, %%r8;" -+ " adcx %3, %%r9;" -+ " movq %%r9, 40(%0);" -+ " adcx %3, %%r10;" -+ " movq %%r10, 48(%0);" -+ " adcx %3, %%r11;" -+ " movq %%r11, 56(%0);" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %%rdx, %%rax;" -+ " add %%rax, %%r8;" -+ " 
movq %%r8, 32(%0);" -+ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) -+ : -+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" -+ ); - } - --static __always_inline void --add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) -+/* Computes the field multiplication of four-element f1 with value in f2 */ -+static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) - { -- asm volatile( -- "mov $38, %%eax ;" -- "xorl %%ecx, %%ecx ;" -- "movq (%2), %%r8 ;" -- "adcx (%1), %%r8 ;" -- "movq 8(%2), %%r9 ;" -- "adcx 8(%1), %%r9 ;" -- "movq 16(%2), %%r10 ;" -- "adcx 16(%1), %%r10 ;" -- "movq 24(%2), %%r11 ;" -- "adcx 24(%1), %%r11 ;" -- "cmovc %%eax, %%ecx ;" -- "xorl %%eax, %%eax ;" -- "adcx %%rcx, %%r8 ;" -- "adcx %%rax, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "adcx %%rax, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "adcx %%rax, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $38, %%ecx ;" -- "cmovc %%ecx, %%eax ;" -- "addq %%rax, %%r8 ;" -- "movq %%r8, (%0) ;" -- : -- : "r"(c), "r"(a), "r"(b) -- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); --} -+ register u64 f2_r asm("rdx") = f2; - --static __always_inline void --add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) --{ - asm volatile( -- "mov $38, %%eax ;" -- "movq (%2), %%r8 ;" -- "addq (%1), %%r8 ;" -- "movq 8(%2), %%r9 ;" -- "adcq 8(%1), %%r9 ;" -- "movq 16(%2), %%r10 ;" -- "adcq 16(%1), %%r10 ;" -- "movq 24(%2), %%r11 ;" -- "adcq 24(%1), %%r11 ;" -- "mov $0, %%ecx ;" -- "cmovc %%eax, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "adcq $0, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "adcq $0, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "adcq $0, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%eax, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, (%0) ;" -- : -- : "r"(c), "r"(a), "r"(b) -- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); -+ /* Compute the raw multiplication of f1*f2 */ -+ " mulxq 0(%2), %%r8, 
%%rcx;" /* f1[0]*f2 */ -+ " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */ -+ " add %%rcx, %%r9;" -+ " mov $0, %%rcx;" -+ " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ -+ " adcx %%r12, %%r10;" -+ " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ -+ " adcx %%r13, %%r11;" -+ " adcx %%rcx, %%rax;" -+ -+ /* Wrap the result back into the field */ -+ -+ /* Step 1: Compute carry*38 */ -+ " mov $38, %%rdx;" -+ " imul %%rdx, %%rax;" -+ -+ /* Step 2: Fold the carry back into dst */ -+ " add %%rax, %%r8;" -+ " adcx %%rcx, %%r9;" -+ " movq %%r9, 8(%1);" -+ " adcx %%rcx, %%r10;" -+ " movq %%r10, 16(%1);" -+ " adcx %%rcx, %%r11;" -+ " movq %%r11, 24(%1);" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %%rdx, %%rax;" -+ " add %%rax, %%r8;" -+ " movq %%r8, 0(%1);" -+ : "+&r" (f2_r) -+ : "r" (out), "r" (f1) -+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc" -+ ); - } - --static __always_inline void --sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) --{ -- asm volatile( -- "mov $38, %%eax ;" -- "movq (%1), %%r8 ;" -- "subq (%2), %%r8 ;" -- "movq 8(%1), %%r9 ;" -- "sbbq 8(%2), %%r9 ;" -- "movq 16(%1), %%r10 ;" -- "sbbq 16(%2), %%r10 ;" -- "movq 24(%1), %%r11 ;" -- "sbbq 24(%2), %%r11 ;" -- "mov $0, %%ecx ;" -- "cmovc %%eax, %%ecx ;" -- "subq %%rcx, %%r8 ;" -- "sbbq $0, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "sbbq $0, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "sbbq $0, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%eax, %%ecx ;" -- "subq %%rcx, %%r8 ;" -- "movq %%r8, (%0) ;" -- : -- : "r"(c), "r"(a), "r"(b) -- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); --} -- --/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */ --static __always_inline void --mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) -+/* Computes p1 <- bit ? 
p2 : p1 in constant time */ -+static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) - { -- const u64 a24 = 121666; - asm volatile( -- "movq %2, %%rdx ;" -- "mulx (%1), %%r8, %%r10 ;" -- "mulx 8(%1), %%r9, %%r11 ;" -- "addq %%r10, %%r9 ;" -- "mulx 16(%1), %%r10, %%rax ;" -- "adcq %%r11, %%r10 ;" -- "mulx 24(%1), %%r11, %%rcx ;" -- "adcq %%rax, %%r11 ;" -- /**************************/ -- "adcq $0, %%rcx ;" -- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ -- "imul %%rdx, %%rcx ;" -- "addq %%rcx, %%r8 ;" -- "adcq $0, %%r9 ;" -- "movq %%r9, 8(%0) ;" -- "adcq $0, %%r10 ;" -- "movq %%r10, 16(%0) ;" -- "adcq $0, %%r11 ;" -- "movq %%r11, 24(%0) ;" -- "mov $0, %%ecx ;" -- "cmovc %%edx, %%ecx ;" -- "addq %%rcx, %%r8 ;" -- "movq %%r8, (%0) ;" -- : -- : "r"(c), "r"(a), "r"(a24) -- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", -- "%r11"); --} -- --static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) --{ -- struct { -- eltfp25519_1w_buffer buffer; -- eltfp25519_1w x0, x1, x2; -- } __aligned(32) m; -- u64 *T[4]; -- -- T[0] = m.x0; -- T[1] = c; /* x^(-1) */ -- T[2] = m.x1; -- T[3] = m.x2; -- -- copy_eltfp25519_1w(T[1], a); -- sqrn_eltfp25519_1w_adx(T[1], 1); -- copy_eltfp25519_1w(T[2], T[1]); -- sqrn_eltfp25519_1w_adx(T[2], 2); -- mul_eltfp25519_1w_adx(T[0], a, T[2]); -- mul_eltfp25519_1w_adx(T[1], T[1], T[0]); -- copy_eltfp25519_1w(T[2], T[1]); -- sqrn_eltfp25519_1w_adx(T[2], 1); -- mul_eltfp25519_1w_adx(T[0], T[0], T[2]); -- copy_eltfp25519_1w(T[2], T[0]); -- sqrn_eltfp25519_1w_adx(T[2], 5); -- mul_eltfp25519_1w_adx(T[0], T[0], T[2]); -- copy_eltfp25519_1w(T[2], T[0]); -- sqrn_eltfp25519_1w_adx(T[2], 10); -- mul_eltfp25519_1w_adx(T[2], T[2], T[0]); -- copy_eltfp25519_1w(T[3], T[2]); -- sqrn_eltfp25519_1w_adx(T[3], 20); -- mul_eltfp25519_1w_adx(T[3], T[3], T[2]); -- sqrn_eltfp25519_1w_adx(T[3], 10); -- mul_eltfp25519_1w_adx(T[3], T[3], T[0]); -- copy_eltfp25519_1w(T[0], T[3]); -- sqrn_eltfp25519_1w_adx(T[0], 50); -- 
mul_eltfp25519_1w_adx(T[0], T[0], T[3]); -- copy_eltfp25519_1w(T[2], T[0]); -- sqrn_eltfp25519_1w_adx(T[2], 100); -- mul_eltfp25519_1w_adx(T[2], T[2], T[0]); -- sqrn_eltfp25519_1w_adx(T[2], 50); -- mul_eltfp25519_1w_adx(T[2], T[2], T[3]); -- sqrn_eltfp25519_1w_adx(T[2], 5); -- mul_eltfp25519_1w_adx(T[1], T[1], T[2]); -- -- memzero_explicit(&m, sizeof(m)); --} -- --static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) --{ -- struct { -- eltfp25519_1w_buffer buffer; -- eltfp25519_1w x0, x1, x2; -- } __aligned(32) m; -- u64 *T[5]; -- -- T[0] = m.x0; -- T[1] = c; /* x^(-1) */ -- T[2] = m.x1; -- T[3] = m.x2; -- -- copy_eltfp25519_1w(T[1], a); -- sqrn_eltfp25519_1w_bmi2(T[1], 1); -- copy_eltfp25519_1w(T[2], T[1]); -- sqrn_eltfp25519_1w_bmi2(T[2], 2); -- mul_eltfp25519_1w_bmi2(T[0], a, T[2]); -- mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); -- copy_eltfp25519_1w(T[2], T[1]); -- sqrn_eltfp25519_1w_bmi2(T[2], 1); -- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); -- copy_eltfp25519_1w(T[2], T[0]); -- sqrn_eltfp25519_1w_bmi2(T[2], 5); -- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); -- copy_eltfp25519_1w(T[2], T[0]); -- sqrn_eltfp25519_1w_bmi2(T[2], 10); -- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); -- copy_eltfp25519_1w(T[3], T[2]); -- sqrn_eltfp25519_1w_bmi2(T[3], 20); -- mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); -- sqrn_eltfp25519_1w_bmi2(T[3], 10); -- mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); -- copy_eltfp25519_1w(T[0], T[3]); -- sqrn_eltfp25519_1w_bmi2(T[0], 50); -- mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); -- copy_eltfp25519_1w(T[2], T[0]); -- sqrn_eltfp25519_1w_bmi2(T[2], 100); -- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); -- sqrn_eltfp25519_1w_bmi2(T[2], 50); -- mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); -- sqrn_eltfp25519_1w_bmi2(T[2], 5); -- mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); -+ /* Invert the polarity of bit to match cmov expectations */ -+ " add $18446744073709551615, %0;" - -- memzero_explicit(&m, sizeof(m)); -+ /* cswap p1[0], p2[0] */ -+ " movq 0(%1), 
%%r8;" -+ " movq 0(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 0(%1);" -+ " movq %%r9, 0(%2);" -+ -+ /* cswap p1[1], p2[1] */ -+ " movq 8(%1), %%r8;" -+ " movq 8(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 8(%1);" -+ " movq %%r9, 8(%2);" -+ -+ /* cswap p1[2], p2[2] */ -+ " movq 16(%1), %%r8;" -+ " movq 16(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 16(%1);" -+ " movq %%r9, 16(%2);" -+ -+ /* cswap p1[3], p2[3] */ -+ " movq 24(%1), %%r8;" -+ " movq 24(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 24(%1);" -+ " movq %%r9, 24(%2);" -+ -+ /* cswap p1[4], p2[4] */ -+ " movq 32(%1), %%r8;" -+ " movq 32(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 32(%1);" -+ " movq %%r9, 32(%2);" -+ -+ /* cswap p1[5], p2[5] */ -+ " movq 40(%1), %%r8;" -+ " movq 40(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 40(%1);" -+ " movq %%r9, 40(%2);" -+ -+ /* cswap p1[6], p2[6] */ -+ " movq 48(%1), %%r8;" -+ " movq 48(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 48(%1);" -+ " movq %%r9, 48(%2);" -+ -+ /* cswap p1[7], p2[7] */ -+ " movq 56(%1), %%r8;" -+ " movq 56(%2), %%r9;" -+ " mov %%r8, %%r10;" -+ " cmovc %%r9, %%r8;" -+ " cmovc %%r10, %%r9;" -+ " movq %%r8, 56(%1);" -+ " movq %%r9, 56(%2);" -+ : "+&r" (bit) -+ : "r" (p1), "r" (p2) -+ : "%r8", "%r9", "%r10", "memory", "cc" -+ ); - } - --/* Given c, a 256-bit number, fred_eltfp25519_1w updates c -- * with a number such that 0 <= C < 2**255-19. 
-- */ --static __always_inline void fred_eltfp25519_1w(u64 *const c) -+/* Computes the square of a field element: out <- f * f -+ * Uses the 8-element buffer tmp for intermediate results */ -+static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) - { -- u64 tmp0 = 38, tmp1 = 19; - asm volatile( -- "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ -- "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */ -- -- /* Add either 19 or 38 to c */ -- "addq %4, %0 ;" -- "adcq $0, %1 ;" -- "adcq $0, %2 ;" -- "adcq $0, %3 ;" -- -- /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ -- "movl $0, %k4 ;" -- "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */ -- "btrq $63, %3 ;" /* Clear bit 255 */ -- -- /* Subtract 19 if necessary */ -- "subq %4, %0 ;" -- "sbbq $0, %1 ;" -- "sbbq $0, %2 ;" -- "sbbq $0, %3 ;" -- -- : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), -- "+r"(tmp1) -- : -- : "memory", "cc"); --} -+ /* Compute the raw multiplication: tmp <- f * f */ - --static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) --{ -- u64 temp; -- asm volatile( -- "test %9, %9 ;" -- "movq %0, %8 ;" -- "cmovnzq %4, %0 ;" -- "cmovnzq %8, %4 ;" -- "movq %1, %8 ;" -- "cmovnzq %5, %1 ;" -- "cmovnzq %8, %5 ;" -- "movq %2, %8 ;" -- "cmovnzq %6, %2 ;" -- "cmovnzq %8, %6 ;" -- "movq %3, %8 ;" -- "cmovnzq %7, %3 ;" -- "cmovnzq %8, %7 ;" -- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), -- "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), -- "=r"(temp) -- : "r"(bit) -- : "cc" -+ /* Step 1: Compute all partial products */ -+ " movq 0(%1), %%rdx;" /* f[0] */ -+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ -+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ -+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ -+ " movq 24(%1), %%rdx;" /* f[3] */ -+ " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ -+ " movq 
8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ -+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ -+ -+ /* Step 2: Compute two parallel carry chains */ -+ " xor %%r15, %%r15;" -+ " adox %%rax, %%r10;" -+ " adcx %%r8, %%r8;" -+ " adox %%rcx, %%r11;" -+ " adcx %%r9, %%r9;" -+ " adox %%r15, %%r12;" -+ " adcx %%r10, %%r10;" -+ " adox %%r15, %%r13;" -+ " adcx %%r11, %%r11;" -+ " adox %%r15, %%r14;" -+ " adcx %%r12, %%r12;" -+ " adcx %%r13, %%r13;" -+ " adcx %%r14, %%r14;" -+ -+ /* Step 3: Compute intermediate squares */ -+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ -+ " movq %%rax, 0(%0);" -+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);" -+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ -+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" -+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" -+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ -+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" -+ " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" -+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ -+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" -+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" -+ -+ /* Line up pointers */ -+ " mov %0, %1;" -+ " mov %2, %0;" -+ -+ /* Wrap the result back into the field */ -+ -+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ -+ " mov $38, %%rdx;" -+ " mulxq 32(%1), %%r8, %%r13;" -+ " xor %%rcx, %%rcx;" -+ " adoxq 0(%1), %%r8;" -+ " mulxq 40(%1), %%r9, %%r12;" -+ " adcx %%r13, %%r9;" -+ " adoxq 8(%1), %%r9;" -+ " mulxq 48(%1), %%r10, %%r13;" -+ " adcx %%r12, %%r10;" -+ " adoxq 16(%1), %%r10;" -+ " mulxq 56(%1), %%r11, %%rax;" -+ " adcx %%r13, %%r11;" -+ " adoxq 24(%1), %%r11;" -+ " adcx %%rcx, %%rax;" -+ " adox %%rcx, %%rax;" -+ " imul %%rdx, %%rax;" -+ -+ /* Step 2: Fold the carry back into dst */ -+ " add %%rax, %%r8;" -+ " adcx %%rcx, %%r9;" -+ " movq %%r9, 8(%0);" -+ " adcx %%rcx, %%r10;" -+ " movq %%r10, 16(%0);" -+ " adcx %%rcx, %%r11;" -+ " movq %%r11, 24(%0);" -+ -+ 
/* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %%rdx, %%rax;" -+ " add %%rax, %%r8;" -+ " movq %%r8, 0(%0);" -+ : "+&r" (tmp), "+&r" (f), "+&r" (out) -+ : -+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" - ); - } - --static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) -+/* Computes two field squarings: -+ * out[0] <- f[0] * f[0] -+ * out[1] <- f[1] * f[1] -+ * Uses the 16-element buffer tmp for intermediate results */ -+static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) - { - asm volatile( -- "test %4, %4 ;" -- "cmovnzq %5, %0 ;" -- "cmovnzq %6, %1 ;" -- "cmovnzq %7, %2 ;" -- "cmovnzq %8, %3 ;" -- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) -- : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) -- : "cc" -+ /* Step 1: Compute all partial products */ -+ " movq 0(%1), %%rdx;" /* f[0] */ -+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ -+ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ -+ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ -+ " movq 24(%1), %%rdx;" /* f[3] */ -+ " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ -+ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ -+ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ -+ -+ /* Step 2: Compute two parallel carry chains */ -+ " xor %%r15, %%r15;" -+ " adox %%rax, %%r10;" -+ " adcx %%r8, %%r8;" -+ " adox %%rcx, %%r11;" -+ " adcx %%r9, %%r9;" -+ " adox %%r15, %%r12;" -+ " adcx %%r10, %%r10;" -+ " adox %%r15, %%r13;" -+ " adcx %%r11, %%r11;" -+ " adox %%r15, %%r14;" -+ " adcx %%r12, %%r12;" -+ " adcx %%r13, %%r13;" -+ " adcx %%r14, %%r14;" -+ -+ /* Step 3: Compute intermediate squares */ -+ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ -+ " movq %%rax, 
0(%0);" -+ " add %%rcx, %%r8;" " movq %%r8, 8(%0);" -+ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ -+ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" -+ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" -+ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ -+ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" -+ " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" -+ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ -+ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" -+ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" -+ -+ /* Step 1: Compute all partial products */ -+ " movq 32(%1), %%rdx;" /* f[0] */ -+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ -+ " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ -+ " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ -+ " movq 56(%1), %%rdx;" /* f[3] */ -+ " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -+ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ -+ " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ -+ " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ -+ -+ /* Step 2: Compute two parallel carry chains */ -+ " xor %%r15, %%r15;" -+ " adox %%rax, %%r10;" -+ " adcx %%r8, %%r8;" -+ " adox %%rcx, %%r11;" -+ " adcx %%r9, %%r9;" -+ " adox %%r15, %%r12;" -+ " adcx %%r10, %%r10;" -+ " adox %%r15, %%r13;" -+ " adcx %%r11, %%r11;" -+ " adox %%r15, %%r14;" -+ " adcx %%r12, %%r12;" -+ " adcx %%r13, %%r13;" -+ " adcx %%r14, %%r14;" -+ -+ /* Step 3: Compute intermediate squares */ -+ " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ -+ " movq %%rax, 64(%0);" -+ " add %%rcx, %%r8;" " movq %%r8, 72(%0);" -+ " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ -+ " adcx %%rax, %%r9;" " movq %%r9, 80(%0);" -+ " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" -+ " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ -+ " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" -+ " adcx 
%%rcx, %%r12;" " movq %%r12, 104(%0);" -+ " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ -+ " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" -+ " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" -+ -+ /* Line up pointers */ -+ " mov %0, %1;" -+ " mov %2, %0;" -+ -+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ -+ " mov $38, %%rdx;" -+ " mulxq 32(%1), %%r8, %%r13;" -+ " xor %%rcx, %%rcx;" -+ " adoxq 0(%1), %%r8;" -+ " mulxq 40(%1), %%r9, %%r12;" -+ " adcx %%r13, %%r9;" -+ " adoxq 8(%1), %%r9;" -+ " mulxq 48(%1), %%r10, %%r13;" -+ " adcx %%r12, %%r10;" -+ " adoxq 16(%1), %%r10;" -+ " mulxq 56(%1), %%r11, %%rax;" -+ " adcx %%r13, %%r11;" -+ " adoxq 24(%1), %%r11;" -+ " adcx %%rcx, %%rax;" -+ " adox %%rcx, %%rax;" -+ " imul %%rdx, %%rax;" -+ -+ /* Step 2: Fold the carry back into dst */ -+ " add %%rax, %%r8;" -+ " adcx %%rcx, %%r9;" -+ " movq %%r9, 8(%0);" -+ " adcx %%rcx, %%r10;" -+ " movq %%r10, 16(%0);" -+ " adcx %%rcx, %%r11;" -+ " movq %%r11, 24(%0);" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, %%rax;" -+ " cmovc %%rdx, %%rax;" -+ " add %%rax, %%r8;" -+ " movq %%r8, 0(%0);" -+ -+ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ -+ " mov $38, %%rdx;" -+ " mulxq 96(%1), %%r8, %%r13;" -+ " xor %%rcx, %%rcx;" -+ " adoxq 64(%1), %%r8;" -+ " mulxq 104(%1), %%r9, %%r12;" -+ " adcx %%r13, %%r9;" -+ " adoxq 72(%1), %%r9;" -+ " mulxq 112(%1), %%r10, %%r13;" -+ " adcx %%r12, %%r10;" -+ " adoxq 80(%1), %%r10;" -+ " mulxq 120(%1), %%r11, %%rax;" -+ " adcx %%r13, %%r11;" -+ " adoxq 88(%1), %%r11;" -+ " adcx %%rcx, %%rax;" -+ " adox %%rcx, %%rax;" -+ " imul %%rdx, %%rax;" -+ -+ /* Step 2: Fold the carry back into dst */ -+ " add %%rax, %%r8;" -+ " adcx %%rcx, %%r9;" -+ " movq %%r9, 40(%0);" -+ " adcx %%rcx, %%r10;" -+ " movq %%r10, 48(%0);" -+ " adcx %%rcx, %%r11;" -+ " movq %%r11, 56(%0);" -+ -+ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ -+ " mov $0, 
%%rax;" -+ " cmovc %%rdx, %%rax;" -+ " add %%rax, %%r8;" -+ " movq %%r8, 32(%0);" -+ : "+&r" (tmp), "+&r" (f), "+&r" (out) -+ : -+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" - ); - } - --static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], -- const u8 private_key[CURVE25519_KEY_SIZE], -- const u8 session_key[CURVE25519_KEY_SIZE]) --{ -- struct { -- u64 buffer[4 * NUM_WORDS_ELTFP25519]; -- u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -- u64 workspace[6 * NUM_WORDS_ELTFP25519]; -- u8 session[CURVE25519_KEY_SIZE]; -- u8 private[CURVE25519_KEY_SIZE]; -- } __aligned(32) m; -- -- int i = 0, j = 0; -- u64 prev = 0; -- u64 *const X1 = (u64 *)m.session; -- u64 *const key = (u64 *)m.private; -- u64 *const Px = m.coordinates + 0; -- u64 *const Pz = m.coordinates + 4; -- u64 *const Qx = m.coordinates + 8; -- u64 *const Qz = m.coordinates + 12; -- u64 *const X2 = Qx; -- u64 *const Z2 = Qz; -- u64 *const X3 = Px; -- u64 *const Z3 = Pz; -- u64 *const X2Z2 = Qx; -- u64 *const X3Z3 = Px; -- -- u64 *const A = m.workspace + 0; -- u64 *const B = m.workspace + 4; -- u64 *const D = m.workspace + 8; -- u64 *const C = m.workspace + 12; -- u64 *const DA = m.workspace + 16; -- u64 *const CB = m.workspace + 20; -- u64 *const AB = A; -- u64 *const DC = D; -- u64 *const DACB = DA; -- -- memcpy(m.private, private_key, sizeof(m.private)); -- memcpy(m.session, session_key, sizeof(m.session)); -- -- curve25519_clamp_secret(m.private); -- -- /* As in the draft: -- * When receiving such an array, implementations of curve25519 -- * MUST mask the most-significant bit in the final byte. 
This -- * is done to preserve compatibility with point formats which -- * reserve the sign bit for use in other protocols and to -- * increase resistance to implementation fingerprinting -- */ -- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; -- -- copy_eltfp25519_1w(Px, X1); -- setzero_eltfp25519_1w(Pz); -- setzero_eltfp25519_1w(Qx); -- setzero_eltfp25519_1w(Qz); -- -- Pz[0] = 1; -- Qx[0] = 1; -- -- /* main-loop */ -- prev = 0; -- j = 62; -- for (i = 3; i >= 0; --i) { -- while (j >= 0) { -- u64 bit = (key[i] >> j) & 0x1; -- u64 swap = bit ^ prev; -- prev = bit; -- -- add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ -- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ -- add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ -- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ -- mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ -- -- cselect(swap, A, C); -- cselect(swap, B, D); -- -- sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ -- add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ -- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ -- sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ -- -- copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ -- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ -- -- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ -- add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */ -- mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ -- mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ -- --j; -- } -- j = 63; -- } -- -- inv_eltfp25519_1w_adx(A, Qz); -- mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); -- fred_eltfp25519_1w((u64 *)shared); -- -- memzero_explicit(&m, sizeof(m)); --} -- --static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], -- const u8 private_key[CURVE25519_KEY_SIZE]) -+static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) - { -- struct { -- u64 buffer[4 * NUM_WORDS_ELTFP25519]; -- u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -- u64 workspace[4 * 
NUM_WORDS_ELTFP25519]; -- u8 private[CURVE25519_KEY_SIZE]; -- } __aligned(32) m; -- -- const int ite[4] = { 64, 64, 64, 63 }; -- const int q = 3; -- u64 swap = 1; -- -- int i = 0, j = 0, k = 0; -- u64 *const key = (u64 *)m.private; -- u64 *const Ur1 = m.coordinates + 0; -- u64 *const Zr1 = m.coordinates + 4; -- u64 *const Ur2 = m.coordinates + 8; -- u64 *const Zr2 = m.coordinates + 12; -- -- u64 *const UZr1 = m.coordinates + 0; -- u64 *const ZUr2 = m.coordinates + 8; -- -- u64 *const A = m.workspace + 0; -- u64 *const B = m.workspace + 4; -- u64 *const C = m.workspace + 8; -- u64 *const D = m.workspace + 12; -- -- u64 *const AB = m.workspace + 0; -- u64 *const CD = m.workspace + 8; -- -- const u64 *const P = table_ladder_8k; -- -- memcpy(m.private, private_key, sizeof(m.private)); -- -- curve25519_clamp_secret(m.private); -- -- setzero_eltfp25519_1w(Ur1); -- setzero_eltfp25519_1w(Zr1); -- setzero_eltfp25519_1w(Zr2); -- Ur1[0] = 1; -- Zr1[0] = 1; -- Zr2[0] = 1; -- -- /* G-S */ -- Ur2[3] = 0x1eaecdeee27cab34UL; -- Ur2[2] = 0xadc7a0b9235d48e2UL; -- Ur2[1] = 0xbbf095ae14b2edf8UL; -- Ur2[0] = 0x7e94e1fec82faabdUL; -- -- /* main-loop */ -- j = q; -- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { -- while (j < ite[i]) { -- u64 bit = (key[i] >> j) & 0x1; -- k = (64 * i + j - q); -- swap = swap ^ bit; -- cswap(swap, Ur1, Ur2); -- cswap(swap, Zr1, Zr2); -- swap = bit; -- /* Addition */ -- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -- mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */ -- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ -- add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ -- sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */ -- mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ -- ++j; -+ u64 *nq = p01_tmp1; -+ u64 *nq_p1 = p01_tmp1 + (u32)8U; -+ u64 *tmp1 = p01_tmp1 + (u32)16U; -+ u64 *x1 = q; -+ u64 *x2 = nq; -+ u64 *z2 = nq + (u32)4U; -+ u64 
*z3 = nq_p1 + (u32)4U; -+ u64 *a = tmp1; -+ u64 *b = tmp1 + (u32)4U; -+ u64 *ab = tmp1; -+ u64 *dc = tmp1 + (u32)8U; -+ u64 *x3; -+ u64 *z31; -+ u64 *d0; -+ u64 *c0; -+ u64 *a1; -+ u64 *b1; -+ u64 *d; -+ u64 *c; -+ u64 *ab1; -+ u64 *dc1; -+ fadd(a, x2, z2); -+ fsub(b, x2, z2); -+ x3 = nq_p1; -+ z31 = nq_p1 + (u32)4U; -+ d0 = dc; -+ c0 = dc + (u32)4U; -+ fadd(c0, x3, z31); -+ fsub(d0, x3, z31); -+ fmul2(dc, dc, ab, tmp2); -+ fadd(x3, d0, c0); -+ fsub(z31, d0, c0); -+ a1 = tmp1; -+ b1 = tmp1 + (u32)4U; -+ d = tmp1 + (u32)8U; -+ c = tmp1 + (u32)12U; -+ ab1 = tmp1; -+ dc1 = tmp1 + (u32)8U; -+ fsqr2(dc1, ab1, tmp2); -+ fsqr2(nq_p1, nq_p1, tmp2); -+ a1[0U] = c[0U]; -+ a1[1U] = c[1U]; -+ a1[2U] = c[2U]; -+ a1[3U] = c[3U]; -+ fsub(c, d, c); -+ fmul_scalar(b1, c, (u64)121665U); -+ fadd(b1, b1, d); -+ fmul2(nq, dc1, ab1, tmp2); -+ fmul(z3, z3, x1, tmp2); -+} -+ -+static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) -+{ -+ u64 *x2 = nq; -+ u64 *z2 = nq + (u32)4U; -+ u64 *a = tmp1; -+ u64 *b = tmp1 + (u32)4U; -+ u64 *d = tmp1 + (u32)8U; -+ u64 *c = tmp1 + (u32)12U; -+ u64 *ab = tmp1; -+ u64 *dc = tmp1 + (u32)8U; -+ fadd(a, x2, z2); -+ fsub(b, x2, z2); -+ fsqr2(dc, ab, tmp2); -+ a[0U] = c[0U]; -+ a[1U] = c[1U]; -+ a[2U] = c[2U]; -+ a[3U] = c[3U]; -+ fsub(c, d, c); -+ fmul_scalar(b, c, (u64)121665U); -+ fadd(b, b, d); -+ fmul2(nq, dc, ab, tmp2); -+} -+ -+static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) -+{ -+ u64 tmp2[16U] = { 0U }; -+ u64 p01_tmp1_swap[33U] = { 0U }; -+ u64 *p0 = p01_tmp1_swap; -+ u64 *p01 = p01_tmp1_swap; -+ u64 *p03 = p01; -+ u64 *p11 = p01 + (u32)8U; -+ u64 *x0; -+ u64 *z0; -+ u64 *p01_tmp1; -+ u64 *p01_tmp11; -+ u64 *nq10; -+ u64 *nq_p11; -+ u64 *swap1; -+ u64 sw0; -+ u64 *nq1; -+ u64 *tmp1; -+ memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); -+ x0 = p03; -+ z0 = p03 + (u32)4U; -+ x0[0U] = (u64)1U; -+ x0[1U] = (u64)0U; -+ x0[2U] = (u64)0U; -+ x0[3U] = (u64)0U; -+ z0[0U] = (u64)0U; -+ z0[1U] = (u64)0U; -+ z0[2U] = (u64)0U; -+ z0[3U] = 
(u64)0U; -+ p01_tmp1 = p01_tmp1_swap; -+ p01_tmp11 = p01_tmp1_swap; -+ nq10 = p01_tmp1_swap; -+ nq_p11 = p01_tmp1_swap + (u32)8U; -+ swap1 = p01_tmp1_swap + (u32)32U; -+ cswap2((u64)1U, nq10, nq_p11); -+ point_add_and_double(init1, p01_tmp11, tmp2); -+ swap1[0U] = (u64)1U; -+ { -+ u32 i; -+ for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { -+ u64 *p01_tmp12 = p01_tmp1_swap; -+ u64 *swap2 = p01_tmp1_swap + (u32)32U; -+ u64 *nq2 = p01_tmp12; -+ u64 *nq_p12 = p01_tmp12 + (u32)8U; -+ u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); -+ u64 sw = swap2[0U] ^ bit; -+ cswap2(sw, nq2, nq_p12); -+ point_add_and_double(init1, p01_tmp12, tmp2); -+ swap2[0U] = bit; - } -- j = 0; - } -- -- /* Doubling */ -- for (i = 0; i < q; ++i) { -- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -- sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ -- copy_eltfp25519_1w(C, B); /* C = B */ -- sub_eltfp25519_1w(B, A, B); /* B = A-B */ -- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ -- add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ -- mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ -- } -- -- /* Convert to affine coordinates */ -- inv_eltfp25519_1w_adx(A, Zr1); -- mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); -- fred_eltfp25519_1w((u64 *)session_key); -- -- memzero_explicit(&m, sizeof(m)); --} -- --static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], -- const u8 private_key[CURVE25519_KEY_SIZE], -- const u8 session_key[CURVE25519_KEY_SIZE]) --{ -- struct { -- u64 buffer[4 * NUM_WORDS_ELTFP25519]; -- u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -- u64 workspace[6 * NUM_WORDS_ELTFP25519]; -- u8 session[CURVE25519_KEY_SIZE]; -- u8 private[CURVE25519_KEY_SIZE]; -- } __aligned(32) m; -- -- int i = 0, j = 0; -- u64 prev = 0; -- u64 *const X1 = (u64 *)m.session; -- u64 *const key = (u64 *)m.private; -- u64 *const Px = m.coordinates + 0; -- u64 *const Pz = m.coordinates + 4; -- 
u64 *const Qx = m.coordinates + 8; -- u64 *const Qz = m.coordinates + 12; -- u64 *const X2 = Qx; -- u64 *const Z2 = Qz; -- u64 *const X3 = Px; -- u64 *const Z3 = Pz; -- u64 *const X2Z2 = Qx; -- u64 *const X3Z3 = Px; -- -- u64 *const A = m.workspace + 0; -- u64 *const B = m.workspace + 4; -- u64 *const D = m.workspace + 8; -- u64 *const C = m.workspace + 12; -- u64 *const DA = m.workspace + 16; -- u64 *const CB = m.workspace + 20; -- u64 *const AB = A; -- u64 *const DC = D; -- u64 *const DACB = DA; -- -- memcpy(m.private, private_key, sizeof(m.private)); -- memcpy(m.session, session_key, sizeof(m.session)); -- -- curve25519_clamp_secret(m.private); -- -- /* As in the draft: -- * When receiving such an array, implementations of curve25519 -- * MUST mask the most-significant bit in the final byte. This -- * is done to preserve compatibility with point formats which -- * reserve the sign bit for use in other protocols and to -- * increase resistance to implementation fingerprinting -- */ -- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; -- -- copy_eltfp25519_1w(Px, X1); -- setzero_eltfp25519_1w(Pz); -- setzero_eltfp25519_1w(Qx); -- setzero_eltfp25519_1w(Qz); -- -- Pz[0] = 1; -- Qx[0] = 1; -- -- /* main-loop */ -- prev = 0; -- j = 62; -- for (i = 3; i >= 0; --i) { -- while (j >= 0) { -- u64 bit = (key[i] >> j) & 0x1; -- u64 swap = bit ^ prev; -- prev = bit; -- -- add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ -- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ -- add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ -- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ -- mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ -- -- cselect(swap, A, C); -- cselect(swap, B, D); -- -- sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ -- add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ -- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ -- sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ -- -- copy_eltfp25519_1w(X2, B); 
/* X2 = B^2 */ -- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ -- -- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ -- add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */ -- mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ -- mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ -- --j; -+ sw0 = swap1[0U]; -+ cswap2(sw0, nq10, nq_p11); -+ nq1 = p01_tmp1; -+ tmp1 = p01_tmp1 + (u32)16U; -+ point_double(nq1, tmp1, tmp2); -+ point_double(nq1, tmp1, tmp2); -+ point_double(nq1, tmp1, tmp2); -+ memcpy(out, p0, (u32)8U * sizeof(p0[0U])); -+ -+ memzero_explicit(tmp2, sizeof(tmp2)); -+ memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); -+} -+ -+static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) -+{ -+ u32 i; -+ fsqr(o, inp, tmp); -+ for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) -+ fsqr(o, o, tmp); -+} -+ -+static void finv(u64 *o, const u64 *i, u64 *tmp) -+{ -+ u64 t1[16U] = { 0U }; -+ u64 *a0 = t1; -+ u64 *b = t1 + (u32)4U; -+ u64 *c = t1 + (u32)8U; -+ u64 *t00 = t1 + (u32)12U; -+ u64 *tmp1 = tmp; -+ u64 *a; -+ u64 *t0; -+ fsquare_times(a0, i, tmp1, (u32)1U); -+ fsquare_times(t00, a0, tmp1, (u32)2U); -+ fmul(b, t00, i, tmp); -+ fmul(a0, b, a0, tmp); -+ fsquare_times(t00, a0, tmp1, (u32)1U); -+ fmul(b, t00, b, tmp); -+ fsquare_times(t00, b, tmp1, (u32)5U); -+ fmul(b, t00, b, tmp); -+ fsquare_times(t00, b, tmp1, (u32)10U); -+ fmul(c, t00, b, tmp); -+ fsquare_times(t00, c, tmp1, (u32)20U); -+ fmul(t00, t00, c, tmp); -+ fsquare_times(t00, t00, tmp1, (u32)10U); -+ fmul(b, t00, b, tmp); -+ fsquare_times(t00, b, tmp1, (u32)50U); -+ fmul(c, t00, b, tmp); -+ fsquare_times(t00, c, tmp1, (u32)100U); -+ fmul(t00, t00, c, tmp); -+ fsquare_times(t00, t00, tmp1, (u32)50U); -+ fmul(t00, t00, b, tmp); -+ fsquare_times(t00, t00, tmp1, (u32)5U); -+ a = t1; -+ t0 = t1 + (u32)12U; -+ fmul(o, t0, a, tmp); -+} -+ -+static void store_felem(u64 *b, u64 *f) -+{ -+ u64 f30 = f[3U]; -+ u64 top_bit0 = f30 >> (u32)63U; -+ u64 carry0; -+ u64 f31; 
-+ u64 top_bit; -+ u64 carry; -+ u64 f0; -+ u64 f1; -+ u64 f2; -+ u64 f3; -+ u64 m0; -+ u64 m1; -+ u64 m2; -+ u64 m3; -+ u64 mask; -+ u64 f0_; -+ u64 f1_; -+ u64 f2_; -+ u64 f3_; -+ u64 o0; -+ u64 o1; -+ u64 o2; -+ u64 o3; -+ f[3U] = f30 & (u64)0x7fffffffffffffffU; -+ carry0 = add_scalar(f, f, (u64)19U * top_bit0); -+ f31 = f[3U]; -+ top_bit = f31 >> (u32)63U; -+ f[3U] = f31 & (u64)0x7fffffffffffffffU; -+ carry = add_scalar(f, f, (u64)19U * top_bit); -+ f0 = f[0U]; -+ f1 = f[1U]; -+ f2 = f[2U]; -+ f3 = f[3U]; -+ m0 = gte_mask(f0, (u64)0xffffffffffffffedU); -+ m1 = eq_mask(f1, (u64)0xffffffffffffffffU); -+ m2 = eq_mask(f2, (u64)0xffffffffffffffffU); -+ m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); -+ mask = ((m0 & m1) & m2) & m3; -+ f0_ = f0 - (mask & (u64)0xffffffffffffffedU); -+ f1_ = f1 - (mask & (u64)0xffffffffffffffffU); -+ f2_ = f2 - (mask & (u64)0xffffffffffffffffU); -+ f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); -+ o0 = f0_; -+ o1 = f1_; -+ o2 = f2_; -+ o3 = f3_; -+ b[0U] = o0; -+ b[1U] = o1; -+ b[2U] = o2; -+ b[3U] = o3; -+} -+ -+static void encode_point(u8 *o, const u64 *i) -+{ -+ const u64 *x = i; -+ const u64 *z = i + (u32)4U; -+ u64 tmp[4U] = { 0U }; -+ u64 tmp_w[16U] = { 0U }; -+ finv(tmp, z, tmp_w); -+ fmul(tmp, tmp, x, tmp_w); -+ store_felem((u64 *)o, tmp); -+} -+ -+static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) -+{ -+ u64 init1[8U] = { 0U }; -+ u64 tmp[4U] = { 0U }; -+ u64 tmp3; -+ u64 *x; -+ u64 *z; -+ { -+ u32 i; -+ for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { -+ u64 *os = tmp; -+ const u8 *bj = pub + i * (u32)8U; -+ u64 u = *(u64 *)bj; -+ u64 r = u; -+ u64 x0 = r; -+ os[i] = x0; - } -- j = 63; - } -+ tmp3 = tmp[3U]; -+ tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; -+ x = init1; -+ z = init1 + (u32)4U; -+ z[0U] = (u64)1U; -+ z[1U] = (u64)0U; -+ z[2U] = (u64)0U; -+ z[3U] = (u64)0U; -+ x[0U] = tmp[0U]; -+ x[1U] = tmp[1U]; -+ x[2U] = tmp[2U]; -+ x[3U] = tmp[3U]; -+ montgomery_ladder(init1, priv, init1); -+ encode_point(out, 
init1); -+} -+ -+/* The below constants were generated using this sage script: -+ * -+ * #!/usr/bin/env sage -+ * import sys -+ * from sage.all import * -+ * def limbs(n): -+ * n = int(n) -+ * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) -+ * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l -+ * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) -+ * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] -+ * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) -+ * print("static const u64 table_ladder[] = {") -+ * p = ec.lift_x(9) -+ * for i in range(252): -+ * l = (p[0] + p[2]) / (p[0] - p[2]) -+ * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) -+ * p = p * 2 -+ * print("};") -+ * -+ */ - -- inv_eltfp25519_1w_bmi2(A, Qz); -- mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); -- fred_eltfp25519_1w((u64 *)shared); -+static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; - -- memzero_explicit(&m, sizeof(m)); --} -+static const u64 table_ladder[] = { -+ 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, -+ 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, -+ 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, -+ 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, -+ 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, -+ 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, -+ 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, -+ 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, -+ 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, -+ 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 
0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, -+ 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, -+ 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, -+ 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, -+ 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, -+ 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, -+ 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, -+ 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, -+ 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, -+ 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, -+ 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, -+ 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, -+ 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, -+ 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, -+ 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, -+ 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, -+ 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, -+ 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, -+ 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, -+ 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, -+ 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, -+ 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 
0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, -+ 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, -+ 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, -+ 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, -+ 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, -+ 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, -+ 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, -+ 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, -+ 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, -+ 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, -+ 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, -+ 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, -+ 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, -+ 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, -+ 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, -+ 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, -+ 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, -+ 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, -+ 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, -+ 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, -+ 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, -+ 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 
0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, -+ 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, -+ 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, -+ 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, -+ 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, -+ 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, -+ 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, -+ 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, -+ 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, -+ 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, -+ 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, -+ 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, -+ 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, -+ 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, -+ 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, -+ 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, -+ 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, -+ 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, -+ 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, -+ 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, -+ 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, -+ 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 
0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, -+ 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, -+ 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, -+ 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, -+ 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, -+ 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, -+ 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, -+ 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, -+ 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, -+ 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, -+ 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, -+ 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, -+ 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, -+ 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, -+ 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, -+ 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, -+ 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, -+ 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, -+ 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, -+ 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, -+ 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, -+ 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 
0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, -+ 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, -+ 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, -+ 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, -+ 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, -+ 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, -+ 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, -+ 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, -+ 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, -+ 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, -+ 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, -+ 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, -+ 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, -+ 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, -+ 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, -+ 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, -+ 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, -+ 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, -+ 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, -+ 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, -+ 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, -+ 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 
0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, -+ 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, -+ 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, -+ 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, -+ 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, -+ 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, -+ 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, -+ 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, -+ 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, -+ 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, -+ 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, -+ 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, -+ 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, -+ 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, -+ 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, -+ 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, -+ 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, -+ 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, -+ 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, -+ 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, -+ 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, -+ 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 
0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, -+ 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, -+ 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, -+ 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, -+ 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, -+ 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, -+ 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, -+ 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, -+ 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, -+ 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, -+ 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, -+ 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, -+ 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, -+ 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, -+ 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, -+ 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, -+ 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, -+ 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, -+ 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, -+ 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, -+ 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, -+ 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 
0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, -+ 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, -+ 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, -+ 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, -+ 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, -+ 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, -+ 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, -+ 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, -+ 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, -+ 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, -+ 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, -+ 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, -+ 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, -+ 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, -+ 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, -+ 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, -+ 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, -+ 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, -+ 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, -+ 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, -+ 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, -+ 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 
0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, -+ 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, -+ 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, -+ 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, -+ 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, -+ 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, -+ 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, -+ 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, -+ 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, -+ 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, -+ 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, -+ 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, -+ 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, -+ 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, -+ 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, -+ 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, -+ 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, -+ 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, -+ 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, -+ 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, -+ 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, -+ 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 
0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, -+ 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, -+ 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, -+ 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, -+ 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, -+ 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, -+ 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, -+ 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, -+ 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, -+ 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, -+ 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, -+ 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, -+ 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, -+ 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, -+ 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, -+ 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, -+ 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, -+ 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, -+ 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, -+ 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, -+ 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, -+ 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 
0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, -+ 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, -+ 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, -+ 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, -+ 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, -+ 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, -+ 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, -+ 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, -+ 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, -+ 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, -+ 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, -+ 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, -+ 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, -+ 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, -+ 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, -+ 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, -+ 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, -+ 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, -+ 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, -+ 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, -+ 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, -+ 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 
0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, -+ 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, -+ 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, -+ 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, -+ 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, -+ 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, -+ 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, -+ 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, -+ 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, -+ 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, -+ 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, -+ 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL -+}; - --static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], -- const u8 private_key[CURVE25519_KEY_SIZE]) -+static void curve25519_ever64_base(u8 *out, const u8 *priv) - { -- struct { -- u64 buffer[4 * NUM_WORDS_ELTFP25519]; -- u64 coordinates[4 * NUM_WORDS_ELTFP25519]; -- u64 workspace[4 * NUM_WORDS_ELTFP25519]; -- u8 private[CURVE25519_KEY_SIZE]; -- } __aligned(32) m; -- -- const int ite[4] = { 64, 64, 64, 63 }; -- const int q = 3; - u64 swap = 1; -- -- int i = 0, j = 0, k = 0; -- u64 *const key = (u64 *)m.private; -- u64 *const Ur1 = m.coordinates + 0; -- u64 *const Zr1 = m.coordinates + 4; -- u64 *const Ur2 = m.coordinates + 8; -- u64 *const Zr2 = m.coordinates + 12; -- -- u64 *const UZr1 = m.coordinates + 0; -- u64 *const ZUr2 = m.coordinates + 8; -- -- u64 *const A = m.workspace + 0; -- u64 *const B = m.workspace + 4; -- u64 *const C = m.workspace + 8; -- u64 *const D = 
m.workspace + 12; -- -- u64 *const AB = m.workspace + 0; -- u64 *const CD = m.workspace + 8; -- -- const u64 *const P = table_ladder_8k; -- -- memcpy(m.private, private_key, sizeof(m.private)); -- -- curve25519_clamp_secret(m.private); -- -- setzero_eltfp25519_1w(Ur1); -- setzero_eltfp25519_1w(Zr1); -- setzero_eltfp25519_1w(Zr2); -- Ur1[0] = 1; -- Zr1[0] = 1; -- Zr2[0] = 1; -- -- /* G-S */ -- Ur2[3] = 0x1eaecdeee27cab34UL; -- Ur2[2] = 0xadc7a0b9235d48e2UL; -- Ur2[1] = 0xbbf095ae14b2edf8UL; -- Ur2[0] = 0x7e94e1fec82faabdUL; -- -- /* main-loop */ -- j = q; -- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { -- while (j < ite[i]) { -- u64 bit = (key[i] >> j) & 0x1; -- k = (64 * i + j - q); -+ int i, j, k; -+ u64 tmp[16 + 32 + 4]; -+ u64 *x1 = &tmp[0]; -+ u64 *z1 = &tmp[4]; -+ u64 *x2 = &tmp[8]; -+ u64 *z2 = &tmp[12]; -+ u64 *xz1 = &tmp[0]; -+ u64 *xz2 = &tmp[8]; -+ u64 *a = &tmp[0 + 16]; -+ u64 *b = &tmp[4 + 16]; -+ u64 *c = &tmp[8 + 16]; -+ u64 *ab = &tmp[0 + 16]; -+ u64 *abcd = &tmp[0 + 16]; -+ u64 *ef = &tmp[16 + 16]; -+ u64 *efgh = &tmp[16 + 16]; -+ u64 *key = &tmp[0 + 16 + 32]; -+ -+ memcpy(key, priv, 32); -+ ((u8 *)key)[0] &= 248; -+ ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; -+ -+ x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; -+ z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; -+ z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; -+ memcpy(x2, p_minus_s, sizeof(p_minus_s)); -+ -+ j = 3; -+ for (i = 0; i < 4; ++i) { -+ while (j < (const int[]){ 64, 64, 64, 63 }[i]) { -+ u64 bit = (key[i] >> j) & 1; -+ k = (64 * i + j - 3); - swap = swap ^ bit; -- cswap(swap, Ur1, Ur2); -- cswap(swap, Zr1, Zr2); -+ cswap2(swap, xz1, xz2); - swap = bit; -- /* Addition */ -- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -- mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */ -- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ -- add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ -- sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 
| B = B^2 */ -- mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ -+ fsub(b, x1, z1); -+ fadd(a, x1, z1); -+ fmul(c, &table_ladder[4 * k], b, ef); -+ fsub(b, a, c); -+ fadd(a, a, c); -+ fsqr2(ab, ab, efgh); -+ fmul2(xz1, xz2, ab, efgh); - ++j; - } - j = 0; - } - -- /* Doubling */ -- for (i = 0; i < q; ++i) { -- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ -- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ -- sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ -- copy_eltfp25519_1w(C, B); /* C = B */ -- sub_eltfp25519_1w(B, A, B); /* B = A-B */ -- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ -- add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ -- mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ -- } -- -- /* Convert to affine coordinates */ -- inv_eltfp25519_1w_bmi2(A, Zr1); -- mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); -- fred_eltfp25519_1w((u64 *)session_key); -+ point_double(xz1, abcd, efgh); -+ point_double(xz1, abcd, efgh); -+ point_double(xz1, abcd, efgh); -+ encode_point(out, xz1); - -- memzero_explicit(&m, sizeof(m)); -+ memzero_explicit(tmp, sizeof(tmp)); - } - -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); -+ - void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE], - const u8 basepoint[CURVE25519_KEY_SIZE]) - { -- if (static_branch_likely(&curve25519_use_adx)) -- curve25519_adx(mypublic, secret, basepoint); -- else if (static_branch_likely(&curve25519_use_bmi2)) -- curve25519_bmi2(mypublic, secret, basepoint); -+ if (static_branch_likely(&curve25519_use_bmi2_adx)) -+ curve25519_ever64(mypublic, secret, basepoint); - else - curve25519_generic(mypublic, secret, basepoint); - } -@@ -2355,10 +1395,8 @@ EXPORT_SYMBOL(curve25519_arch); - void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], - const u8 secret[CURVE25519_KEY_SIZE]) - { -- if (static_branch_likely(&curve25519_use_adx)) -- curve25519_adx_base(pub, secret); -- else if 
(static_branch_likely(&curve25519_use_bmi2)) -- curve25519_bmi2_base(pub, secret); -+ if (static_branch_likely(&curve25519_use_bmi2_adx)) -+ curve25519_ever64_base(pub, secret); - else - curve25519_generic(pub, secret, curve25519_base_point); - } -@@ -2449,12 +1487,11 @@ static struct kpp_alg curve25519_alg = { - .max_size = curve25519_max_size, - }; - -+ - static int __init curve25519_mod_init(void) - { -- if (boot_cpu_has(X86_FEATURE_BMI2)) -- static_branch_enable(&curve25519_use_bmi2); -- else if (boot_cpu_has(X86_FEATURE_ADX)) -- static_branch_enable(&curve25519_use_adx); -+ if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) -+ static_branch_enable(&curve25519_use_bmi2_adx); - else - return 0; - return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? -@@ -2474,3 +1511,4 @@ module_exit(curve25519_mod_exit); - MODULE_ALIAS_CRYPTO("curve25519"); - MODULE_ALIAS_CRYPTO("curve25519-x86"); - MODULE_LICENSE("GPL v2"); -+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0055-crypto-x86-curve25519-leave-r12-as-spare-register.patch b/target/linux/generic/backport-5.4/080-wireguard-0055-crypto-x86-curve25519-leave-r12-as-spare-register.patch deleted file mode 100644 index d5b11e0d36..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0055-crypto-x86-curve25519-leave-r12-as-spare-register.patch +++ /dev/null @@ -1,376 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 1 Mar 2020 16:06:56 +0800 -Subject: [PATCH] crypto: x86/curve25519 - leave r12 as spare register - -commit dc7fc3a53ae158263196b1892b672aedf67796c5 upstream. - -This updates to the newer register selection proved by HACL*, which -leads to a more compact instruction encoding, and saves around 100 -cycles. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/curve25519-x86_64.c | 110 ++++++++++++++-------------- - 1 file changed, 55 insertions(+), 55 deletions(-) - ---- a/arch/x86/crypto/curve25519-x86_64.c -+++ b/arch/x86/crypto/curve25519-x86_64.c -@@ -167,28 +167,28 @@ static inline void fmul(u64 *out, const - " movq 0(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" - /* Compute src1[1] * src2 */ - " movq 8(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" -- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[2] * src2 */ - " movq 16(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" -- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, 
%%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[3] * src2 */ - " movq 24(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" -- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" - /* Line up pointers */ -@@ -202,11 +202,11 @@ static inline void fmul(u64 *out, const - " mulxq 32(%1), %%r8, %%r13;" - " xor %3, %3;" - " adoxq 0(%1), %%r8;" -- " mulxq 40(%1), %%r9, %%r12;" -+ " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%1), %%r9;" - " mulxq 48(%1), %%r10, %%r13;" -- " adcx %%r12, %%r10;" -+ " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" - " adcx %%r13, %%r11;" -@@ -231,7 +231,7 @@ static inline void fmul(u64 *out, const - " movq %%r8, 0(%0);" - : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) - : -- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" -+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" - ); - } - -@@ -248,28 +248,28 @@ static inline void fmul2(u64 *out, const - " movq 0(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" 
" mov $0, %%rax;" - " adox %%rdx, %%rax;" - /* Compute src1[1] * src2 */ - " movq 8(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" -- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[2] * src2 */ - " movq 16(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" -- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[3] * src2 */ - " movq 24(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" -- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" -- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" -+ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" -+ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" - " mulxq 
24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" - -@@ -279,28 +279,28 @@ static inline void fmul2(u64 *out, const - " movq 32(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" -- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" -+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" - /* Compute src1[1] * src2 */ - " movq 40(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" -- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);" -- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);" -+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[2] * src2 */ - " movq 48(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" -- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);" -- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);" -+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* 
Compute src1[3] * src2 */ - " movq 56(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" -- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);" -- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);" -+ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" - /* Line up pointers */ -@@ -314,11 +314,11 @@ static inline void fmul2(u64 *out, const - " mulxq 32(%1), %%r8, %%r13;" - " xor %3, %3;" - " adoxq 0(%1), %%r8;" -- " mulxq 40(%1), %%r9, %%r12;" -+ " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%1), %%r9;" - " mulxq 48(%1), %%r10, %%r13;" -- " adcx %%r12, %%r10;" -+ " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" - " adcx %%r13, %%r11;" -@@ -347,11 +347,11 @@ static inline void fmul2(u64 *out, const - " mulxq 96(%1), %%r8, %%r13;" - " xor %3, %3;" - " adoxq 64(%1), %%r8;" -- " mulxq 104(%1), %%r9, %%r12;" -+ " mulxq 104(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 72(%1), %%r9;" - " mulxq 112(%1), %%r10, %%r13;" -- " adcx %%r12, %%r10;" -+ " adcx %%rbx, %%r10;" - " adoxq 80(%1), %%r10;" - " mulxq 120(%1), %%r11, %%rax;" - " adcx %%r13, %%r11;" -@@ -376,7 +376,7 @@ static inline void fmul2(u64 *out, const - " movq %%r8, 32(%0);" - : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) - : -- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" -+ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" - ); - } - -@@ -388,11 +388,11 @@ 
static inline void fmul_scalar(u64 *out, - asm volatile( - /* Compute the raw multiplication of f1*f2 */ - " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ -- " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */ -+ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ - " add %%rcx, %%r9;" - " mov $0, %%rcx;" - " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ -- " adcx %%r12, %%r10;" -+ " adcx %%rbx, %%r10;" - " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ - " adcx %%r13, %%r11;" - " adcx %%rcx, %%rax;" -@@ -419,7 +419,7 @@ static inline void fmul_scalar(u64 *out, - " movq %%r8, 0(%1);" - : "+&r" (f2_r) - : "r" (out), "r" (f1) -- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc" -+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc" - ); - } - -@@ -520,8 +520,8 @@ static inline void fsqr(u64 *out, const - " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%1), %%rdx;" /* f[3] */ -- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ -+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ - " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ - -@@ -531,12 +531,12 @@ static inline void fsqr(u64 *out, const - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" - " adcx %%r9, %%r9;" -- " adox %%r15, %%r12;" -+ " adox %%r15, %%rbx;" - " adcx %%r10, %%r10;" - " adox %%r15, %%r13;" - " adcx %%r11, %%r11;" - " adox %%r15, %%r14;" -- " adcx %%r12, %%r12;" -+ " adcx %%rbx, %%rbx;" - " adcx %%r13, %%r13;" - " adcx %%r14, %%r14;" - -@@ -549,7 +549,7 @@ static inline void fsqr(u64 *out, const - " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" - " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, 
%%r11;" " movq %%r11, 32(%0);" -- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" -+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" - " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" - " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" -@@ -565,11 +565,11 @@ static inline void fsqr(u64 *out, const - " mulxq 32(%1), %%r8, %%r13;" - " xor %%rcx, %%rcx;" - " adoxq 0(%1), %%r8;" -- " mulxq 40(%1), %%r9, %%r12;" -+ " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%1), %%r9;" - " mulxq 48(%1), %%r10, %%r13;" -- " adcx %%r12, %%r10;" -+ " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" - " adcx %%r13, %%r11;" -@@ -594,7 +594,7 @@ static inline void fsqr(u64 *out, const - " movq %%r8, 0(%0);" - : "+&r" (tmp), "+&r" (f), "+&r" (out) - : -- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" -+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" - ); - } - -@@ -611,8 +611,8 @@ static inline void fsqr2(u64 *out, const - " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%1), %%rdx;" /* f[3] */ -- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ -+ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -+ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ - " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ - -@@ -622,12 +622,12 @@ static inline void fsqr2(u64 *out, const - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" - " adcx %%r9, %%r9;" -- " adox %%r15, %%r12;" -+ " adox %%r15, %%rbx;" - " adcx %%r10, %%r10;" - " adox %%r15, %%r13;" - " adcx %%r11, %%r11;" - " adox %%r15, %%r14;" -- " adcx 
%%r12, %%r12;" -+ " adcx %%rbx, %%rbx;" - " adcx %%r13, %%r13;" - " adcx %%r14, %%r14;" - -@@ -640,7 +640,7 @@ static inline void fsqr2(u64 *out, const - " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" - " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" -- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" -+ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" - " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" - " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" -@@ -651,8 +651,8 @@ static inline void fsqr2(u64 *out, const - " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 56(%1), %%rdx;" /* f[3] */ -- " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -- " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ -+ " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ -+ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ - " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ - -@@ -662,12 +662,12 @@ static inline void fsqr2(u64 *out, const - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" - " adcx %%r9, %%r9;" -- " adox %%r15, %%r12;" -+ " adox %%r15, %%rbx;" - " adcx %%r10, %%r10;" - " adox %%r15, %%r13;" - " adcx %%r11, %%r11;" - " adox %%r15, %%r14;" -- " adcx %%r12, %%r12;" -+ " adcx %%rbx, %%rbx;" - " adcx %%r13, %%r13;" - " adcx %%r14, %%r14;" - -@@ -680,7 +680,7 @@ static inline void fsqr2(u64 *out, const - " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" - " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" -- " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);" -+ " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);" - " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx 
%%rax, %%r13;" " movq %%r13, 112(%0);" - " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" -@@ -694,11 +694,11 @@ static inline void fsqr2(u64 *out, const - " mulxq 32(%1), %%r8, %%r13;" - " xor %%rcx, %%rcx;" - " adoxq 0(%1), %%r8;" -- " mulxq 40(%1), %%r9, %%r12;" -+ " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 8(%1), %%r9;" - " mulxq 48(%1), %%r10, %%r13;" -- " adcx %%r12, %%r10;" -+ " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" - " adcx %%r13, %%r11;" -@@ -727,11 +727,11 @@ static inline void fsqr2(u64 *out, const - " mulxq 96(%1), %%r8, %%r13;" - " xor %%rcx, %%rcx;" - " adoxq 64(%1), %%r8;" -- " mulxq 104(%1), %%r9, %%r12;" -+ " mulxq 104(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" - " adoxq 72(%1), %%r9;" - " mulxq 112(%1), %%r10, %%r13;" -- " adcx %%r12, %%r10;" -+ " adcx %%rbx, %%r10;" - " adoxq 80(%1), %%r10;" - " mulxq 120(%1), %%r11, %%rax;" - " adcx %%r13, %%r11;" -@@ -756,7 +756,7 @@ static inline void fsqr2(u64 *out, const - " movq %%r8, 32(%0);" - : "+&r" (tmp), "+&r" (f), "+&r" (out) - : -- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" -+ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" - ); - } - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0056-crypto-arm-64-poly1305-add-artifact-to-.gitignore-fi.patch b/target/linux/generic/backport-5.4/080-wireguard-0056-crypto-arm-64-poly1305-add-artifact-to-.gitignore-fi.patch deleted file mode 100644 index 655371630c..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0056-crypto-arm-64-poly1305-add-artifact-to-.gitignore-fi.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Thu, 19 Mar 2020 11:56:17 -0600 -Subject: [PATCH] crypto: arm[64]/poly1305 - add artifact to .gitignore files - -commit 6e4e00d8b68ca7eb30d08afb740033e0d36abe55 upstream. - -The .S_shipped yields a .S, and the pattern in these directories is to -add that to .gitignore so that git-status doesn't raise a fuss. - -Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") -Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") -Reported-by: Emil Renner Berthing <kernel@esmil.dk> -Cc: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/.gitignore | 1 + - arch/arm64/crypto/.gitignore | 1 + - 2 files changed, 2 insertions(+) - ---- a/arch/arm/crypto/.gitignore -+++ b/arch/arm/crypto/.gitignore -@@ -1,3 +1,4 @@ - aesbs-core.S - sha256-core.S - sha512-core.S -+poly1305-core.S ---- a/arch/arm64/crypto/.gitignore -+++ b/arch/arm64/crypto/.gitignore -@@ -1,2 +1,3 @@ - sha256-core.S - sha512-core.S -+poly1305-core.S diff --git a/target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch b/target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch deleted file mode 100644 index f8828f243e..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch +++ /dev/null @@ -1,243 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 23 Apr 2020 15:54:04 -0600 -Subject: [PATCH] crypto: arch/lib - limit simd usage to 4k chunks - -commit 706024a52c614b478b63f7728d202532ce6591a9 upstream. 
- -The initial Zinc patchset, after some mailing list discussion, contained -code to ensure that kernel_fpu_enable would not be kept on for more than -a 4k chunk, since it disables preemption. The choice of 4k isn't totally -scientific, but it's not a bad guess either, and it's what's used in -both the x86 poly1305, blake2s, and nhpoly1305 code already (in the form -of PAGE_SIZE, which this commit corrects to be explicitly 4k for the -former two). - -Ard did some back of the envelope calculations and found that -at 5 cycles/byte (overestimate) on a 1ghz processor (pretty slow), 4k -means we have a maximum preemption disabling of 20us, which Sebastian -confirmed was probably a good limit. - -Unfortunately the chunking appears to have been left out of the final -patchset that added the glue code. So, this commit adds it back in. - -Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function") -Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function") -Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function") -Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel") -Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") -Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") -Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation") -Cc: Eric Biggers <ebiggers@google.com> -Cc: Ard Biesheuvel <ardb@kernel.org> -Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Reviewed-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-glue.c | 14 +++++++++++--- - arch/arm/crypto/poly1305-glue.c | 15 +++++++++++---- - arch/arm64/crypto/chacha-neon-glue.c | 14 +++++++++++--- - arch/arm64/crypto/poly1305-glue.c | 15 +++++++++++---- - arch/x86/crypto/blake2s-glue.c | 10 ++++------ - arch/x86/crypto/chacha_glue.c | 14 +++++++++++--- - arch/x86/crypto/poly1305_glue.c | 13 ++++++------- - 7 files changed, 65 insertions(+), 30 deletions(-) - ---- a/arch/arm/crypto/chacha-glue.c -+++ b/arch/arm/crypto/chacha-glue.c -@@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *d - return; - } - -- kernel_neon_begin(); -- chacha_doneon(state, dst, src, bytes, nrounds); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K); -+ -+ kernel_neon_begin(); -+ chacha_doneon(state, dst, src, todo, nrounds); -+ kernel_neon_end(); -+ -+ bytes -= todo; -+ src += todo; -+ dst += todo; -+ } while (bytes); - } - EXPORT_SYMBOL(chacha_crypt_arch); - ---- a/arch/arm/crypto/poly1305-glue.c -+++ b/arch/arm/crypto/poly1305-glue.c -@@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly130 - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && do_neon) { -- kernel_neon_begin(); -- poly1305_blocks_neon(&dctx->h, src, len, 1); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, len, SZ_4K); -+ -+ kernel_neon_begin(); -+ poly1305_blocks_neon(&dctx->h, src, todo, 1); -+ kernel_neon_end(); -+ -+ len -= todo; -+ src += todo; -+ } while (len); - } else { - poly1305_blocks_arm(&dctx->h, src, len, 1); -+ src += len; - } -- src += len; - nbytes %= POLY1305_BLOCK_SIZE; - } - ---- a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *d - !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - -- kernel_neon_begin(); -- chacha_doneon(state, dst, src, bytes, 
nrounds); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K); -+ -+ kernel_neon_begin(); -+ chacha_doneon(state, dst, src, todo, nrounds); -+ kernel_neon_end(); -+ -+ bytes -= todo; -+ src += todo; -+ dst += todo; -+ } while (bytes); - } - EXPORT_SYMBOL(chacha_crypt_arch); - ---- a/arch/arm64/crypto/poly1305-glue.c -+++ b/arch/arm64/crypto/poly1305-glue.c -@@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly130 - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && crypto_simd_usable()) { -- kernel_neon_begin(); -- poly1305_blocks_neon(&dctx->h, src, len, 1); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, len, SZ_4K); -+ -+ kernel_neon_begin(); -+ poly1305_blocks_neon(&dctx->h, src, todo, 1); -+ kernel_neon_end(); -+ -+ len -= todo; -+ src += todo; -+ } while (len); - } else { - poly1305_blocks(&dctx->h, src, len, 1); -+ src += len; - } -- src += len; - nbytes %= POLY1305_BLOCK_SIZE; - } - ---- a/arch/x86/crypto/blake2s-glue.c -+++ b/arch/x86/crypto/blake2s-glue.c -@@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2 - const u32 inc) - { - /* SIMD disables preemption, so relax after processing each page. 
*/ -- BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); -+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); - - if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { - blake2s_compress_generic(state, block, nblocks, inc); - return; - } - -- for (;;) { -+ do { - const size_t blocks = min_t(size_t, nblocks, -- PAGE_SIZE / BLAKE2S_BLOCK_SIZE); -+ SZ_4K / BLAKE2S_BLOCK_SIZE); - - kernel_fpu_begin(); - if (IS_ENABLED(CONFIG_AS_AVX512) && -@@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2 - kernel_fpu_end(); - - nblocks -= blocks; -- if (!nblocks) -- break; - block += blocks * BLAKE2S_BLOCK_SIZE; -- } -+ } while (nblocks); - } - EXPORT_SYMBOL(blake2s_compress_arch); - ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -154,9 +154,17 @@ void chacha_crypt_arch(u32 *state, u8 *d - bytes <= CHACHA_BLOCK_SIZE) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - -- kernel_fpu_begin(); -- chacha_dosimd(state, dst, src, bytes, nrounds); -- kernel_fpu_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K); -+ -+ kernel_fpu_begin(); -+ chacha_dosimd(state, dst, src, todo, nrounds); -+ kernel_fpu_end(); -+ -+ bytes -= todo; -+ src += todo; -+ dst += todo; -+ } while (bytes); - } - EXPORT_SYMBOL(chacha_crypt_arch); - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *c - struct poly1305_arch_internal *state = ctx; - - /* SIMD disables preemption, so relax after processing each page. 
*/ -- BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || -- PAGE_SIZE % POLY1305_BLOCK_SIZE); -+ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || -+ SZ_4K % POLY1305_BLOCK_SIZE); - - if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || - (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || -@@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *c - return; - } - -- for (;;) { -- const size_t bytes = min_t(size_t, len, PAGE_SIZE); -+ do { -+ const size_t bytes = min_t(size_t, len, SZ_4K); - - kernel_fpu_begin(); - if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) -@@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *c - else - poly1305_blocks_avx(ctx, inp, bytes, padbit); - kernel_fpu_end(); -+ - len -= bytes; -- if (!len) -- break; - inp += bytes; -- } -+ } while (len); - } - - static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], diff --git a/target/linux/generic/backport-5.4/080-wireguard-0058-crypto-lib-chacha20poly1305-Add-missing-function-dec.patch b/target/linux/generic/backport-5.4/080-wireguard-0058-crypto-lib-chacha20poly1305-Add-missing-function-dec.patch deleted file mode 100644 index 736147f934..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0058-crypto-lib-chacha20poly1305-Add-missing-function-dec.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Herbert Xu <herbert@gondor.apana.org.au> -Date: Wed, 8 Jul 2020 12:41:13 +1000 -Subject: [PATCH] crypto: lib/chacha20poly1305 - Add missing function - declaration - -commit 06cc2afbbdf9a9e8df3e2f8db724997dd6e1b4ac upstream. - -This patch adds a declaration for chacha20poly1305_selftest to -silence a sparse warning. - -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - include/crypto/chacha20poly1305.h | 2 ++ - lib/crypto/chacha20poly1305.c | 2 -- - 2 files changed, 2 insertions(+), 2 deletions(-) - ---- a/include/crypto/chacha20poly1305.h -+++ b/include/crypto/chacha20poly1305.h -@@ -45,4 +45,6 @@ bool chacha20poly1305_decrypt_sg_inplace - const u64 nonce, - const u8 key[CHACHA20POLY1305_KEY_SIZE]); - -+bool chacha20poly1305_selftest(void); -+ - #endif /* __CHACHA20POLY1305_H */ ---- a/lib/crypto/chacha20poly1305.c -+++ b/lib/crypto/chacha20poly1305.c -@@ -21,8 +21,6 @@ - - #define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32)) - --bool __init chacha20poly1305_selftest(void); -- - static void chacha_load_key(u32 *k, const u8 *in) - { - k[0] = get_unaligned_le32(in); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0059-crypto-x86-chacha-sse3-use-unaligned-loads-for-state.patch b/target/linux/generic/backport-5.4/080-wireguard-0059-crypto-x86-chacha-sse3-use-unaligned-loads-for-state.patch deleted file mode 100644 index 52847877f6..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0059-crypto-x86-chacha-sse3-use-unaligned-loads-for-state.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Wed, 8 Jul 2020 12:11:18 +0300 -Subject: [PATCH] crypto: x86/chacha-sse3 - use unaligned loads for state array - -commit e79a31715193686e92dadb4caedfbb1f5de3659c upstream. - -Due to the fact that the x86 port does not support allocating objects -on the stack with an alignment that exceeds 8 bytes, we have a rather -ugly hack in the x86 code for ChaCha to ensure that the state array is -aligned to 16 bytes, allowing the SSE3 implementation of the algorithm -to use aligned loads. 
- -Given that the performance benefit of using of aligned loads appears to -be limited (~0.25% for 1k blocks using tcrypt on a Corei7-8650U), and -the fact that this hack has leaked into generic ChaCha code, let's just -remove it. - -Cc: Martin Willi <martin@strongswan.org> -Cc: Herbert Xu <herbert@gondor.apana.org.au> -Cc: Eric Biggers <ebiggers@kernel.org> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Reviewed-by: Martin Willi <martin@strongswan.org> -Reviewed-by: Eric Biggers <ebiggers@google.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/chacha-ssse3-x86_64.S | 16 ++++++++-------- - arch/x86/crypto/chacha_glue.c | 17 ++--------------- - include/crypto/chacha.h | 4 ---- - 3 files changed, 10 insertions(+), 27 deletions(-) - ---- a/arch/x86/crypto/chacha-ssse3-x86_64.S -+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S -@@ -120,10 +120,10 @@ ENTRY(chacha_block_xor_ssse3) - FRAME_BEGIN - - # x0..3 = s0..3 -- movdqa 0x00(%rdi),%xmm0 -- movdqa 0x10(%rdi),%xmm1 -- movdqa 0x20(%rdi),%xmm2 -- movdqa 0x30(%rdi),%xmm3 -+ movdqu 0x00(%rdi),%xmm0 -+ movdqu 0x10(%rdi),%xmm1 -+ movdqu 0x20(%rdi),%xmm2 -+ movdqu 0x30(%rdi),%xmm3 - movdqa %xmm0,%xmm8 - movdqa %xmm1,%xmm9 - movdqa %xmm2,%xmm10 -@@ -205,10 +205,10 @@ ENTRY(hchacha_block_ssse3) - # %edx: nrounds - FRAME_BEGIN - -- movdqa 0x00(%rdi),%xmm0 -- movdqa 0x10(%rdi),%xmm1 -- movdqa 0x20(%rdi),%xmm2 -- movdqa 0x30(%rdi),%xmm3 -+ movdqu 0x00(%rdi),%xmm0 -+ movdqu 0x10(%rdi),%xmm1 -+ movdqu 0x20(%rdi),%xmm2 -+ movdqu 0x30(%rdi),%xmm3 - - mov %edx,%r8d - call chacha_permute ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -14,8 +14,6 @@ - #include <linux/module.h> - #include <asm/simd.h> - --#define CHACHA_STATE_ALIGN 16 -- - asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, - unsigned int len, int nrounds); - asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src, 
-@@ -125,8 +123,6 @@ static void chacha_dosimd(u32 *state, u8 - - void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) - { -- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); -- - if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) { - hchacha_block_generic(state, stream, nrounds); - } else { -@@ -139,8 +135,6 @@ EXPORT_SYMBOL(hchacha_block_arch); - - void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) - { -- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); -- - chacha_init_generic(state, key, iv); - } - EXPORT_SYMBOL(chacha_init_arch); -@@ -148,8 +142,6 @@ EXPORT_SYMBOL(chacha_init_arch); - void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, - int nrounds) - { -- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN); -- - if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() || - bytes <= CHACHA_BLOCK_SIZE) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); -@@ -171,15 +163,12 @@ EXPORT_SYMBOL(chacha_crypt_arch); - static int chacha_simd_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) - { -- u32 *state, state_buf[16 + 2] __aligned(8); -+ u32 state[CHACHA_STATE_WORDS] __aligned(8); - struct skcipher_walk walk; - int err; - - err = skcipher_walk_virt(&walk, req, false); - -- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); -- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); -- - chacha_init_generic(state, ctx->key, iv); - - while (walk.nbytes > 0) { -@@ -218,12 +207,10 @@ static int xchacha_simd(struct skcipher_ - { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -- u32 *state, state_buf[16 + 2] __aligned(8); -+ u32 state[CHACHA_STATE_WORDS] __aligned(8); - struct chacha_ctx subctx; - u8 real_iv[16]; - -- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); -- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN); - chacha_init_generic(state, ctx->key, req->iv); - - if (req->cryptlen > 
CHACHA_BLOCK_SIZE && crypto_simd_usable()) { ---- a/include/crypto/chacha.h -+++ b/include/crypto/chacha.h -@@ -25,11 +25,7 @@ - #define CHACHA_BLOCK_SIZE 64 - #define CHACHAPOLY_IV_SIZE 12 - --#ifdef CONFIG_X86_64 --#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32)) --#else - #define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) --#endif - - /* 192-bit nonce, then 64-bit stream position */ - #define XCHACHA_IV_SIZE 32 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0060-crypto-x86-curve25519-Remove-unused-carry-variables.patch b/target/linux/generic/backport-5.4/080-wireguard-0060-crypto-x86-curve25519-Remove-unused-carry-variables.patch deleted file mode 100644 index 5a2d20a982..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0060-crypto-x86-curve25519-Remove-unused-carry-variables.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Herbert Xu <herbert@gondor.apana.org.au> -Date: Thu, 23 Jul 2020 17:50:48 +1000 -Subject: [PATCH] crypto: x86/curve25519 - Remove unused carry variables - -commit 054a5540fb8f7268e2c79e9deab4242db15c8cba upstream. - -The carry variables are assigned but never used, which upsets -the compiler. This patch removes them. - -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Reviewed-by: Karthikeyan Bhargavan <karthik.bhargavan@gmail.com> -Acked-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/curve25519-x86_64.c | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - ---- a/arch/x86/crypto/curve25519-x86_64.c -+++ b/arch/x86/crypto/curve25519-x86_64.c -@@ -948,10 +948,8 @@ static void store_felem(u64 *b, u64 *f) - { - u64 f30 = f[3U]; - u64 top_bit0 = f30 >> (u32)63U; -- u64 carry0; - u64 f31; - u64 top_bit; -- u64 carry; - u64 f0; - u64 f1; - u64 f2; -@@ -970,11 +968,11 @@ static void store_felem(u64 *b, u64 *f) - u64 o2; - u64 o3; - f[3U] = f30 & (u64)0x7fffffffffffffffU; -- carry0 = add_scalar(f, f, (u64)19U * top_bit0); -+ add_scalar(f, f, (u64)19U * top_bit0); - f31 = f[3U]; - top_bit = f31 >> (u32)63U; - f[3U] = f31 & (u64)0x7fffffffffffffffU; -- carry = add_scalar(f, f, (u64)19U * top_bit); -+ add_scalar(f, f, (u64)19U * top_bit); - f0 = f[0U]; - f1 = f[1U]; - f2 = f[2U]; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0061-crypto-arm-curve25519-include-linux-scatterlist.h.patch b/target/linux/generic/backport-5.4/080-wireguard-0061-crypto-arm-curve25519-include-linux-scatterlist.h.patch deleted file mode 100644 index b58fd08fc9..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0061-crypto-arm-curve25519-include-linux-scatterlist.h.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Fabio Estevam <festevam@gmail.com> -Date: Mon, 24 Aug 2020 11:09:53 -0300 -Subject: [PATCH] crypto: arm/curve25519 - include <linux/scatterlist.h> - -commit 6779d0e6b0fe193ab3010ea201782ca6f75a3862 upstream. 
- -Building ARM allmodconfig leads to the following warnings: - -arch/arm/crypto/curve25519-glue.c:73:12: error: implicit declaration of function 'sg_copy_to_buffer' [-Werror=implicit-function-declaration] -arch/arm/crypto/curve25519-glue.c:74:9: error: implicit declaration of function 'sg_nents_for_len' [-Werror=implicit-function-declaration] -arch/arm/crypto/curve25519-glue.c:88:11: error: implicit declaration of function 'sg_copy_from_buffer' [-Werror=implicit-function-declaration] - -Include <linux/scatterlist.h> to fix such warnings - -Reported-by: Olof's autobuilder <build@lixom.net> -Fixes: 0c3dc787a62a ("crypto: algapi - Remove skbuff.h inclusion") -Signed-off-by: Fabio Estevam <festevam@gmail.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Acked-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/curve25519-glue.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/arm/crypto/curve25519-glue.c -+++ b/arch/arm/crypto/curve25519-glue.c -@@ -16,6 +16,7 @@ - #include <linux/module.h> - #include <linux/init.h> - #include <linux/jump_label.h> -+#include <linux/scatterlist.h> - #include <crypto/curve25519.h> - - asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], diff --git a/target/linux/generic/backport-5.4/080-wireguard-0062-crypto-arm-poly1305-Add-prototype-for-poly1305_block.patch b/target/linux/generic/backport-5.4/080-wireguard-0062-crypto-arm-poly1305-Add-prototype-for-poly1305_block.patch deleted file mode 100644 index cf3724a499..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0062-crypto-arm-poly1305-Add-prototype-for-poly1305_block.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Herbert Xu <herbert@gondor.apana.org.au> -Date: Tue, 25 Aug 2020 11:23:00 +1000 -Subject: [PATCH] crypto: arm/poly1305 - Add prototype for 
poly1305_blocks_neon - -commit 51982ea02aef972132eb35c583d3e4c5b83166e5 upstream. - -This patch adds a prototype for poly1305_blocks_neon to slience -a compiler warning: - - CC [M] arch/arm/crypto/poly1305-glue.o -../arch/arm/crypto/poly1305-glue.c:25:13: warning: no previous prototype for `poly1305_blocks_neon' [-Wmissing-prototypes] - void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) - ^~~~~~~~~~~~~~~~~~~~ - -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Acked-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/poly1305-glue.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/arm/crypto/poly1305-glue.c -+++ b/arch/arm/crypto/poly1305-glue.c -@@ -20,6 +20,7 @@ - - void poly1305_init_arm(void *state, const u8 *key); - void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); -+void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); - void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); - - void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) diff --git a/target/linux/generic/backport-5.4/080-wireguard-0063-crypto-curve25519-x86_64-Use-XORL-r32-32.patch b/target/linux/generic/backport-5.4/080-wireguard-0063-crypto-curve25519-x86_64-Use-XORL-r32-32.patch deleted file mode 100644 index dd76e2a1f2..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0063-crypto-curve25519-x86_64-Use-XORL-r32-32.patch +++ /dev/null @@ -1,261 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Uros Bizjak <ubizjak@gmail.com> -Date: Thu, 27 Aug 2020 19:30:58 +0200 -Subject: [PATCH] crypto: curve25519-x86_64 - Use XORL r32,32 - -commit db719539fd3889836900bf912755aa30a5985e9a upstream. 
- -x86_64 zero extends 32bit operations, so for 64bit operands, -XORL r32,r32 is functionally equal to XORL r64,r64, but avoids -a REX prefix byte when legacy registers are used. - -Signed-off-by: Uros Bizjak <ubizjak@gmail.com> -Cc: Herbert Xu <herbert@gondor.apana.org.au> -Cc: "David S. Miller" <davem@davemloft.net> -Acked-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/curve25519-x86_64.c | 68 ++++++++++++++--------------- - 1 file changed, 34 insertions(+), 34 deletions(-) - ---- a/arch/x86/crypto/curve25519-x86_64.c -+++ b/arch/x86/crypto/curve25519-x86_64.c -@@ -45,11 +45,11 @@ static inline u64 add_scalar(u64 *out, c - - asm volatile( - /* Clear registers to propagate the carry bit */ -- " xor %%r8, %%r8;" -- " xor %%r9, %%r9;" -- " xor %%r10, %%r10;" -- " xor %%r11, %%r11;" -- " xor %1, %1;" -+ " xor %%r8d, %%r8d;" -+ " xor %%r9d, %%r9d;" -+ " xor %%r10d, %%r10d;" -+ " xor %%r11d, %%r11d;" -+ " xor %k1, %k1;" - - /* Begin addition chain */ - " addq 0(%3), %0;" -@@ -93,7 +93,7 @@ static inline void fadd(u64 *out, const - " cmovc %0, %%rax;" - - /* Step 2: Add carry*38 to the original sum */ -- " xor %%rcx, %%rcx;" -+ " xor %%ecx, %%ecx;" - " add %%rax, %%r8;" - " adcx %%rcx, %%r9;" - " movq %%r9, 8(%1);" -@@ -165,28 +165,28 @@ static inline void fmul(u64 *out, const - - /* Compute src1[0] * src2 */ - " movq 0(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" - /* Compute src1[1] * src2 */ - " movq 8(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 
8(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[2] * src2 */ - " movq 16(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[3] * src2 */ - " movq 24(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" -@@ -200,7 +200,7 @@ static inline void fmul(u64 *out, const - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" -- " xor %3, %3;" -+ " xor %k3, %k3;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" -@@ -246,28 +246,28 @@ static inline void fmul2(u64 *out, const - - /* Compute src1[0] * src2 */ - " 
movq 0(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" - /* Compute src1[1] * src2 */ - " movq 8(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[2] * src2 */ - " movq 16(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[3] * src2 */ - " movq 24(%1), %%rdx;" -- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" -+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 
40(%0);" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" -@@ -277,29 +277,29 @@ static inline void fmul2(u64 *out, const - - /* Compute src1[0] * src2 */ - " movq 32(%1), %%rdx;" -- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" -- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 64(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" - " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" - /* Compute src1[1] * src2 */ - " movq 40(%1), %%rdx;" -- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" -- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);" - " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[2] * src2 */ - " movq 48(%1), %%rdx;" -- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" -- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);" - " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " 
adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" - /* Compute src1[3] * src2 */ - " movq 56(%1), %%rdx;" -- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" -- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);" -+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" -+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);" - " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" -@@ -312,7 +312,7 @@ static inline void fmul2(u64 *out, const - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" -- " xor %3, %3;" -+ " xor %k3, %k3;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" -@@ -345,7 +345,7 @@ static inline void fmul2(u64 *out, const - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 96(%1), %%r8, %%r13;" -- " xor %3, %3;" -+ " xor %k3, %k3;" - " adoxq 64(%1), %%r8;" - " mulxq 104(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" -@@ -516,7 +516,7 @@ static inline void fsqr(u64 *out, const - - /* Step 1: Compute all partial products */ - " movq 0(%1), %%rdx;" /* f[0] */ -- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ -+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */ - " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%1), %%rdx;" /* f[3] */ -@@ -526,7 +526,7 @@ static inline void fsqr(u64 *out, const - " mulxq 16(%1), %%rax, 
%%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ - - /* Step 2: Compute two parallel carry chains */ -- " xor %%r15, %%r15;" -+ " xor %%r15d, %%r15d;" - " adox %%rax, %%r10;" - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" -@@ -563,7 +563,7 @@ static inline void fsqr(u64 *out, const - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" -- " xor %%rcx, %%rcx;" -+ " xor %%ecx, %%ecx;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" -@@ -607,7 +607,7 @@ static inline void fsqr2(u64 *out, const - asm volatile( - /* Step 1: Compute all partial products */ - " movq 0(%1), %%rdx;" /* f[0] */ -- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ -+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */ - " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%1), %%rdx;" /* f[3] */ -@@ -617,7 +617,7 @@ static inline void fsqr2(u64 *out, const - " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ - - /* Step 2: Compute two parallel carry chains */ -- " xor %%r15, %%r15;" -+ " xor %%r15d, %%r15d;" - " adox %%rax, %%r10;" - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" -@@ -647,7 +647,7 @@ static inline void fsqr2(u64 *out, const - - /* Step 1: Compute all partial products */ - " movq 32(%1), %%rdx;" /* f[0] */ -- " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ -+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */ - " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 56(%1), %%rdx;" /* f[3] */ -@@ -657,7 +657,7 @@ static inline void fsqr2(u64 *out, const - " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ - - /* Step 2: Compute two parallel carry chains */ -- " xor %%r15, %%r15;" -+ " xor %%r15d, %%r15d;" - " adox %%rax, 
%%r10;" - " adcx %%r8, %%r8;" - " adox %%rcx, %%r11;" -@@ -692,7 +692,7 @@ static inline void fsqr2(u64 *out, const - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" -- " xor %%rcx, %%rcx;" -+ " xor %%ecx, %%ecx;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" -@@ -725,7 +725,7 @@ static inline void fsqr2(u64 *out, const - /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ - " mov $38, %%rdx;" - " mulxq 96(%1), %%r8, %%r13;" -- " xor %%rcx, %%rcx;" -+ " xor %%ecx, %%ecx;" - " adoxq 64(%1), %%r8;" - " mulxq 104(%1), %%r9, %%rbx;" - " adcx %%r13, %%r9;" diff --git a/target/linux/generic/backport-5.4/080-wireguard-0064-crypto-poly1305-x86_64-Use-XORL-r32-32.patch b/target/linux/generic/backport-5.4/080-wireguard-0064-crypto-poly1305-x86_64-Use-XORL-r32-32.patch deleted file mode 100644 index 4fcaa1eb75..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0064-crypto-poly1305-x86_64-Use-XORL-r32-32.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Uros Bizjak <ubizjak@gmail.com> -Date: Thu, 27 Aug 2020 19:38:31 +0200 -Subject: [PATCH] crypto: poly1305-x86_64 - Use XORL r32,32 - -commit 7dfd1e01b3dfc13431b1b25720cf2692a7e111ef upstream. - -x86_64 zero extends 32bit operations, so for 64bit operands, -XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids -a REX prefix byte when legacy registers are used. - -Signed-off-by: Uros Bizjak <ubizjak@gmail.com> -Cc: Herbert Xu <herbert@gondor.apana.org.au> -Cc: "David S. Miller" <davem@davemloft.net> -Acked-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - ---- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl -+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl -@@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel); - ___ - &declare_function("poly1305_init_x86_64", 32, 3); - $code.=<<___; -- xor %rax,%rax -+ xor %eax,%eax - mov %rax,0($ctx) # initialize hash value - mov %rax,8($ctx) - mov %rax,16($ctx) -@@ -2869,7 +2869,7 @@ $code.=<<___; - .type poly1305_init_base2_44,\@function,3 - .align 32 - poly1305_init_base2_44: -- xor %rax,%rax -+ xor %eax,%eax - mov %rax,0($ctx) # initialize hash value - mov %rax,8($ctx) - mov %rax,16($ctx) -@@ -3963,7 +3963,7 @@ xor128_decrypt_n_pad: - mov \$16,$len - sub %r10,$len - xor %eax,%eax -- xor %r11,%r11 -+ xor %r11d,%r11d - .Loop_dec_byte: - mov ($inp,$otp),%r11b - mov ($otp),%al -@@ -4101,7 +4101,7 @@ avx_handler: - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi -- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER -+ xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry diff --git a/target/linux/generic/backport-5.4/080-wireguard-0065-crypto-x86-poly1305-Remove-assignments-with-no-effec.patch b/target/linux/generic/backport-5.4/080-wireguard-0065-crypto-x86-poly1305-Remove-assignments-with-no-effec.patch deleted file mode 100644 index ee64bfe1fc..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0065-crypto-x86-poly1305-Remove-assignments-with-no-effec.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Herbert Xu <herbert@gondor.apana.org.au> -Date: Thu, 24 Sep 2020 13:29:04 +1000 -Subject: [PATCH] crypto: x86/poly1305 - Remove assignments with no effect - -commit 4a0c1de64bf9d9027a6f19adfba89fc27893db23 upstream. 
- -This patch removes a few ineffectual assignments from the function -crypto_poly1305_setdctxkey. - -Reported-by: kernel test robot <lkp@intel.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305_glue.c | 3 --- - 1 file changed, 3 deletions(-) - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -157,9 +157,6 @@ static unsigned int crypto_poly1305_setd - dctx->s[1] = get_unaligned_le32(&inp[4]); - dctx->s[2] = get_unaligned_le32(&inp[8]); - dctx->s[3] = get_unaligned_le32(&inp[12]); -- inp += POLY1305_BLOCK_SIZE; -- len -= POLY1305_BLOCK_SIZE; -- acc += POLY1305_BLOCK_SIZE; - dctx->sset = true; - } - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0066-crypto-x86-poly1305-add-back-a-needed-assignment.patch b/target/linux/generic/backport-5.4/080-wireguard-0066-crypto-x86-poly1305-add-back-a-needed-assignment.patch deleted file mode 100644 index dce8bb912b..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0066-crypto-x86-poly1305-add-back-a-needed-assignment.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Eric Biggers <ebiggers@google.com> -Date: Fri, 23 Oct 2020 15:27:48 -0700 -Subject: [PATCH] crypto: x86/poly1305 - add back a needed assignment - -commit c3a98c3ad5c0dc60a1ac66bf91147a3f39cac96b upstream. - -One of the assignments that was removed by commit 4a0c1de64bf9 ("crypto: -x86/poly1305 - Remove assignments with no effect") is actually needed, -since it affects the return value. 
- -This fixes the following crypto self-test failure: - - alg: shash: poly1305-simd test failed (wrong result) on test vector 2, cfg="init+update+final aligned buffer" - -Fixes: 4a0c1de64bf9 ("crypto: x86/poly1305 - Remove assignments with no effect") -Signed-off-by: Eric Biggers <ebiggers@google.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/poly1305_glue.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -157,6 +157,7 @@ static unsigned int crypto_poly1305_setd - dctx->s[1] = get_unaligned_le32(&inp[4]); - dctx->s[2] = get_unaligned_le32(&inp[8]); - dctx->s[3] = get_unaligned_le32(&inp[12]); -+ acc += POLY1305_BLOCK_SIZE; - dctx->sset = true; - } - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0067-crypto-Kconfig-CRYPTO_MANAGER_EXTRA_TESTS-requires-t.patch b/target/linux/generic/backport-5.4/080-wireguard-0067-crypto-Kconfig-CRYPTO_MANAGER_EXTRA_TESTS-requires-t.patch deleted file mode 100644 index 31c47df4b3..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0067-crypto-Kconfig-CRYPTO_MANAGER_EXTRA_TESTS-requires-t.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 2 Nov 2020 14:48:15 +0100 -Subject: [PATCH] crypto: Kconfig - CRYPTO_MANAGER_EXTRA_TESTS requires the - manager - -commit 6569e3097f1c4a490bdf2b23d326855e04942dfd upstream. - -The extra tests in the manager actually require the manager to be -selected too. Otherwise the linker gives errors like: - -ld: arch/x86/crypto/chacha_glue.o: in function `chacha_simd_stream_xor': -chacha_glue.c:(.text+0x422): undefined reference to `crypto_simd_disabled_for_test' - -Fixes: 2343d1529aff ("crypto: Kconfig - allow tests to be disabled when manager is disabled") -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - crypto/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -145,7 +145,7 @@ config CRYPTO_MANAGER_DISABLE_TESTS - - config CRYPTO_MANAGER_EXTRA_TESTS - bool "Enable extra run-time crypto self tests" -- depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS -+ depends on DEBUG_KERNEL && !CRYPTO_MANAGER_DISABLE_TESTS && CRYPTO_MANAGER - help - Enable extra run-time self tests of registered crypto algorithms, - including randomized fuzz tests. diff --git a/target/linux/generic/backport-5.4/080-wireguard-0068-crypto-arm-chacha-neon-optimize-for-non-block-size-m.patch b/target/linux/generic/backport-5.4/080-wireguard-0068-crypto-arm-chacha-neon-optimize-for-non-block-size-m.patch deleted file mode 100644 index b31b8d9a0e..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0068-crypto-arm-chacha-neon-optimize-for-non-block-size-m.patch +++ /dev/null @@ -1,272 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Tue, 3 Nov 2020 17:28:09 +0100 -Subject: [PATCH] crypto: arm/chacha-neon - optimize for non-block size - multiples - -commit 86cd97ec4b943af35562a74688bc4e909b32c3d1 upstream. - -The current NEON based ChaCha implementation for ARM is optimized for -multiples of 4x the ChaCha block size (64 bytes). This makes sense for -block encryption, but given that ChaCha is also often used in the -context of networking, it makes sense to consider arbitrary length -inputs as well. 
- -For example, WireGuard typically uses 1420 byte packets, and performing -ChaCha encryption involves 5 invocations of chacha_4block_xor_neon() -and 3 invocations of chacha_block_xor_neon(), where the last one also -involves a memcpy() using a buffer on the stack to process the final -chunk of 1420 % 64 == 12 bytes. - -Let's optimize for this case as well, by letting chacha_4block_xor_neon() -deal with any input size between 64 and 256 bytes, using NEON permutation -instructions and overlapping loads and stores. This way, the 140 byte -tail of a 1420 byte input buffer can simply be processed in one go. - -This results in the following performance improvements for 1420 byte -blocks, without significant impact on power-of-2 input sizes. (Note -that Raspberry Pi is widely used in combination with a 32-bit kernel, -even though the core is 64-bit capable) - - Cortex-A8 (BeagleBone) : 7% - Cortex-A15 (Calxeda Midway) : 21% - Cortex-A53 (Raspberry Pi 3) : 3% - Cortex-A72 (Raspberry Pi 4) : 19% - -Cc: Eric Biggers <ebiggers@google.com> -Cc: "Jason A . Donenfeld" <Jason@zx2c4.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-glue.c | 34 +++++------ - arch/arm/crypto/chacha-neon-core.S | 97 +++++++++++++++++++++++++++--- - 2 files changed, 107 insertions(+), 24 deletions(-) - ---- a/arch/arm/crypto/chacha-glue.c -+++ b/arch/arm/crypto/chacha-glue.c -@@ -23,7 +23,7 @@ - asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); - asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, -- int nrounds); -+ int nrounds, unsigned int nbytes); - asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); - asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); - -@@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8 - { - u8 buf[CHACHA_BLOCK_SIZE]; - -- while (bytes >= CHACHA_BLOCK_SIZE * 4) { -- chacha_4block_xor_neon(state, dst, src, nrounds); -- bytes -= CHACHA_BLOCK_SIZE * 4; -- src += CHACHA_BLOCK_SIZE * 4; -- dst += CHACHA_BLOCK_SIZE * 4; -- state[12] += 4; -- } -- while (bytes >= CHACHA_BLOCK_SIZE) { -- chacha_block_xor_neon(state, dst, src, nrounds); -- bytes -= CHACHA_BLOCK_SIZE; -- src += CHACHA_BLOCK_SIZE; -- dst += CHACHA_BLOCK_SIZE; -- state[12]++; -+ while (bytes > CHACHA_BLOCK_SIZE) { -+ unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); -+ -+ chacha_4block_xor_neon(state, dst, src, nrounds, l); -+ bytes -= l; -+ src += l; -+ dst += l; -+ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); - } - if (bytes) { -- memcpy(buf, src, bytes); -- chacha_block_xor_neon(state, buf, buf, nrounds); -- memcpy(dst, buf, bytes); -+ const u8 *s = src; -+ u8 *d = dst; -+ -+ if (bytes != CHACHA_BLOCK_SIZE) -+ s = d = memcpy(buf, src, bytes); -+ chacha_block_xor_neon(state, d, s, nrounds); -+ if (d != dst) -+ memcpy(dst, buf, bytes); - } - } - ---- a/arch/arm/crypto/chacha-neon-core.S -+++ b/arch/arm/crypto/chacha-neon-core.S -@@ -47,6 +47,7 @@ - */ - - #include <linux/linkage.h> -+#include <asm/cache.h> - - .text - .fpu neon -@@ 
-205,7 +206,7 @@ ENDPROC(hchacha_block_neon) - - .align 5 - ENTRY(chacha_4block_xor_neon) -- push {r4-r5} -+ push {r4, lr} - mov r4, sp // preserve the stack pointer - sub ip, sp, #0x20 // allocate a 32 byte buffer - bic ip, ip, #0x1f // aligned to 32 bytes -@@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon) - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - -- adr r5, .Lctrinc -+ adr lr, .Lctrinc - vdup.32 q15, d7[1] - vdup.32 q14, d7[0] -- vld1.32 {q4}, [r5, :128] -+ vld1.32 {q4}, [lr, :128] - vdup.32 q13, d6[1] - vdup.32 q12, d6[0] - vdup.32 q11, d5[1] -@@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon) - - // Re-interleave the words in the first two rows of each block (x0..7). - // Also add the counter values 0-3 to x12[0-3]. -- vld1.32 {q8}, [r5, :128] // load counter values 0-3 -+ vld1.32 {q8}, [lr, :128] // load counter values 0-3 - vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) - vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) - vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) -@@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon) - - // Re-interleave the words in the last two rows of each block (x8..15). - vld1.32 {q8-q9}, [sp, :256] -+ mov sp, r4 // restore original stack pointer -+ ldr r4, [r4, #8] // load number of bytes - vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) - vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) - vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) -@@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon) - // XOR the rest of the data with the keystream - - vld1.8 {q0-q1}, [r2]! -+ subs r4, r4, #96 - veor q0, q0, q8 - veor q1, q1, q12 -+ ble .Lle96 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! -+ subs r4, r4, #32 - veor q0, q0, q2 - veor q1, q1, q6 -+ ble .Lle128 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! -+ subs r4, r4, #32 - veor q0, q0, q10 - veor q1, q1, q14 -+ ble .Lle160 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! -+ subs r4, r4, #32 - veor q0, q0, q4 - veor q1, q1, q5 -+ ble .Lle192 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! 
-+ subs r4, r4, #32 - veor q0, q0, q9 - veor q1, q1, q13 -+ ble .Lle224 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! -+ subs r4, r4, #32 - veor q0, q0, q3 - veor q1, q1, q7 -+ blt .Llt256 -+.Lout: - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2] -- mov sp, r4 // restore original stack pointer - veor q0, q0, q11 - veor q1, q1, q15 - vst1.8 {q0-q1}, [r1] - -- pop {r4-r5} -- bx lr -+ pop {r4, pc} -+ -+.Lle192: -+ vmov q4, q9 -+ vmov q5, q13 -+ -+.Lle160: -+ // nothing to do -+ -+.Lfinalblock: -+ // Process the final block if processing less than 4 full blocks. -+ // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the -+ // previous 32 byte output block that still needs to be written at -+ // [r1] in q0-q1. -+ beq .Lfullblock -+ -+.Lpartialblock: -+ adr lr, .Lpermute + 32 -+ add r2, r2, r4 -+ add lr, lr, r4 -+ add r4, r4, r1 -+ -+ vld1.8 {q2-q3}, [lr] -+ vld1.8 {q6-q7}, [r2] -+ -+ add r4, r4, #32 -+ -+ vtbl.8 d4, {q4-q5}, d4 -+ vtbl.8 d5, {q4-q5}, d5 -+ vtbl.8 d6, {q4-q5}, d6 -+ vtbl.8 d7, {q4-q5}, d7 -+ -+ veor q6, q6, q2 -+ veor q7, q7, q3 -+ -+ vst1.8 {q6-q7}, [r4] // overlapping stores -+ vst1.8 {q0-q1}, [r1] -+ pop {r4, pc} -+ -+.Lfullblock: -+ vmov q11, q4 -+ vmov q15, q5 -+ b .Lout -+.Lle96: -+ vmov q4, q2 -+ vmov q5, q6 -+ b .Lfinalblock -+.Lle128: -+ vmov q4, q10 -+ vmov q5, q14 -+ b .Lfinalblock -+.Lle224: -+ vmov q4, q3 -+ vmov q5, q7 -+ b .Lfinalblock -+.Llt256: -+ vmov q4, q11 -+ vmov q5, q15 -+ b .Lpartialblock - ENDPROC(chacha_4block_xor_neon) -+ -+ .align L1_CACHE_SHIFT -+.Lpermute: -+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 -+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 -+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f -+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 -+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 -+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f diff --git 
a/target/linux/generic/backport-5.4/080-wireguard-0069-crypto-arm64-chacha-simplify-tail-block-handling.patch b/target/linux/generic/backport-5.4/080-wireguard-0069-crypto-arm64-chacha-simplify-tail-block-handling.patch deleted file mode 100644 index 42e9048b99..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0069-crypto-arm64-chacha-simplify-tail-block-handling.patch +++ /dev/null @@ -1,324 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 6 Nov 2020 17:39:38 +0100 -Subject: [PATCH] crypto: arm64/chacha - simplify tail block handling - -commit c4fc6328d6c67690a7e6e03f43a5a976a13120ef upstream. - -Based on lessons learnt from optimizing the 32-bit version of this driver, -we can simplify the arm64 version considerably, by reordering the final -two stores when the last block is not a multiple of 64 bytes. This removes -the need to use permutation instructions to calculate the elements that are -clobbered by the final overlapping store, given that the store of the -penultimate block now follows it, and that one carries the correct values -for those elements already. - -While at it, simplify the overlapping loads as well, by calculating the -address of the final overlapping load upfront, and switching to this -address for every load that would otherwise extend past the end of the -source buffer. - -There is no impact on performance, but the resulting code is substantially -smaller and easier to follow. - -Cc: Eric Biggers <ebiggers@google.com> -Cc: "Jason A . Donenfeld" <Jason@zx2c4.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/arm64/crypto/chacha-neon-core.S | 193 ++++++++++----------------- - 1 file changed, 69 insertions(+), 124 deletions(-) - ---- a/arch/arm64/crypto/chacha-neon-core.S -+++ b/arch/arm64/crypto/chacha-neon-core.S -@@ -195,7 +195,6 @@ ENTRY(chacha_4block_xor_neon) - adr_l x10, .Lpermute - and x5, x4, #63 - add x10, x10, x5 -- add x11, x10, #64 - - // - // This function encrypts four consecutive ChaCha blocks by loading -@@ -645,11 +644,11 @@ CPU_BE( rev a15, a15 ) - zip2 v31.4s, v14.4s, v15.4s - eor a15, a15, w9 - -- mov x3, #64 -+ add x3, x2, x4 -+ sub x3, x3, #128 // start of last block -+ - subs x5, x4, #128 -- add x6, x5, x2 -- csel x3, x3, xzr, ge -- csel x2, x2, x6, ge -+ csel x2, x2, x3, ge - - // interleave 64-bit words in state n, n+2 - zip1 v0.2d, v16.2d, v18.2d -@@ -658,13 +657,10 @@ CPU_BE( rev a15, a15 ) - zip1 v8.2d, v17.2d, v19.2d - zip2 v12.2d, v17.2d, v19.2d - stp a2, a3, [x1, #-56] -- ld1 {v16.16b-v19.16b}, [x2], x3 - - subs x6, x4, #192 -- ccmp x3, xzr, #4, lt -- add x7, x6, x2 -- csel x3, x3, xzr, eq -- csel x2, x2, x7, eq -+ ld1 {v16.16b-v19.16b}, [x2], #64 -+ csel x2, x2, x3, ge - - zip1 v1.2d, v20.2d, v22.2d - zip2 v5.2d, v20.2d, v22.2d -@@ -672,13 +668,10 @@ CPU_BE( rev a15, a15 ) - zip1 v9.2d, v21.2d, v23.2d - zip2 v13.2d, v21.2d, v23.2d - stp a6, a7, [x1, #-40] -- ld1 {v20.16b-v23.16b}, [x2], x3 - - subs x7, x4, #256 -- ccmp x3, xzr, #4, lt -- add x8, x7, x2 -- csel x3, x3, xzr, eq -- csel x2, x2, x8, eq -+ ld1 {v20.16b-v23.16b}, [x2], #64 -+ csel x2, x2, x3, ge - - zip1 v2.2d, v24.2d, v26.2d - zip2 v6.2d, v24.2d, v26.2d -@@ -686,12 +679,10 @@ CPU_BE( rev a15, a15 ) - zip1 v10.2d, v25.2d, v27.2d - zip2 v14.2d, v25.2d, v27.2d - stp a10, a11, [x1, #-24] -- ld1 {v24.16b-v27.16b}, [x2], x3 - - subs x8, x4, #320 -- ccmp x3, xzr, #4, lt -- add x9, x8, x2 -- csel x2, x2, x9, eq -+ ld1 {v24.16b-v27.16b}, [x2], #64 -+ csel x2, x2, x3, ge - - zip1 v3.2d, v28.2d, v30.2d - zip2 v7.2d, v28.2d, v30.2d -@@ -699,151 
+690,105 @@ CPU_BE( rev a15, a15 ) - zip1 v11.2d, v29.2d, v31.2d - zip2 v15.2d, v29.2d, v31.2d - stp a14, a15, [x1, #-8] -+ -+ tbnz x5, #63, .Lt128 - ld1 {v28.16b-v31.16b}, [x2] - - // xor with corresponding input, write to output -- tbnz x5, #63, 0f - eor v16.16b, v16.16b, v0.16b - eor v17.16b, v17.16b, v1.16b - eor v18.16b, v18.16b, v2.16b - eor v19.16b, v19.16b, v3.16b -- st1 {v16.16b-v19.16b}, [x1], #64 -- cbz x5, .Lout - -- tbnz x6, #63, 1f -+ tbnz x6, #63, .Lt192 -+ - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b -- st1 {v20.16b-v23.16b}, [x1], #64 -- cbz x6, .Lout - -- tbnz x7, #63, 2f -+ st1 {v16.16b-v19.16b}, [x1], #64 -+ tbnz x7, #63, .Lt256 -+ - eor v24.16b, v24.16b, v8.16b - eor v25.16b, v25.16b, v9.16b - eor v26.16b, v26.16b, v10.16b - eor v27.16b, v27.16b, v11.16b -- st1 {v24.16b-v27.16b}, [x1], #64 -- cbz x7, .Lout - -- tbnz x8, #63, 3f -+ st1 {v20.16b-v23.16b}, [x1], #64 -+ tbnz x8, #63, .Lt320 -+ - eor v28.16b, v28.16b, v12.16b - eor v29.16b, v29.16b, v13.16b - eor v30.16b, v30.16b, v14.16b - eor v31.16b, v31.16b, v15.16b -+ -+ st1 {v24.16b-v27.16b}, [x1], #64 - st1 {v28.16b-v31.16b}, [x1] - - .Lout: frame_pop - ret - -- // fewer than 128 bytes of in/output --0: ld1 {v8.16b}, [x10] -- ld1 {v9.16b}, [x11] -- movi v10.16b, #16 -- sub x2, x1, #64 -- add x1, x1, x5 -- ld1 {v16.16b-v19.16b}, [x2] -- tbl v4.16b, {v0.16b-v3.16b}, v8.16b -- tbx v20.16b, {v16.16b-v19.16b}, v9.16b -- add v8.16b, v8.16b, v10.16b -- add v9.16b, v9.16b, v10.16b -- tbl v5.16b, {v0.16b-v3.16b}, v8.16b -- tbx v21.16b, {v16.16b-v19.16b}, v9.16b -- add v8.16b, v8.16b, v10.16b -- add v9.16b, v9.16b, v10.16b -- tbl v6.16b, {v0.16b-v3.16b}, v8.16b -- tbx v22.16b, {v16.16b-v19.16b}, v9.16b -- add v8.16b, v8.16b, v10.16b -- add v9.16b, v9.16b, v10.16b -- tbl v7.16b, {v0.16b-v3.16b}, v8.16b -- tbx v23.16b, {v16.16b-v19.16b}, v9.16b -- -- eor v20.16b, v20.16b, v4.16b -- eor v21.16b, v21.16b, v5.16b -- eor v22.16b, 
v22.16b, v6.16b -- eor v23.16b, v23.16b, v7.16b -- st1 {v20.16b-v23.16b}, [x1] -- b .Lout -- - // fewer than 192 bytes of in/output --1: ld1 {v8.16b}, [x10] -- ld1 {v9.16b}, [x11] -- movi v10.16b, #16 -- add x1, x1, x6 -- tbl v0.16b, {v4.16b-v7.16b}, v8.16b -- tbx v20.16b, {v16.16b-v19.16b}, v9.16b -- add v8.16b, v8.16b, v10.16b -- add v9.16b, v9.16b, v10.16b -- tbl v1.16b, {v4.16b-v7.16b}, v8.16b -- tbx v21.16b, {v16.16b-v19.16b}, v9.16b -- add v8.16b, v8.16b, v10.16b -- add v9.16b, v9.16b, v10.16b -- tbl v2.16b, {v4.16b-v7.16b}, v8.16b -- tbx v22.16b, {v16.16b-v19.16b}, v9.16b -- add v8.16b, v8.16b, v10.16b -- add v9.16b, v9.16b, v10.16b -- tbl v3.16b, {v4.16b-v7.16b}, v8.16b -- tbx v23.16b, {v16.16b-v19.16b}, v9.16b -- -- eor v20.16b, v20.16b, v0.16b -- eor v21.16b, v21.16b, v1.16b -- eor v22.16b, v22.16b, v2.16b -- eor v23.16b, v23.16b, v3.16b -- st1 {v20.16b-v23.16b}, [x1] -+.Lt192: cbz x5, 1f // exactly 128 bytes? -+ ld1 {v28.16b-v31.16b}, [x10] -+ add x5, x5, x1 -+ tbl v28.16b, {v4.16b-v7.16b}, v28.16b -+ tbl v29.16b, {v4.16b-v7.16b}, v29.16b -+ tbl v30.16b, {v4.16b-v7.16b}, v30.16b -+ tbl v31.16b, {v4.16b-v7.16b}, v31.16b -+ -+0: eor v20.16b, v20.16b, v28.16b -+ eor v21.16b, v21.16b, v29.16b -+ eor v22.16b, v22.16b, v30.16b -+ eor v23.16b, v23.16b, v31.16b -+ st1 {v20.16b-v23.16b}, [x5] // overlapping stores -+1: st1 {v16.16b-v19.16b}, [x1] - b .Lout - -+ // fewer than 128 bytes of in/output -+.Lt128: ld1 {v28.16b-v31.16b}, [x10] -+ add x5, x5, x1 -+ sub x1, x1, #64 -+ tbl v28.16b, {v0.16b-v3.16b}, v28.16b -+ tbl v29.16b, {v0.16b-v3.16b}, v29.16b -+ tbl v30.16b, {v0.16b-v3.16b}, v30.16b -+ tbl v31.16b, {v0.16b-v3.16b}, v31.16b -+ ld1 {v16.16b-v19.16b}, [x1] // reload first output block -+ b 0b -+ - // fewer than 256 bytes of in/output --2: ld1 {v4.16b}, [x10] -- ld1 {v5.16b}, [x11] -- movi v6.16b, #16 -- add x1, x1, x7 -+.Lt256: cbz x6, 2f // exactly 192 bytes? 
-+ ld1 {v4.16b-v7.16b}, [x10] -+ add x6, x6, x1 - tbl v0.16b, {v8.16b-v11.16b}, v4.16b -- tbx v24.16b, {v20.16b-v23.16b}, v5.16b -- add v4.16b, v4.16b, v6.16b -- add v5.16b, v5.16b, v6.16b -- tbl v1.16b, {v8.16b-v11.16b}, v4.16b -- tbx v25.16b, {v20.16b-v23.16b}, v5.16b -- add v4.16b, v4.16b, v6.16b -- add v5.16b, v5.16b, v6.16b -- tbl v2.16b, {v8.16b-v11.16b}, v4.16b -- tbx v26.16b, {v20.16b-v23.16b}, v5.16b -- add v4.16b, v4.16b, v6.16b -- add v5.16b, v5.16b, v6.16b -- tbl v3.16b, {v8.16b-v11.16b}, v4.16b -- tbx v27.16b, {v20.16b-v23.16b}, v5.16b -- -- eor v24.16b, v24.16b, v0.16b -- eor v25.16b, v25.16b, v1.16b -- eor v26.16b, v26.16b, v2.16b -- eor v27.16b, v27.16b, v3.16b -- st1 {v24.16b-v27.16b}, [x1] -+ tbl v1.16b, {v8.16b-v11.16b}, v5.16b -+ tbl v2.16b, {v8.16b-v11.16b}, v6.16b -+ tbl v3.16b, {v8.16b-v11.16b}, v7.16b -+ -+ eor v28.16b, v28.16b, v0.16b -+ eor v29.16b, v29.16b, v1.16b -+ eor v30.16b, v30.16b, v2.16b -+ eor v31.16b, v31.16b, v3.16b -+ st1 {v28.16b-v31.16b}, [x6] // overlapping stores -+2: st1 {v20.16b-v23.16b}, [x1] - b .Lout - - // fewer than 320 bytes of in/output --3: ld1 {v4.16b}, [x10] -- ld1 {v5.16b}, [x11] -- movi v6.16b, #16 -- add x1, x1, x8 -+.Lt320: cbz x7, 3f // exactly 256 bytes? 
-+ ld1 {v4.16b-v7.16b}, [x10] -+ add x7, x7, x1 - tbl v0.16b, {v12.16b-v15.16b}, v4.16b -- tbx v28.16b, {v24.16b-v27.16b}, v5.16b -- add v4.16b, v4.16b, v6.16b -- add v5.16b, v5.16b, v6.16b -- tbl v1.16b, {v12.16b-v15.16b}, v4.16b -- tbx v29.16b, {v24.16b-v27.16b}, v5.16b -- add v4.16b, v4.16b, v6.16b -- add v5.16b, v5.16b, v6.16b -- tbl v2.16b, {v12.16b-v15.16b}, v4.16b -- tbx v30.16b, {v24.16b-v27.16b}, v5.16b -- add v4.16b, v4.16b, v6.16b -- add v5.16b, v5.16b, v6.16b -- tbl v3.16b, {v12.16b-v15.16b}, v4.16b -- tbx v31.16b, {v24.16b-v27.16b}, v5.16b -+ tbl v1.16b, {v12.16b-v15.16b}, v5.16b -+ tbl v2.16b, {v12.16b-v15.16b}, v6.16b -+ tbl v3.16b, {v12.16b-v15.16b}, v7.16b - - eor v28.16b, v28.16b, v0.16b - eor v29.16b, v29.16b, v1.16b - eor v30.16b, v30.16b, v2.16b - eor v31.16b, v31.16b, v3.16b -- st1 {v28.16b-v31.16b}, [x1] -+ st1 {v28.16b-v31.16b}, [x7] // overlapping stores -+3: st1 {v24.16b-v27.16b}, [x1] - b .Lout - ENDPROC(chacha_4block_xor_neon) - -@@ -851,7 +796,7 @@ ENDPROC(chacha_4block_xor_neon) - .align L1_CACHE_SHIFT - .Lpermute: - .set .Li, 0 -- .rept 192 -+ .rept 128 - .byte (.Li - 64) - .set .Li, .Li + 1 - .endr diff --git a/target/linux/generic/backport-5.4/080-wireguard-0070-crypto-lib-chacha20poly1305-define-empty-module-exit.patch b/target/linux/generic/backport-5.4/080-wireguard-0070-crypto-lib-chacha20poly1305-define-empty-module-exit.patch deleted file mode 100644 index 084ae74bfd..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0070-crypto-lib-chacha20poly1305-define-empty-module-exit.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 15 Jan 2021 20:30:12 +0100 -Subject: [PATCH] crypto: lib/chacha20poly1305 - define empty module exit - function - -commit ac88c322d0f2917d41d13553c69e9d7f043c8b6f upstream. - -With no mod_exit function, users are unable to unload the module after -use. 
I'm not aware of any reason why module unloading should be -prohibited for this one, so this commit simply adds an empty exit -function. - -Reported-and-tested-by: John Donnelly <john.p.donnelly@oracle.com> -Acked-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - lib/crypto/chacha20poly1305.c | 5 +++++ - 1 file changed, 5 insertions(+) - ---- a/lib/crypto/chacha20poly1305.c -+++ b/lib/crypto/chacha20poly1305.c -@@ -364,7 +364,12 @@ static int __init mod_init(void) - return 0; - } - -+static void __exit mod_exit(void) -+{ -+} -+ - module_init(mod_init); -+module_exit(mod_exit); - MODULE_LICENSE("GPL v2"); - MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction"); - MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0071-crypto-arm-chacha-neon-add-missing-counter-increment.patch b/target/linux/generic/backport-5.4/080-wireguard-0071-crypto-arm-chacha-neon-add-missing-counter-increment.patch deleted file mode 100644 index ea3cc802a9..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0071-crypto-arm-chacha-neon-add-missing-counter-increment.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Sun, 13 Dec 2020 15:39:29 +0100 -Subject: [PATCH] crypto: arm/chacha-neon - add missing counter increment - -commit fd16931a2f518a32753920ff20895e5cf04c8ff1 upstream. - -Commit 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block -size multiples") refactored the chacha block handling in the glue code in -a way that may result in the counter increment to be omitted when calling -chacha_block_xor_neon() to process a full block. 
This violates the skcipher -API, which requires that the output IV is suitable for handling more input -as long as the preceding input has been presented in round multiples of the -block size. Also, the same code is exposed via the chacha library interface -whose callers may actually rely on this increment to occur even for final -blocks that are smaller than the chacha block size. - -So increment the counter after calling chacha_block_xor_neon(). - -Fixes: 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples") -Reported-by: Eric Biggers <ebiggers@kernel.org> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-glue.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/arm/crypto/chacha-glue.c -+++ b/arch/arm/crypto/chacha-glue.c -@@ -60,6 +60,7 @@ static void chacha_doneon(u32 *state, u8 - chacha_block_xor_neon(state, d, s, nrounds); - if (d != dst) - memcpy(dst, buf, bytes); -+ state[12]++; - } - } - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0072-net-WireGuard-secure-network-tunnel.patch b/target/linux/generic/backport-5.4/080-wireguard-0072-net-WireGuard-secure-network-tunnel.patch deleted file mode 100644 index a29da1e7b2..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0072-net-WireGuard-secure-network-tunnel.patch +++ /dev/null @@ -1,8071 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 9 Dec 2019 00:27:34 +0100 -Subject: [PATCH] net: WireGuard secure network tunnel - -commit e7096c131e5161fa3b8e52a650d7719d2857adfd upstream. - -WireGuard is a layer 3 secure networking tunnel made specifically for -the kernel, that aims to be much simpler and easier to audit than IPsec. 
-Extensive documentation and description of the protocol and -considerations, along with formal proofs of the cryptography, are -available at: - - * https://www.wireguard.com/ - * https://www.wireguard.com/papers/wireguard.pdf - -This commit implements WireGuard as a simple network device driver, -accessible in the usual RTNL way used by virtual network drivers. It -makes use of the udp_tunnel APIs, GRO, GSO, NAPI, and the usual set of -networking subsystem APIs. It has a somewhat novel multicore queueing -system designed for maximum throughput and minimal latency of encryption -operations, but it is implemented modestly using workqueues and NAPI. -Configuration is done via generic Netlink, and following a review from -the Netlink maintainer a year ago, several high profile userspace tools -have already implemented the API. - -This commit also comes with several different tests, both in-kernel -tests and out-of-kernel tests based on network namespaces, taking profit -of the fact that sockets used by WireGuard intentionally stay in the -namespace the WireGuard interface was originally created, exactly like -the semantics of userspace tun devices. See wireguard.com/netns/ for -pictures and examples. - -The source code is fairly short, but rather than combining everything -into a single file, WireGuard is developed as cleanly separable files, -making auditing and comprehension easier. Things are laid out as -follows: - - * noise.[ch], cookie.[ch], messages.h: These implement the bulk of the - cryptographic aspects of the protocol, and are mostly data-only in - nature, taking in buffers of bytes and spitting out buffers of - bytes. They also handle reference counting for their various shared - pieces of data, like keys and key lists. - - * ratelimiter.[ch]: Used as an integral part of cookie.[ch] for - ratelimiting certain types of cryptographic operations in accordance - with particular WireGuard semantics. 
- - * allowedips.[ch], peerlookup.[ch]: The main lookup structures of - WireGuard, the former being trie-like with particular semantics, an - integral part of the design of the protocol, and the latter just - being nice helper functions around the various hashtables we use. - - * device.[ch]: Implementation of functions for the netdevice and for - rtnl, responsible for maintaining the life of a given interface and - wiring it up to the rest of WireGuard. - - * peer.[ch]: Each interface has a list of peers, with helper functions - available here for creation, destruction, and reference counting. - - * socket.[ch]: Implementation of functions related to udp_socket and - the general set of kernel socket APIs, for sending and receiving - ciphertext UDP packets, and taking care of WireGuard-specific sticky - socket routing semantics for the automatic roaming. - - * netlink.[ch]: Userspace API entry point for configuring WireGuard - peers and devices. The API has been implemented by several userspace - tools and network management utility, and the WireGuard project - distributes the basic wg(8) tool. - - * queueing.[ch]: Shared function on the rx and tx path for handling - the various queues used in the multicore algorithms. - - * send.c: Handles encrypting outgoing packets in parallel on - multiple cores, before sending them in order on a single core, via - workqueues and ring buffers. Also handles sending handshake and cookie - messages as part of the protocol, in parallel. - - * receive.c: Handles decrypting incoming packets in parallel on - multiple cores, before passing them off in order to be ingested via - the rest of the networking subsystem with GRO via the typical NAPI - poll function. Also handles receiving handshake and cookie messages - as part of the protocol, in parallel. - - * timers.[ch]: Uses the timer wheel to implement protocol particular - event timeouts, and gives a set of very simple event-driven entry - point functions for callers. 
- - * main.c, version.h: Initialization and deinitialization of the module. - - * selftest/*.h: Runtime unit tests for some of the most security - sensitive functions. - - * tools/testing/selftests/wireguard/netns.sh: Aforementioned testing - script using network namespaces. - -This commit aims to be as self-contained as possible, implementing -WireGuard as a standalone module not needing much special handling or -coordination from the network subsystem. I expect for future -optimizations to the network stack to positively improve WireGuard, and -vice-versa, but for the time being, this exists as intentionally -standalone. - -We introduce a menu option for CONFIG_WIREGUARD, as well as providing a -verbose debug log and self-tests via CONFIG_WIREGUARD_DEBUG. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Cc: David Miller <davem@davemloft.net> -Cc: Greg KH <gregkh@linuxfoundation.org> -Cc: Linus Torvalds <torvalds@linux-foundation.org> -Cc: Herbert Xu <herbert@gondor.apana.org.au> -Cc: linux-crypto@vger.kernel.org -Cc: linux-kernel@vger.kernel.org -Cc: netdev@vger.kernel.org -Signed-off-by: David S. Miller <davem@davemloft.net> -[Jason: ported to 5.4 by doing the following: - - wg_get_device_start uses genl_family_attrbuf - - trival skb_redirect_reset change from 2c64605b590e is folded in - - skb_list_walk_safe was already backported prior] -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - MAINTAINERS | 8 + - drivers/net/Kconfig | 41 + - drivers/net/Makefile | 1 + - drivers/net/wireguard/Makefile | 18 + - drivers/net/wireguard/allowedips.c | 381 +++++++++ - drivers/net/wireguard/allowedips.h | 59 ++ - drivers/net/wireguard/cookie.c | 236 ++++++ - drivers/net/wireguard/cookie.h | 59 ++ - drivers/net/wireguard/device.c | 458 ++++++++++ - drivers/net/wireguard/device.h | 65 ++ - drivers/net/wireguard/main.c | 64 ++ - drivers/net/wireguard/messages.h | 128 +++ - drivers/net/wireguard/netlink.c | 648 +++++++++++++++ - drivers/net/wireguard/netlink.h | 12 + - drivers/net/wireguard/noise.c | 828 +++++++++++++++++++ - drivers/net/wireguard/noise.h | 137 +++ - drivers/net/wireguard/peer.c | 240 ++++++ - drivers/net/wireguard/peer.h | 83 ++ - drivers/net/wireguard/peerlookup.c | 221 +++++ - drivers/net/wireguard/peerlookup.h | 64 ++ - drivers/net/wireguard/queueing.c | 53 ++ - drivers/net/wireguard/queueing.h | 197 +++++ - drivers/net/wireguard/ratelimiter.c | 223 +++++ - drivers/net/wireguard/ratelimiter.h | 19 + - drivers/net/wireguard/receive.c | 595 +++++++++++++ - drivers/net/wireguard/selftest/allowedips.c | 683 +++++++++++++++ - drivers/net/wireguard/selftest/counter.c | 104 +++ - drivers/net/wireguard/selftest/ratelimiter.c | 226 +++++ - drivers/net/wireguard/send.c | 413 +++++++++ - drivers/net/wireguard/socket.c | 437 ++++++++++ - drivers/net/wireguard/socket.h | 44 + - drivers/net/wireguard/timers.c | 243 ++++++ - drivers/net/wireguard/timers.h | 31 + - drivers/net/wireguard/version.h | 1 + - include/uapi/linux/wireguard.h | 196 +++++ - tools/testing/selftests/wireguard/netns.sh | 537 ++++++++++++ - 36 files changed, 7753 insertions(+) - create mode 100644 drivers/net/wireguard/Makefile - create mode 100644 drivers/net/wireguard/allowedips.c - create mode 100644 drivers/net/wireguard/allowedips.h - create mode 100644 drivers/net/wireguard/cookie.c - create mode 100644 drivers/net/wireguard/cookie.h - create mode 
100644 drivers/net/wireguard/device.c - create mode 100644 drivers/net/wireguard/device.h - create mode 100644 drivers/net/wireguard/main.c - create mode 100644 drivers/net/wireguard/messages.h - create mode 100644 drivers/net/wireguard/netlink.c - create mode 100644 drivers/net/wireguard/netlink.h - create mode 100644 drivers/net/wireguard/noise.c - create mode 100644 drivers/net/wireguard/noise.h - create mode 100644 drivers/net/wireguard/peer.c - create mode 100644 drivers/net/wireguard/peer.h - create mode 100644 drivers/net/wireguard/peerlookup.c - create mode 100644 drivers/net/wireguard/peerlookup.h - create mode 100644 drivers/net/wireguard/queueing.c - create mode 100644 drivers/net/wireguard/queueing.h - create mode 100644 drivers/net/wireguard/ratelimiter.c - create mode 100644 drivers/net/wireguard/ratelimiter.h - create mode 100644 drivers/net/wireguard/receive.c - create mode 100644 drivers/net/wireguard/selftest/allowedips.c - create mode 100644 drivers/net/wireguard/selftest/counter.c - create mode 100644 drivers/net/wireguard/selftest/ratelimiter.c - create mode 100644 drivers/net/wireguard/send.c - create mode 100644 drivers/net/wireguard/socket.c - create mode 100644 drivers/net/wireguard/socket.h - create mode 100644 drivers/net/wireguard/timers.c - create mode 100644 drivers/net/wireguard/timers.h - create mode 100644 drivers/net/wireguard/version.h - create mode 100644 include/uapi/linux/wireguard.h - create mode 100755 tools/testing/selftests/wireguard/netns.sh - ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -17585,6 +17585,14 @@ L: linux-gpio@vger.kernel.org - S: Maintained - F: drivers/gpio/gpio-ws16c48.c - -+WIREGUARD SECURE NETWORK TUNNEL -+M: Jason A. 
Donenfeld <Jason@zx2c4.com> -+S: Maintained -+F: drivers/net/wireguard/ -+F: tools/testing/selftests/wireguard/ -+L: wireguard@lists.zx2c4.com -+L: netdev@vger.kernel.org -+ - WISTRON LAPTOP BUTTON DRIVER - M: Miloslav Trmac <mitr@volny.cz> - S: Maintained ---- a/drivers/net/Kconfig -+++ b/drivers/net/Kconfig -@@ -71,6 +71,47 @@ config DUMMY - To compile this driver as a module, choose M here: the module - will be called dummy. - -+config WIREGUARD -+ tristate "WireGuard secure network tunnel" -+ depends on NET && INET -+ depends on IPV6 || !IPV6 -+ select NET_UDP_TUNNEL -+ select DST_CACHE -+ select CRYPTO -+ select CRYPTO_LIB_CURVE25519 -+ select CRYPTO_LIB_CHACHA20POLY1305 -+ select CRYPTO_LIB_BLAKE2S -+ select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT -+ select CRYPTO_POLY1305_X86_64 if X86 && 64BIT -+ select CRYPTO_BLAKE2S_X86 if X86 && 64BIT -+ select CRYPTO_CURVE25519_X86 if X86 && 64BIT -+ select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON -+ select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON -+ select CRYPTO_POLY1305_ARM if ARM -+ select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON -+ select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 -+ select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) -+ help -+ WireGuard is a secure, fast, and easy to use replacement for IPSec -+ that uses modern cryptography and clever networking tricks. It's -+ designed to be fairly general purpose and abstract enough to fit most -+ use cases, while at the same time remaining extremely simple to -+ configure. See www.wireguard.com for more info. -+ -+ It's safe to say Y or M here, as the driver is very lightweight and -+ is only in use when an administrator chooses to add an interface. -+ -+config WIREGUARD_DEBUG -+ bool "Debugging checks and verbose messages" -+ depends on WIREGUARD -+ help -+ This will write log messages for handshake and other events -+ that occur for a WireGuard interface. 
It will also perform some -+ extra validation checks and unit tests at various points. This is -+ only useful for debugging. -+ -+ Say N here unless you know what you're doing. -+ - config EQUALIZER - tristate "EQL (serial line load balancing) support" - ---help--- ---- a/drivers/net/Makefile -+++ b/drivers/net/Makefile -@@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/ - obj-$(CONFIG_IPVLAN) += ipvlan/ - obj-$(CONFIG_IPVTAP) += ipvlan/ - obj-$(CONFIG_DUMMY) += dummy.o -+obj-$(CONFIG_WIREGUARD) += wireguard/ - obj-$(CONFIG_EQUALIZER) += eql.o - obj-$(CONFIG_IFB) += ifb.o - obj-$(CONFIG_MACSEC) += macsec.o ---- /dev/null -+++ b/drivers/net/wireguard/Makefile -@@ -0,0 +1,18 @@ -+ccflags-y := -O3 -+ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' -+ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -+wireguard-y := main.o -+wireguard-y += noise.o -+wireguard-y += device.o -+wireguard-y += peer.o -+wireguard-y += timers.o -+wireguard-y += queueing.o -+wireguard-y += send.o -+wireguard-y += receive.o -+wireguard-y += socket.o -+wireguard-y += peerlookup.o -+wireguard-y += allowedips.o -+wireguard-y += ratelimiter.o -+wireguard-y += cookie.o -+wireguard-y += netlink.o -+obj-$(CONFIG_WIREGUARD) := wireguard.o ---- /dev/null -+++ b/drivers/net/wireguard/allowedips.c -@@ -0,0 +1,381 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "allowedips.h" -+#include "peer.h" -+ -+static void swap_endian(u8 *dst, const u8 *src, u8 bits) -+{ -+ if (bits == 32) { -+ *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); -+ } else if (bits == 128) { -+ ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); -+ ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); -+ } -+} -+ -+static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, -+ u8 cidr, u8 bits) -+{ -+ node->cidr = cidr; -+ node->bit_at_a = cidr / 8U; -+#ifdef __LITTLE_ENDIAN -+ node->bit_at_a ^= (bits / 8U - 1U) % 8U; -+#endif -+ node->bit_at_b = 7U - (cidr % 8U); -+ node->bitlen = bits; -+ memcpy(node->bits, src, bits / 8U); -+} -+#define CHOOSE_NODE(parent, key) \ -+ parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] -+ -+static void node_free_rcu(struct rcu_head *rcu) -+{ -+ kfree(container_of(rcu, struct allowedips_node, rcu)); -+} -+ -+static void push_rcu(struct allowedips_node **stack, -+ struct allowedips_node __rcu *p, unsigned int *len) -+{ -+ if (rcu_access_pointer(p)) { -+ WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); -+ stack[(*len)++] = rcu_dereference_raw(p); -+ } -+} -+ -+static void root_free_rcu(struct rcu_head *rcu) -+{ -+ struct allowedips_node *node, *stack[128] = { -+ container_of(rcu, struct allowedips_node, rcu) }; -+ unsigned int len = 1; -+ -+ while (len > 0 && (node = stack[--len])) { -+ push_rcu(stack, node->bit[0], &len); -+ push_rcu(stack, node->bit[1], &len); -+ kfree(node); -+ } -+} -+ -+static void root_remove_peer_lists(struct allowedips_node *root) -+{ -+ struct allowedips_node *node, *stack[128] = { root }; -+ unsigned int len = 1; -+ -+ while (len > 0 && (node = stack[--len])) { -+ push_rcu(stack, node->bit[0], &len); -+ push_rcu(stack, node->bit[1], &len); -+ if (rcu_access_pointer(node->peer)) -+ list_del(&node->peer_list); -+ } -+} -+ -+static void walk_remove_by_peer(struct allowedips_node __rcu **top, -+ struct wg_peer *peer, struct mutex *lock) -+{ -+#define 
REF(p) rcu_access_pointer(p) -+#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) -+#define PUSH(p) ({ \ -+ WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \ -+ stack[len++] = p; \ -+ }) -+ -+ struct allowedips_node __rcu **stack[128], **nptr; -+ struct allowedips_node *node, *prev; -+ unsigned int len; -+ -+ if (unlikely(!peer || !REF(*top))) -+ return; -+ -+ for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { -+ nptr = stack[len - 1]; -+ node = DEREF(nptr); -+ if (!node) { -+ --len; -+ continue; -+ } -+ if (!prev || REF(prev->bit[0]) == node || -+ REF(prev->bit[1]) == node) { -+ if (REF(node->bit[0])) -+ PUSH(&node->bit[0]); -+ else if (REF(node->bit[1])) -+ PUSH(&node->bit[1]); -+ } else if (REF(node->bit[0]) == prev) { -+ if (REF(node->bit[1])) -+ PUSH(&node->bit[1]); -+ } else { -+ if (rcu_dereference_protected(node->peer, -+ lockdep_is_held(lock)) == peer) { -+ RCU_INIT_POINTER(node->peer, NULL); -+ list_del_init(&node->peer_list); -+ if (!node->bit[0] || !node->bit[1]) { -+ rcu_assign_pointer(*nptr, DEREF( -+ &node->bit[!REF(node->bit[0])])); -+ call_rcu(&node->rcu, node_free_rcu); -+ node = DEREF(nptr); -+ } -+ } -+ --len; -+ } -+ } -+ -+#undef REF -+#undef DEREF -+#undef PUSH -+} -+ -+static unsigned int fls128(u64 a, u64 b) -+{ -+ return a ? 
fls64(a) + 64U : fls64(b); -+} -+ -+static u8 common_bits(const struct allowedips_node *node, const u8 *key, -+ u8 bits) -+{ -+ if (bits == 32) -+ return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key); -+ else if (bits == 128) -+ return 128U - fls128( -+ *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0], -+ *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]); -+ return 0; -+} -+ -+static bool prefix_matches(const struct allowedips_node *node, const u8 *key, -+ u8 bits) -+{ -+ /* This could be much faster if it actually just compared the common -+ * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and -+ * the rest, but it turns out that common_bits is already super fast on -+ * modern processors, even taking into account the unfortunate bswap. -+ * So, we just inline it like this instead. -+ */ -+ return common_bits(node, key, bits) >= node->cidr; -+} -+ -+static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, -+ const u8 *key) -+{ -+ struct allowedips_node *node = trie, *found = NULL; -+ -+ while (node && prefix_matches(node, key, bits)) { -+ if (rcu_access_pointer(node->peer)) -+ found = node; -+ if (node->cidr == bits) -+ break; -+ node = rcu_dereference_bh(CHOOSE_NODE(node, key)); -+ } -+ return found; -+} -+ -+/* Returns a strong reference to a peer */ -+static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits, -+ const void *be_ip) -+{ -+ /* Aligned so it can be passed to fls/fls64 */ -+ u8 ip[16] __aligned(__alignof(u64)); -+ struct allowedips_node *node; -+ struct wg_peer *peer = NULL; -+ -+ swap_endian(ip, be_ip, bits); -+ -+ rcu_read_lock_bh(); -+retry: -+ node = find_node(rcu_dereference_bh(root), bits, ip); -+ if (node) { -+ peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer)); -+ if (!peer) -+ goto retry; -+ } -+ rcu_read_unlock_bh(); -+ return peer; -+} -+ -+static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, -+ u8 cidr, u8 bits, struct 
allowedips_node **rnode, -+ struct mutex *lock) -+{ -+ struct allowedips_node *node = rcu_dereference_protected(trie, -+ lockdep_is_held(lock)); -+ struct allowedips_node *parent = NULL; -+ bool exact = false; -+ -+ while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) { -+ parent = node; -+ if (parent->cidr == cidr) { -+ exact = true; -+ break; -+ } -+ node = rcu_dereference_protected(CHOOSE_NODE(parent, key), -+ lockdep_is_held(lock)); -+ } -+ *rnode = parent; -+ return exact; -+} -+ -+static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, -+ u8 cidr, struct wg_peer *peer, struct mutex *lock) -+{ -+ struct allowedips_node *node, *parent, *down, *newnode; -+ -+ if (unlikely(cidr > bits || !peer)) -+ return -EINVAL; -+ -+ if (!rcu_access_pointer(*trie)) { -+ node = kzalloc(sizeof(*node), GFP_KERNEL); -+ if (unlikely(!node)) -+ return -ENOMEM; -+ RCU_INIT_POINTER(node->peer, peer); -+ list_add_tail(&node->peer_list, &peer->allowedips_list); -+ copy_and_assign_cidr(node, key, cidr, bits); -+ rcu_assign_pointer(*trie, node); -+ return 0; -+ } -+ if (node_placement(*trie, key, cidr, bits, &node, lock)) { -+ rcu_assign_pointer(node->peer, peer); -+ list_move_tail(&node->peer_list, &peer->allowedips_list); -+ return 0; -+ } -+ -+ newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); -+ if (unlikely(!newnode)) -+ return -ENOMEM; -+ RCU_INIT_POINTER(newnode->peer, peer); -+ list_add_tail(&newnode->peer_list, &peer->allowedips_list); -+ copy_and_assign_cidr(newnode, key, cidr, bits); -+ -+ if (!node) { -+ down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); -+ } else { -+ down = rcu_dereference_protected(CHOOSE_NODE(node, key), -+ lockdep_is_held(lock)); -+ if (!down) { -+ rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); -+ return 0; -+ } -+ } -+ cidr = min(cidr, common_bits(down, key, bits)); -+ parent = node; -+ -+ if (newnode->cidr == cidr) { -+ rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); -+ if (!parent) -+ 
rcu_assign_pointer(*trie, newnode); -+ else -+ rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), -+ newnode); -+ } else { -+ node = kzalloc(sizeof(*node), GFP_KERNEL); -+ if (unlikely(!node)) { -+ kfree(newnode); -+ return -ENOMEM; -+ } -+ INIT_LIST_HEAD(&node->peer_list); -+ copy_and_assign_cidr(node, newnode->bits, cidr, bits); -+ -+ rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); -+ rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); -+ if (!parent) -+ rcu_assign_pointer(*trie, node); -+ else -+ rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), -+ node); -+ } -+ return 0; -+} -+ -+void wg_allowedips_init(struct allowedips *table) -+{ -+ table->root4 = table->root6 = NULL; -+ table->seq = 1; -+} -+ -+void wg_allowedips_free(struct allowedips *table, struct mutex *lock) -+{ -+ struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6; -+ -+ ++table->seq; -+ RCU_INIT_POINTER(table->root4, NULL); -+ RCU_INIT_POINTER(table->root6, NULL); -+ if (rcu_access_pointer(old4)) { -+ struct allowedips_node *node = rcu_dereference_protected(old4, -+ lockdep_is_held(lock)); -+ -+ root_remove_peer_lists(node); -+ call_rcu(&node->rcu, root_free_rcu); -+ } -+ if (rcu_access_pointer(old6)) { -+ struct allowedips_node *node = rcu_dereference_protected(old6, -+ lockdep_is_held(lock)); -+ -+ root_remove_peer_lists(node); -+ call_rcu(&node->rcu, root_free_rcu); -+ } -+} -+ -+int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, -+ u8 cidr, struct wg_peer *peer, struct mutex *lock) -+{ -+ /* Aligned so it can be passed to fls */ -+ u8 key[4] __aligned(__alignof(u32)); -+ -+ ++table->seq; -+ swap_endian(key, (const u8 *)ip, 32); -+ return add(&table->root4, 32, key, cidr, peer, lock); -+} -+ -+int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, -+ u8 cidr, struct wg_peer *peer, struct mutex *lock) -+{ -+ /* Aligned so it can be passed to fls64 */ -+ u8 key[16] __aligned(__alignof(u64)); -+ -+ 
++table->seq; -+ swap_endian(key, (const u8 *)ip, 128); -+ return add(&table->root6, 128, key, cidr, peer, lock); -+} -+ -+void wg_allowedips_remove_by_peer(struct allowedips *table, -+ struct wg_peer *peer, struct mutex *lock) -+{ -+ ++table->seq; -+ walk_remove_by_peer(&table->root4, peer, lock); -+ walk_remove_by_peer(&table->root6, peer, lock); -+} -+ -+int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) -+{ -+ const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U); -+ swap_endian(ip, node->bits, node->bitlen); -+ memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes); -+ if (node->cidr) -+ ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U); -+ -+ *cidr = node->cidr; -+ return node->bitlen == 32 ? AF_INET : AF_INET6; -+} -+ -+/* Returns a strong reference to a peer */ -+struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, -+ struct sk_buff *skb) -+{ -+ if (skb->protocol == htons(ETH_P_IP)) -+ return lookup(table->root4, 32, &ip_hdr(skb)->daddr); -+ else if (skb->protocol == htons(ETH_P_IPV6)) -+ return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr); -+ return NULL; -+} -+ -+/* Returns a strong reference to a peer */ -+struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, -+ struct sk_buff *skb) -+{ -+ if (skb->protocol == htons(ETH_P_IP)) -+ return lookup(table->root4, 32, &ip_hdr(skb)->saddr); -+ else if (skb->protocol == htons(ETH_P_IPV6)) -+ return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr); -+ return NULL; -+} -+ -+#include "selftest/allowedips.c" ---- /dev/null -+++ b/drivers/net/wireguard/allowedips.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_ALLOWEDIPS_H -+#define _WG_ALLOWEDIPS_H -+ -+#include <linux/mutex.h> -+#include <linux/ip.h> -+#include <linux/ipv6.h> -+ -+struct wg_peer; -+ -+struct allowedips_node { -+ struct wg_peer __rcu *peer; -+ struct allowedips_node __rcu *bit[2]; -+ /* While it may seem scandalous that we waste space for v4, -+ * we're alloc'ing to the nearest power of 2 anyway, so this -+ * doesn't actually make a difference. -+ */ -+ u8 bits[16] __aligned(__alignof(u64)); -+ u8 cidr, bit_at_a, bit_at_b, bitlen; -+ -+ /* Keep rarely used list at bottom to be beyond cache line. */ -+ union { -+ struct list_head peer_list; -+ struct rcu_head rcu; -+ }; -+}; -+ -+struct allowedips { -+ struct allowedips_node __rcu *root4; -+ struct allowedips_node __rcu *root6; -+ u64 seq; -+}; -+ -+void wg_allowedips_init(struct allowedips *table); -+void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); -+int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, -+ u8 cidr, struct wg_peer *peer, struct mutex *lock); -+int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, -+ u8 cidr, struct wg_peer *peer, struct mutex *lock); -+void wg_allowedips_remove_by_peer(struct allowedips *table, -+ struct wg_peer *peer, struct mutex *lock); -+/* The ip input pointer should be __aligned(__alignof(u64))) */ -+int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr); -+ -+/* These return a strong reference to a peer: */ -+struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, -+ struct sk_buff *skb); -+struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, -+ struct sk_buff *skb); -+ -+#ifdef DEBUG -+bool wg_allowedips_selftest(void); -+#endif -+ -+#endif /* _WG_ALLOWEDIPS_H */ ---- /dev/null -+++ b/drivers/net/wireguard/cookie.c -@@ -0,0 +1,236 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "cookie.h" -+#include "peer.h" -+#include "device.h" -+#include "messages.h" -+#include "ratelimiter.h" -+#include "timers.h" -+ -+#include <crypto/blake2s.h> -+#include <crypto/chacha20poly1305.h> -+ -+#include <net/ipv6.h> -+#include <crypto/algapi.h> -+ -+void wg_cookie_checker_init(struct cookie_checker *checker, -+ struct wg_device *wg) -+{ -+ init_rwsem(&checker->secret_lock); -+ checker->secret_birthdate = ktime_get_coarse_boottime_ns(); -+ get_random_bytes(checker->secret, NOISE_HASH_LEN); -+ checker->device = wg; -+} -+ -+enum { COOKIE_KEY_LABEL_LEN = 8 }; -+static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----"; -+static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--"; -+ -+static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], -+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN], -+ const u8 label[COOKIE_KEY_LABEL_LEN]) -+{ -+ struct blake2s_state blake; -+ -+ blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN); -+ blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN); -+ blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN); -+ blake2s_final(&blake, key); -+} -+ -+/* Must hold peer->handshake.static_identity->lock */ -+void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker) -+{ -+ if (likely(checker->device->static_identity.has_identity)) { -+ precompute_key(checker->cookie_encryption_key, -+ checker->device->static_identity.static_public, -+ cookie_key_label); -+ precompute_key(checker->message_mac1_key, -+ checker->device->static_identity.static_public, -+ mac1_key_label); -+ } else { -+ memset(checker->cookie_encryption_key, 0, -+ NOISE_SYMMETRIC_KEY_LEN); -+ memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN); -+ } -+} -+ -+void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer) -+{ -+ precompute_key(peer->latest_cookie.cookie_decryption_key, -+ peer->handshake.remote_static, cookie_key_label); -+ precompute_key(peer->latest_cookie.message_mac1_key, -+ 
peer->handshake.remote_static, mac1_key_label); -+} -+ -+void wg_cookie_init(struct cookie *cookie) -+{ -+ memset(cookie, 0, sizeof(*cookie)); -+ init_rwsem(&cookie->lock); -+} -+ -+static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, -+ const u8 key[NOISE_SYMMETRIC_KEY_LEN]) -+{ -+ len = len - sizeof(struct message_macs) + -+ offsetof(struct message_macs, mac1); -+ blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN); -+} -+ -+static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, -+ const u8 cookie[COOKIE_LEN]) -+{ -+ len = len - sizeof(struct message_macs) + -+ offsetof(struct message_macs, mac2); -+ blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN); -+} -+ -+static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, -+ struct cookie_checker *checker) -+{ -+ struct blake2s_state state; -+ -+ if (wg_birthdate_has_expired(checker->secret_birthdate, -+ COOKIE_SECRET_MAX_AGE)) { -+ down_write(&checker->secret_lock); -+ checker->secret_birthdate = ktime_get_coarse_boottime_ns(); -+ get_random_bytes(checker->secret, NOISE_HASH_LEN); -+ up_write(&checker->secret_lock); -+ } -+ -+ down_read(&checker->secret_lock); -+ -+ blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN); -+ if (skb->protocol == htons(ETH_P_IP)) -+ blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, -+ sizeof(struct in_addr)); -+ else if (skb->protocol == htons(ETH_P_IPV6)) -+ blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, -+ sizeof(struct in6_addr)); -+ blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); -+ blake2s_final(&state, cookie); -+ -+ up_read(&checker->secret_lock); -+} -+ -+enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, -+ struct sk_buff *skb, -+ bool check_cookie) -+{ -+ struct message_macs *macs = (struct message_macs *) -+ (skb->data + skb->len - sizeof(*macs)); -+ enum cookie_mac_state ret; -+ u8 computed_mac[COOKIE_LEN]; -+ u8 
cookie[COOKIE_LEN]; -+ -+ ret = INVALID_MAC; -+ compute_mac1(computed_mac, skb->data, skb->len, -+ checker->message_mac1_key); -+ if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) -+ goto out; -+ -+ ret = VALID_MAC_BUT_NO_COOKIE; -+ -+ if (!check_cookie) -+ goto out; -+ -+ make_cookie(cookie, skb, checker); -+ -+ compute_mac2(computed_mac, skb->data, skb->len, cookie); -+ if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN)) -+ goto out; -+ -+ ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; -+ if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev))) -+ goto out; -+ -+ ret = VALID_MAC_WITH_COOKIE; -+ -+out: -+ return ret; -+} -+ -+void wg_cookie_add_mac_to_packet(void *message, size_t len, -+ struct wg_peer *peer) -+{ -+ struct message_macs *macs = (struct message_macs *) -+ ((u8 *)message + len - sizeof(*macs)); -+ -+ down_write(&peer->latest_cookie.lock); -+ compute_mac1(macs->mac1, message, len, -+ peer->latest_cookie.message_mac1_key); -+ memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); -+ peer->latest_cookie.have_sent_mac1 = true; -+ up_write(&peer->latest_cookie.lock); -+ -+ down_read(&peer->latest_cookie.lock); -+ if (peer->latest_cookie.is_valid && -+ !wg_birthdate_has_expired(peer->latest_cookie.birthdate, -+ COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) -+ compute_mac2(macs->mac2, message, len, -+ peer->latest_cookie.cookie); -+ else -+ memset(macs->mac2, 0, COOKIE_LEN); -+ up_read(&peer->latest_cookie.lock); -+} -+ -+void wg_cookie_message_create(struct message_handshake_cookie *dst, -+ struct sk_buff *skb, __le32 index, -+ struct cookie_checker *checker) -+{ -+ struct message_macs *macs = (struct message_macs *) -+ ((u8 *)skb->data + skb->len - sizeof(*macs)); -+ u8 cookie[COOKIE_LEN]; -+ -+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE); -+ dst->receiver_index = index; -+ get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN); -+ -+ make_cookie(cookie, skb, checker); -+ 
xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, -+ macs->mac1, COOKIE_LEN, dst->nonce, -+ checker->cookie_encryption_key); -+} -+ -+void wg_cookie_message_consume(struct message_handshake_cookie *src, -+ struct wg_device *wg) -+{ -+ struct wg_peer *peer = NULL; -+ u8 cookie[COOKIE_LEN]; -+ bool ret; -+ -+ if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable, -+ INDEX_HASHTABLE_HANDSHAKE | -+ INDEX_HASHTABLE_KEYPAIR, -+ src->receiver_index, &peer))) -+ return; -+ -+ down_read(&peer->latest_cookie.lock); -+ if (unlikely(!peer->latest_cookie.have_sent_mac1)) { -+ up_read(&peer->latest_cookie.lock); -+ goto out; -+ } -+ ret = xchacha20poly1305_decrypt( -+ cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), -+ peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, -+ peer->latest_cookie.cookie_decryption_key); -+ up_read(&peer->latest_cookie.lock); -+ -+ if (ret) { -+ down_write(&peer->latest_cookie.lock); -+ memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); -+ peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); -+ peer->latest_cookie.is_valid = true; -+ peer->latest_cookie.have_sent_mac1 = false; -+ up_write(&peer->latest_cookie.lock); -+ } else { -+ net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", -+ wg->dev->name); -+ } -+ -+out: -+ wg_peer_put(peer); -+} ---- /dev/null -+++ b/drivers/net/wireguard/cookie.h -@@ -0,0 +1,59 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_COOKIE_H -+#define _WG_COOKIE_H -+ -+#include "messages.h" -+#include <linux/rwsem.h> -+ -+struct wg_peer; -+ -+struct cookie_checker { -+ u8 secret[NOISE_HASH_LEN]; -+ u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN]; -+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; -+ u64 secret_birthdate; -+ struct rw_semaphore secret_lock; -+ struct wg_device *device; -+}; -+ -+struct cookie { -+ u64 birthdate; -+ bool is_valid; -+ u8 cookie[COOKIE_LEN]; -+ bool have_sent_mac1; -+ u8 last_mac1_sent[COOKIE_LEN]; -+ u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN]; -+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; -+ struct rw_semaphore lock; -+}; -+ -+enum cookie_mac_state { -+ INVALID_MAC, -+ VALID_MAC_BUT_NO_COOKIE, -+ VALID_MAC_WITH_COOKIE_BUT_RATELIMITED, -+ VALID_MAC_WITH_COOKIE -+}; -+ -+void wg_cookie_checker_init(struct cookie_checker *checker, -+ struct wg_device *wg); -+void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker); -+void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer); -+void wg_cookie_init(struct cookie *cookie); -+ -+enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, -+ struct sk_buff *skb, -+ bool check_cookie); -+void wg_cookie_add_mac_to_packet(void *message, size_t len, -+ struct wg_peer *peer); -+ -+void wg_cookie_message_create(struct message_handshake_cookie *src, -+ struct sk_buff *skb, __le32 index, -+ struct cookie_checker *checker); -+void wg_cookie_message_consume(struct message_handshake_cookie *src, -+ struct wg_device *wg); -+ -+#endif /* _WG_COOKIE_H */ ---- /dev/null -+++ b/drivers/net/wireguard/device.c -@@ -0,0 +1,458 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "queueing.h" -+#include "socket.h" -+#include "timers.h" -+#include "device.h" -+#include "ratelimiter.h" -+#include "peer.h" -+#include "messages.h" -+ -+#include <linux/module.h> -+#include <linux/rtnetlink.h> -+#include <linux/inet.h> -+#include <linux/netdevice.h> -+#include <linux/inetdevice.h> -+#include <linux/if_arp.h> -+#include <linux/icmp.h> -+#include <linux/suspend.h> -+#include <net/icmp.h> -+#include <net/rtnetlink.h> -+#include <net/ip_tunnels.h> -+#include <net/addrconf.h> -+ -+static LIST_HEAD(device_list); -+ -+static int wg_open(struct net_device *dev) -+{ -+ struct in_device *dev_v4 = __in_dev_get_rtnl(dev); -+ struct inet6_dev *dev_v6 = __in6_dev_get(dev); -+ struct wg_device *wg = netdev_priv(dev); -+ struct wg_peer *peer; -+ int ret; -+ -+ if (dev_v4) { -+ /* At some point we might put this check near the ip_rt_send_ -+ * redirect call of ip_forward in net/ipv4/ip_forward.c, similar -+ * to the current secpath check. -+ */ -+ IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false); -+ IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false; -+ } -+ if (dev_v6) -+ dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; -+ -+ ret = wg_socket_init(wg, wg->incoming_port); -+ if (ret < 0) -+ return ret; -+ mutex_lock(&wg->device_update_lock); -+ list_for_each_entry(peer, &wg->peer_list, peer_list) { -+ wg_packet_send_staged_packets(peer); -+ if (peer->persistent_keepalive_interval) -+ wg_packet_send_keepalive(peer); -+ } -+ mutex_unlock(&wg->device_update_lock); -+ return 0; -+} -+ -+#ifdef CONFIG_PM_SLEEP -+static int wg_pm_notification(struct notifier_block *nb, unsigned long action, -+ void *data) -+{ -+ struct wg_device *wg; -+ struct wg_peer *peer; -+ -+ /* If the machine is constantly suspending and resuming, as part of -+ * its normal operation rather than as a somewhat rare event, then we -+ * don't actually want to clear keys. 
-+ */ -+ if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID)) -+ return 0; -+ -+ if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE) -+ return 0; -+ -+ rtnl_lock(); -+ list_for_each_entry(wg, &device_list, device_list) { -+ mutex_lock(&wg->device_update_lock); -+ list_for_each_entry(peer, &wg->peer_list, peer_list) { -+ del_timer(&peer->timer_zero_key_material); -+ wg_noise_handshake_clear(&peer->handshake); -+ wg_noise_keypairs_clear(&peer->keypairs); -+ } -+ mutex_unlock(&wg->device_update_lock); -+ } -+ rtnl_unlock(); -+ rcu_barrier(); -+ return 0; -+} -+ -+static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification }; -+#endif -+ -+static int wg_stop(struct net_device *dev) -+{ -+ struct wg_device *wg = netdev_priv(dev); -+ struct wg_peer *peer; -+ -+ mutex_lock(&wg->device_update_lock); -+ list_for_each_entry(peer, &wg->peer_list, peer_list) { -+ wg_packet_purge_staged_packets(peer); -+ wg_timers_stop(peer); -+ wg_noise_handshake_clear(&peer->handshake); -+ wg_noise_keypairs_clear(&peer->keypairs); -+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); -+ } -+ mutex_unlock(&wg->device_update_lock); -+ skb_queue_purge(&wg->incoming_handshakes); -+ wg_socket_reinit(wg, NULL, NULL); -+ return 0; -+} -+ -+static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct wg_device *wg = netdev_priv(dev); -+ struct sk_buff_head packets; -+ struct wg_peer *peer; -+ struct sk_buff *next; -+ sa_family_t family; -+ u32 mtu; -+ int ret; -+ -+ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { -+ ret = -EPROTONOSUPPORT; -+ net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); -+ goto err; -+ } -+ -+ peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb); -+ if (unlikely(!peer)) { -+ ret = -ENOKEY; -+ if (skb->protocol == htons(ETH_P_IP)) -+ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n", -+ dev->name, &ip_hdr(skb)->daddr); -+ else if 
(skb->protocol == htons(ETH_P_IPV6)) -+ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", -+ dev->name, &ipv6_hdr(skb)->daddr); -+ goto err; -+ } -+ -+ family = READ_ONCE(peer->endpoint.addr.sa_family); -+ if (unlikely(family != AF_INET && family != AF_INET6)) { -+ ret = -EDESTADDRREQ; -+ net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n", -+ dev->name, peer->internal_id); -+ goto err_peer; -+ } -+ -+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; -+ -+ __skb_queue_head_init(&packets); -+ if (!skb_is_gso(skb)) { -+ skb_mark_not_on_list(skb); -+ } else { -+ struct sk_buff *segs = skb_gso_segment(skb, 0); -+ -+ if (unlikely(IS_ERR(segs))) { -+ ret = PTR_ERR(segs); -+ goto err_peer; -+ } -+ dev_kfree_skb(skb); -+ skb = segs; -+ } -+ -+ skb_list_walk_safe(skb, skb, next) { -+ skb_mark_not_on_list(skb); -+ -+ skb = skb_share_check(skb, GFP_ATOMIC); -+ if (unlikely(!skb)) -+ continue; -+ -+ /* We only need to keep the original dst around for icmp, -+ * so at this point we're in a position to drop it. -+ */ -+ skb_dst_drop(skb); -+ -+ PACKET_CB(skb)->mtu = mtu; -+ -+ __skb_queue_tail(&packets, skb); -+ } -+ -+ spin_lock_bh(&peer->staged_packet_queue.lock); -+ /* If the queue is getting too big, we start removing the oldest packets -+ * until it's small again. We do this before adding the new packet, so -+ * we don't remove GSO segments that are in excess. 
-+ */ -+ while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { -+ dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); -+ ++dev->stats.tx_dropped; -+ } -+ skb_queue_splice_tail(&packets, &peer->staged_packet_queue); -+ spin_unlock_bh(&peer->staged_packet_queue.lock); -+ -+ wg_packet_send_staged_packets(peer); -+ -+ wg_peer_put(peer); -+ return NETDEV_TX_OK; -+ -+err_peer: -+ wg_peer_put(peer); -+err: -+ ++dev->stats.tx_errors; -+ if (skb->protocol == htons(ETH_P_IP)) -+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); -+ else if (skb->protocol == htons(ETH_P_IPV6)) -+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); -+ kfree_skb(skb); -+ return ret; -+} -+ -+static const struct net_device_ops netdev_ops = { -+ .ndo_open = wg_open, -+ .ndo_stop = wg_stop, -+ .ndo_start_xmit = wg_xmit, -+ .ndo_get_stats64 = ip_tunnel_get_stats64 -+}; -+ -+static void wg_destruct(struct net_device *dev) -+{ -+ struct wg_device *wg = netdev_priv(dev); -+ -+ rtnl_lock(); -+ list_del(&wg->device_list); -+ rtnl_unlock(); -+ mutex_lock(&wg->device_update_lock); -+ wg->incoming_port = 0; -+ wg_socket_reinit(wg, NULL, NULL); -+ /* The final references are cleared in the below calls to destroy_workqueue. */ -+ wg_peer_remove_all(wg); -+ destroy_workqueue(wg->handshake_receive_wq); -+ destroy_workqueue(wg->handshake_send_wq); -+ destroy_workqueue(wg->packet_crypt_wq); -+ wg_packet_queue_free(&wg->decrypt_queue, true); -+ wg_packet_queue_free(&wg->encrypt_queue, true); -+ rcu_barrier(); /* Wait for all the peers to be actually freed. 
*/ -+ wg_ratelimiter_uninit(); -+ memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); -+ skb_queue_purge(&wg->incoming_handshakes); -+ free_percpu(dev->tstats); -+ free_percpu(wg->incoming_handshakes_worker); -+ if (wg->have_creating_net_ref) -+ put_net(wg->creating_net); -+ kvfree(wg->index_hashtable); -+ kvfree(wg->peer_hashtable); -+ mutex_unlock(&wg->device_update_lock); -+ -+ pr_debug("%s: Interface deleted\n", dev->name); -+ free_netdev(dev); -+} -+ -+static const struct device_type device_type = { .name = KBUILD_MODNAME }; -+ -+static void wg_setup(struct net_device *dev) -+{ -+ struct wg_device *wg = netdev_priv(dev); -+ enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | -+ NETIF_F_SG | NETIF_F_GSO | -+ NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; -+ -+ dev->netdev_ops = &netdev_ops; -+ dev->hard_header_len = 0; -+ dev->addr_len = 0; -+ dev->needed_headroom = DATA_PACKET_HEAD_ROOM; -+ dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE); -+ dev->type = ARPHRD_NONE; -+ dev->flags = IFF_POINTOPOINT | IFF_NOARP; -+ dev->priv_flags |= IFF_NO_QUEUE; -+ dev->features |= NETIF_F_LLTX; -+ dev->features |= WG_NETDEV_FEATURES; -+ dev->hw_features |= WG_NETDEV_FEATURES; -+ dev->hw_enc_features |= WG_NETDEV_FEATURES; -+ dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - -+ sizeof(struct udphdr) - -+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); -+ -+ SET_NETDEV_DEVTYPE(dev, &device_type); -+ -+ /* We need to keep the dst around in case of icmp replies. 
*/ -+ netif_keep_dst(dev); -+ -+ memset(wg, 0, sizeof(*wg)); -+ wg->dev = dev; -+} -+ -+static int wg_newlink(struct net *src_net, struct net_device *dev, -+ struct nlattr *tb[], struct nlattr *data[], -+ struct netlink_ext_ack *extack) -+{ -+ struct wg_device *wg = netdev_priv(dev); -+ int ret = -ENOMEM; -+ -+ wg->creating_net = src_net; -+ init_rwsem(&wg->static_identity.lock); -+ mutex_init(&wg->socket_update_lock); -+ mutex_init(&wg->device_update_lock); -+ skb_queue_head_init(&wg->incoming_handshakes); -+ wg_allowedips_init(&wg->peer_allowedips); -+ wg_cookie_checker_init(&wg->cookie_checker, wg); -+ INIT_LIST_HEAD(&wg->peer_list); -+ wg->device_update_gen = 1; -+ -+ wg->peer_hashtable = wg_pubkey_hashtable_alloc(); -+ if (!wg->peer_hashtable) -+ return ret; -+ -+ wg->index_hashtable = wg_index_hashtable_alloc(); -+ if (!wg->index_hashtable) -+ goto err_free_peer_hashtable; -+ -+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); -+ if (!dev->tstats) -+ goto err_free_index_hashtable; -+ -+ wg->incoming_handshakes_worker = -+ wg_packet_percpu_multicore_worker_alloc( -+ wg_packet_handshake_receive_worker, wg); -+ if (!wg->incoming_handshakes_worker) -+ goto err_free_tstats; -+ -+ wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", -+ WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); -+ if (!wg->handshake_receive_wq) -+ goto err_free_incoming_handshakes; -+ -+ wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", -+ WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); -+ if (!wg->handshake_send_wq) -+ goto err_destroy_handshake_receive; -+ -+ wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", -+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name); -+ if (!wg->packet_crypt_wq) -+ goto err_destroy_handshake_send; -+ -+ ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, -+ true, MAX_QUEUED_PACKETS); -+ if (ret < 0) -+ goto err_destroy_packet_crypt; -+ -+ ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, -+ true, 
MAX_QUEUED_PACKETS); -+ if (ret < 0) -+ goto err_free_encrypt_queue; -+ -+ ret = wg_ratelimiter_init(); -+ if (ret < 0) -+ goto err_free_decrypt_queue; -+ -+ ret = register_netdevice(dev); -+ if (ret < 0) -+ goto err_uninit_ratelimiter; -+ -+ list_add(&wg->device_list, &device_list); -+ -+ /* We wait until the end to assign priv_destructor, so that -+ * register_netdevice doesn't call it for us if it fails. -+ */ -+ dev->priv_destructor = wg_destruct; -+ -+ pr_debug("%s: Interface created\n", dev->name); -+ return ret; -+ -+err_uninit_ratelimiter: -+ wg_ratelimiter_uninit(); -+err_free_decrypt_queue: -+ wg_packet_queue_free(&wg->decrypt_queue, true); -+err_free_encrypt_queue: -+ wg_packet_queue_free(&wg->encrypt_queue, true); -+err_destroy_packet_crypt: -+ destroy_workqueue(wg->packet_crypt_wq); -+err_destroy_handshake_send: -+ destroy_workqueue(wg->handshake_send_wq); -+err_destroy_handshake_receive: -+ destroy_workqueue(wg->handshake_receive_wq); -+err_free_incoming_handshakes: -+ free_percpu(wg->incoming_handshakes_worker); -+err_free_tstats: -+ free_percpu(dev->tstats); -+err_free_index_hashtable: -+ kvfree(wg->index_hashtable); -+err_free_peer_hashtable: -+ kvfree(wg->peer_hashtable); -+ return ret; -+} -+ -+static struct rtnl_link_ops link_ops __read_mostly = { -+ .kind = KBUILD_MODNAME, -+ .priv_size = sizeof(struct wg_device), -+ .setup = wg_setup, -+ .newlink = wg_newlink, -+}; -+ -+static int wg_netdevice_notification(struct notifier_block *nb, -+ unsigned long action, void *data) -+{ -+ struct net_device *dev = ((struct netdev_notifier_info *)data)->dev; -+ struct wg_device *wg = netdev_priv(dev); -+ -+ ASSERT_RTNL(); -+ -+ if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) -+ return 0; -+ -+ if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) { -+ put_net(wg->creating_net); -+ wg->have_creating_net_ref = false; -+ } else if (dev_net(dev) != wg->creating_net && -+ !wg->have_creating_net_ref) { -+ wg->have_creating_net_ref = 
true; -+ get_net(wg->creating_net); -+ } -+ return 0; -+} -+ -+static struct notifier_block netdevice_notifier = { -+ .notifier_call = wg_netdevice_notification -+}; -+ -+int __init wg_device_init(void) -+{ -+ int ret; -+ -+#ifdef CONFIG_PM_SLEEP -+ ret = register_pm_notifier(&pm_notifier); -+ if (ret) -+ return ret; -+#endif -+ -+ ret = register_netdevice_notifier(&netdevice_notifier); -+ if (ret) -+ goto error_pm; -+ -+ ret = rtnl_link_register(&link_ops); -+ if (ret) -+ goto error_netdevice; -+ -+ return 0; -+ -+error_netdevice: -+ unregister_netdevice_notifier(&netdevice_notifier); -+error_pm: -+#ifdef CONFIG_PM_SLEEP -+ unregister_pm_notifier(&pm_notifier); -+#endif -+ return ret; -+} -+ -+void wg_device_uninit(void) -+{ -+ rtnl_link_unregister(&link_ops); -+ unregister_netdevice_notifier(&netdevice_notifier); -+#ifdef CONFIG_PM_SLEEP -+ unregister_pm_notifier(&pm_notifier); -+#endif -+ rcu_barrier(); -+} ---- /dev/null -+++ b/drivers/net/wireguard/device.h -@@ -0,0 +1,65 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_DEVICE_H -+#define _WG_DEVICE_H -+ -+#include "noise.h" -+#include "allowedips.h" -+#include "peerlookup.h" -+#include "cookie.h" -+ -+#include <linux/types.h> -+#include <linux/netdevice.h> -+#include <linux/workqueue.h> -+#include <linux/mutex.h> -+#include <linux/net.h> -+#include <linux/ptr_ring.h> -+ -+struct wg_device; -+ -+struct multicore_worker { -+ void *ptr; -+ struct work_struct work; -+}; -+ -+struct crypt_queue { -+ struct ptr_ring ring; -+ union { -+ struct { -+ struct multicore_worker __percpu *worker; -+ int last_cpu; -+ }; -+ struct work_struct work; -+ }; -+}; -+ -+struct wg_device { -+ struct net_device *dev; -+ struct crypt_queue encrypt_queue, decrypt_queue; -+ struct sock __rcu *sock4, *sock6; -+ struct net *creating_net; -+ struct noise_static_identity static_identity; -+ struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; -+ struct workqueue_struct *packet_crypt_wq; -+ struct sk_buff_head incoming_handshakes; -+ int incoming_handshake_cpu; -+ struct multicore_worker __percpu *incoming_handshakes_worker; -+ struct cookie_checker cookie_checker; -+ struct pubkey_hashtable *peer_hashtable; -+ struct index_hashtable *index_hashtable; -+ struct allowedips peer_allowedips; -+ struct mutex device_update_lock, socket_update_lock; -+ struct list_head device_list, peer_list; -+ unsigned int num_peers, device_update_gen; -+ u32 fwmark; -+ u16 incoming_port; -+ bool have_creating_net_ref; -+}; -+ -+int wg_device_init(void); -+void wg_device_uninit(void); -+ -+#endif /* _WG_DEVICE_H */ ---- /dev/null -+++ b/drivers/net/wireguard/main.c -@@ -0,0 +1,64 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "version.h" -+#include "device.h" -+#include "noise.h" -+#include "queueing.h" -+#include "ratelimiter.h" -+#include "netlink.h" -+ -+#include <uapi/linux/wireguard.h> -+ -+#include <linux/version.h> -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/genetlink.h> -+#include <net/rtnetlink.h> -+ -+static int __init mod_init(void) -+{ -+ int ret; -+ -+#ifdef DEBUG -+ if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() || -+ !wg_ratelimiter_selftest()) -+ return -ENOTRECOVERABLE; -+#endif -+ wg_noise_init(); -+ -+ ret = wg_device_init(); -+ if (ret < 0) -+ goto err_device; -+ -+ ret = wg_genetlink_init(); -+ if (ret < 0) -+ goto err_netlink; -+ -+ pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n"); -+ pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n"); -+ -+ return 0; -+ -+err_netlink: -+ wg_device_uninit(); -+err_device: -+ return ret; -+} -+ -+static void __exit mod_exit(void) -+{ -+ wg_genetlink_uninit(); -+ wg_device_uninit(); -+} -+ -+module_init(mod_init); -+module_exit(mod_exit); -+MODULE_LICENSE("GPL v2"); -+MODULE_DESCRIPTION("WireGuard secure network tunnel"); -+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); -+MODULE_VERSION(WIREGUARD_VERSION); -+MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME); -+MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME); ---- /dev/null -+++ b/drivers/net/wireguard/messages.h -@@ -0,0 +1,128 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_MESSAGES_H -+#define _WG_MESSAGES_H -+ -+#include <crypto/curve25519.h> -+#include <crypto/chacha20poly1305.h> -+#include <crypto/blake2s.h> -+ -+#include <linux/kernel.h> -+#include <linux/param.h> -+#include <linux/skbuff.h> -+ -+enum noise_lengths { -+ NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE, -+ NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE, -+ NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32), -+ NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE, -+ NOISE_HASH_LEN = BLAKE2S_HASH_SIZE -+}; -+ -+#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN) -+ -+enum cookie_values { -+ COOKIE_SECRET_MAX_AGE = 2 * 60, -+ COOKIE_SECRET_LATENCY = 5, -+ COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE, -+ COOKIE_LEN = 16 -+}; -+ -+enum counter_values { -+ COUNTER_BITS_TOTAL = 2048, -+ COUNTER_REDUNDANT_BITS = BITS_PER_LONG, -+ COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS -+}; -+ -+enum limits { -+ REKEY_AFTER_MESSAGES = 1ULL << 60, -+ REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1, -+ REKEY_TIMEOUT = 5, -+ REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3, -+ REKEY_AFTER_TIME = 120, -+ REJECT_AFTER_TIME = 180, -+ INITIATIONS_PER_SECOND = 50, -+ MAX_PEERS_PER_DEVICE = 1U << 20, -+ KEEPALIVE_TIMEOUT = 10, -+ MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT, -+ MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */ -+ MAX_STAGED_PACKETS = 128, -+ MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */ -+}; -+ -+enum message_type { -+ MESSAGE_INVALID = 0, -+ MESSAGE_HANDSHAKE_INITIATION = 1, -+ MESSAGE_HANDSHAKE_RESPONSE = 2, -+ MESSAGE_HANDSHAKE_COOKIE = 3, -+ MESSAGE_DATA = 4 -+}; -+ -+struct message_header { -+ /* The actual layout of this that we want is: -+ * u8 type -+ * u8 reserved_zero[3] -+ * -+ * But it turns out that by encoding this as little endian, -+ * we achieve the same thing, and it makes checking faster. 
-+ */ -+ __le32 type; -+}; -+ -+struct message_macs { -+ u8 mac1[COOKIE_LEN]; -+ u8 mac2[COOKIE_LEN]; -+}; -+ -+struct message_handshake_initiation { -+ struct message_header header; -+ __le32 sender_index; -+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; -+ u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)]; -+ u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)]; -+ struct message_macs macs; -+}; -+ -+struct message_handshake_response { -+ struct message_header header; -+ __le32 sender_index; -+ __le32 receiver_index; -+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN]; -+ u8 encrypted_nothing[noise_encrypted_len(0)]; -+ struct message_macs macs; -+}; -+ -+struct message_handshake_cookie { -+ struct message_header header; -+ __le32 receiver_index; -+ u8 nonce[COOKIE_NONCE_LEN]; -+ u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)]; -+}; -+ -+struct message_data { -+ struct message_header header; -+ __le32 key_idx; -+ __le64 counter; -+ u8 encrypted_data[]; -+}; -+ -+#define message_data_len(plain_len) \ -+ (noise_encrypted_len(plain_len) + sizeof(struct message_data)) -+ -+enum message_alignments { -+ MESSAGE_PADDING_MULTIPLE = 16, -+ MESSAGE_MINIMUM_LENGTH = message_data_len(0) -+}; -+ -+#define SKB_HEADER_LEN \ -+ (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \ -+ sizeof(struct udphdr) + NET_SKB_PAD) -+#define DATA_PACKET_HEAD_ROOM \ -+ ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4) -+ -+enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ }; -+ -+#endif /* _WG_MESSAGES_H */ ---- /dev/null -+++ b/drivers/net/wireguard/netlink.c -@@ -0,0 +1,648 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "netlink.h" -+#include "device.h" -+#include "peer.h" -+#include "socket.h" -+#include "queueing.h" -+#include "messages.h" -+ -+#include <uapi/linux/wireguard.h> -+ -+#include <linux/if.h> -+#include <net/genetlink.h> -+#include <net/sock.h> -+#include <crypto/algapi.h> -+ -+static struct genl_family genl_family; -+ -+static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { -+ [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, -+ [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, -+ [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, -+ [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, -+ [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, -+ [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, -+ [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, -+ [WGDEVICE_A_PEERS] = { .type = NLA_NESTED } -+}; -+ -+static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { -+ [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, -+ [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN }, -+ [WGPEER_A_FLAGS] = { .type = NLA_U32 }, -+ [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) }, -+ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, -+ [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) }, -+ [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, -+ [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, -+ [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, -+ [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 } -+}; -+ -+static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { -+ [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, -+ [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) }, -+ [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } -+}; -+ -+static struct wg_device *lookup_interface(struct nlattr **attrs, -+ struct sk_buff *skb) 
-+{ -+ struct net_device *dev = NULL; -+ -+ if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME]) -+ return ERR_PTR(-EBADR); -+ if (attrs[WGDEVICE_A_IFINDEX]) -+ dev = dev_get_by_index(sock_net(skb->sk), -+ nla_get_u32(attrs[WGDEVICE_A_IFINDEX])); -+ else if (attrs[WGDEVICE_A_IFNAME]) -+ dev = dev_get_by_name(sock_net(skb->sk), -+ nla_data(attrs[WGDEVICE_A_IFNAME])); -+ if (!dev) -+ return ERR_PTR(-ENODEV); -+ if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind || -+ strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) { -+ dev_put(dev); -+ return ERR_PTR(-EOPNOTSUPP); -+ } -+ return netdev_priv(dev); -+} -+ -+static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr, -+ int family) -+{ -+ struct nlattr *allowedip_nest; -+ -+ allowedip_nest = nla_nest_start(skb, 0); -+ if (!allowedip_nest) -+ return -EMSGSIZE; -+ -+ if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) || -+ nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) || -+ nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ? 
-+ sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) { -+ nla_nest_cancel(skb, allowedip_nest); -+ return -EMSGSIZE; -+ } -+ -+ nla_nest_end(skb, allowedip_nest); -+ return 0; -+} -+ -+struct dump_ctx { -+ struct wg_device *wg; -+ struct wg_peer *next_peer; -+ u64 allowedips_seq; -+ struct allowedips_node *next_allowedip; -+}; -+ -+#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) -+ -+static int -+get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) -+{ -+ -+ struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); -+ struct allowedips_node *allowedips_node = ctx->next_allowedip; -+ bool fail; -+ -+ if (!peer_nest) -+ return -EMSGSIZE; -+ -+ down_read(&peer->handshake.lock); -+ fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, -+ peer->handshake.remote_static); -+ up_read(&peer->handshake.lock); -+ if (fail) -+ goto err; -+ -+ if (!allowedips_node) { -+ const struct __kernel_timespec last_handshake = { -+ .tv_sec = peer->walltime_last_handshake.tv_sec, -+ .tv_nsec = peer->walltime_last_handshake.tv_nsec -+ }; -+ -+ down_read(&peer->handshake.lock); -+ fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, -+ NOISE_SYMMETRIC_KEY_LEN, -+ peer->handshake.preshared_key); -+ up_read(&peer->handshake.lock); -+ if (fail) -+ goto err; -+ -+ if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, -+ sizeof(last_handshake), &last_handshake) || -+ nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, -+ peer->persistent_keepalive_interval) || -+ nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, -+ WGPEER_A_UNSPEC) || -+ nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, -+ WGPEER_A_UNSPEC) || -+ nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1)) -+ goto err; -+ -+ read_lock_bh(&peer->endpoint_lock); -+ if (peer->endpoint.addr.sa_family == AF_INET) -+ fail = nla_put(skb, WGPEER_A_ENDPOINT, -+ sizeof(peer->endpoint.addr4), -+ &peer->endpoint.addr4); -+ else if (peer->endpoint.addr.sa_family == AF_INET6) -+ fail = nla_put(skb, 
WGPEER_A_ENDPOINT, -+ sizeof(peer->endpoint.addr6), -+ &peer->endpoint.addr6); -+ read_unlock_bh(&peer->endpoint_lock); -+ if (fail) -+ goto err; -+ allowedips_node = -+ list_first_entry_or_null(&peer->allowedips_list, -+ struct allowedips_node, peer_list); -+ } -+ if (!allowedips_node) -+ goto no_allowedips; -+ if (!ctx->allowedips_seq) -+ ctx->allowedips_seq = peer->device->peer_allowedips.seq; -+ else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) -+ goto no_allowedips; -+ -+ allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); -+ if (!allowedips_nest) -+ goto err; -+ -+ list_for_each_entry_from(allowedips_node, &peer->allowedips_list, -+ peer_list) { -+ u8 cidr, ip[16] __aligned(__alignof(u64)); -+ int family; -+ -+ family = wg_allowedips_read_node(allowedips_node, ip, &cidr); -+ if (get_allowedips(skb, ip, cidr, family)) { -+ nla_nest_end(skb, allowedips_nest); -+ nla_nest_end(skb, peer_nest); -+ ctx->next_allowedip = allowedips_node; -+ return -EMSGSIZE; -+ } -+ } -+ nla_nest_end(skb, allowedips_nest); -+no_allowedips: -+ nla_nest_end(skb, peer_nest); -+ ctx->next_allowedip = NULL; -+ ctx->allowedips_seq = 0; -+ return 0; -+err: -+ nla_nest_cancel(skb, peer_nest); -+ return -EMSGSIZE; -+} -+ -+static int wg_get_device_start(struct netlink_callback *cb) -+{ -+ struct nlattr **attrs = genl_family_attrbuf(&genl_family); -+ struct wg_device *wg; -+ int ret; -+ -+ ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, attrs, -+ genl_family.maxattr, device_policy, NULL); -+ if (ret < 0) -+ return ret; -+ wg = lookup_interface(attrs, cb->skb); -+ if (IS_ERR(wg)) -+ return PTR_ERR(wg); -+ DUMP_CTX(cb)->wg = wg; -+ return 0; -+} -+ -+static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) -+{ -+ struct wg_peer *peer, *next_peer_cursor; -+ struct dump_ctx *ctx = DUMP_CTX(cb); -+ struct wg_device *wg = ctx->wg; -+ struct nlattr *peers_nest; -+ int ret = -EMSGSIZE; -+ bool done = true; -+ void *hdr; -+ -+ rtnl_lock(); 
-+ mutex_lock(&wg->device_update_lock); -+ cb->seq = wg->device_update_gen; -+ next_peer_cursor = ctx->next_peer; -+ -+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, -+ &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); -+ if (!hdr) -+ goto out; -+ genl_dump_check_consistent(cb, hdr); -+ -+ if (!ctx->next_peer) { -+ if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, -+ wg->incoming_port) || -+ nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || -+ nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || -+ nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name)) -+ goto out; -+ -+ down_read(&wg->static_identity.lock); -+ if (wg->static_identity.has_identity) { -+ if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, -+ NOISE_PUBLIC_KEY_LEN, -+ wg->static_identity.static_private) || -+ nla_put(skb, WGDEVICE_A_PUBLIC_KEY, -+ NOISE_PUBLIC_KEY_LEN, -+ wg->static_identity.static_public)) { -+ up_read(&wg->static_identity.lock); -+ goto out; -+ } -+ } -+ up_read(&wg->static_identity.lock); -+ } -+ -+ peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS); -+ if (!peers_nest) -+ goto out; -+ ret = 0; -+ /* If the last cursor was removed via list_del_init in peer_remove, then -+ * we just treat this the same as there being no more peers left. The -+ * reason is that seq_nr should indicate to userspace that this isn't a -+ * coherent dump anyway, so they'll try again. 
-+ */ -+ if (list_empty(&wg->peer_list) || -+ (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { -+ nla_nest_cancel(skb, peers_nest); -+ goto out; -+ } -+ lockdep_assert_held(&wg->device_update_lock); -+ peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); -+ list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { -+ if (get_peer(peer, skb, ctx)) { -+ done = false; -+ break; -+ } -+ next_peer_cursor = peer; -+ } -+ nla_nest_end(skb, peers_nest); -+ -+out: -+ if (!ret && !done && next_peer_cursor) -+ wg_peer_get(next_peer_cursor); -+ wg_peer_put(ctx->next_peer); -+ mutex_unlock(&wg->device_update_lock); -+ rtnl_unlock(); -+ -+ if (ret) { -+ genlmsg_cancel(skb, hdr); -+ return ret; -+ } -+ genlmsg_end(skb, hdr); -+ if (done) { -+ ctx->next_peer = NULL; -+ return 0; -+ } -+ ctx->next_peer = next_peer_cursor; -+ return skb->len; -+ -+ /* At this point, we can't really deal ourselves with safely zeroing out -+ * the private key material after usage. This will need an additional API -+ * in the kernel for marking skbs as zero_on_free. 
-+ */ -+} -+ -+static int wg_get_device_done(struct netlink_callback *cb) -+{ -+ struct dump_ctx *ctx = DUMP_CTX(cb); -+ -+ if (ctx->wg) -+ dev_put(ctx->wg->dev); -+ wg_peer_put(ctx->next_peer); -+ return 0; -+} -+ -+static int set_port(struct wg_device *wg, u16 port) -+{ -+ struct wg_peer *peer; -+ -+ if (wg->incoming_port == port) -+ return 0; -+ list_for_each_entry(peer, &wg->peer_list, peer_list) -+ wg_socket_clear_peer_endpoint_src(peer); -+ if (!netif_running(wg->dev)) { -+ wg->incoming_port = port; -+ return 0; -+ } -+ return wg_socket_init(wg, port); -+} -+ -+static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) -+{ -+ int ret = -EINVAL; -+ u16 family; -+ u8 cidr; -+ -+ if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] || -+ !attrs[WGALLOWEDIP_A_CIDR_MASK]) -+ return ret; -+ family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); -+ cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); -+ -+ if (family == AF_INET && cidr <= 32 && -+ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) -+ ret = wg_allowedips_insert_v4( -+ &peer->device->peer_allowedips, -+ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, -+ &peer->device->device_update_lock); -+ else if (family == AF_INET6 && cidr <= 128 && -+ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr)) -+ ret = wg_allowedips_insert_v6( -+ &peer->device->peer_allowedips, -+ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, -+ &peer->device->device_update_lock); -+ -+ return ret; -+} -+ -+static int set_peer(struct wg_device *wg, struct nlattr **attrs) -+{ -+ u8 *public_key = NULL, *preshared_key = NULL; -+ struct wg_peer *peer = NULL; -+ u32 flags = 0; -+ int ret; -+ -+ ret = -EINVAL; -+ if (attrs[WGPEER_A_PUBLIC_KEY] && -+ nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN) -+ public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]); -+ else -+ goto out; -+ if (attrs[WGPEER_A_PRESHARED_KEY] && -+ nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) -+ 
preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); -+ -+ if (attrs[WGPEER_A_FLAGS]) -+ flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); -+ ret = -EOPNOTSUPP; -+ if (flags & ~__WGPEER_F_ALL) -+ goto out; -+ -+ ret = -EPFNOSUPPORT; -+ if (attrs[WGPEER_A_PROTOCOL_VERSION]) { -+ if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1) -+ goto out; -+ } -+ -+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, -+ nla_data(attrs[WGPEER_A_PUBLIC_KEY])); -+ ret = 0; -+ if (!peer) { /* Peer doesn't exist yet. Add a new one. */ -+ if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) -+ goto out; -+ -+ /* The peer is new, so there aren't allowed IPs to remove. */ -+ flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; -+ -+ down_read(&wg->static_identity.lock); -+ if (wg->static_identity.has_identity && -+ !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), -+ wg->static_identity.static_public, -+ NOISE_PUBLIC_KEY_LEN)) { -+ /* We silently ignore peers that have the same public -+ * key as the device. The reason we do it silently is -+ * that we'd like for people to be able to reuse the -+ * same set of API calls across peers. -+ */ -+ up_read(&wg->static_identity.lock); -+ ret = 0; -+ goto out; -+ } -+ up_read(&wg->static_identity.lock); -+ -+ peer = wg_peer_create(wg, public_key, preshared_key); -+ if (IS_ERR(peer)) { -+ /* Similar to the above, if the key is invalid, we skip -+ * it without fanfare, so that services don't need to -+ * worry about doing key validation themselves. -+ */ -+ ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); -+ peer = NULL; -+ goto out; -+ } -+ /* Take additional reference, as though we've just been -+ * looked up. 
-+ */ -+ wg_peer_get(peer); -+ } -+ -+ if (flags & WGPEER_F_REMOVE_ME) { -+ wg_peer_remove(peer); -+ goto out; -+ } -+ -+ if (preshared_key) { -+ down_write(&peer->handshake.lock); -+ memcpy(&peer->handshake.preshared_key, preshared_key, -+ NOISE_SYMMETRIC_KEY_LEN); -+ up_write(&peer->handshake.lock); -+ } -+ -+ if (attrs[WGPEER_A_ENDPOINT]) { -+ struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); -+ size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); -+ -+ if ((len == sizeof(struct sockaddr_in) && -+ addr->sa_family == AF_INET) || -+ (len == sizeof(struct sockaddr_in6) && -+ addr->sa_family == AF_INET6)) { -+ struct endpoint endpoint = { { { 0 } } }; -+ -+ memcpy(&endpoint.addr, addr, len); -+ wg_socket_set_peer_endpoint(peer, &endpoint); -+ } -+ } -+ -+ if (flags & WGPEER_F_REPLACE_ALLOWEDIPS) -+ wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer, -+ &wg->device_update_lock); -+ -+ if (attrs[WGPEER_A_ALLOWEDIPS]) { -+ struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1]; -+ int rem; -+ -+ nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) { -+ ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, -+ attr, allowedip_policy, NULL); -+ if (ret < 0) -+ goto out; -+ ret = set_allowedip(peer, allowedip); -+ if (ret < 0) -+ goto out; -+ } -+ } -+ -+ if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) { -+ const u16 persistent_keepalive_interval = nla_get_u16( -+ attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]); -+ const bool send_keepalive = -+ !peer->persistent_keepalive_interval && -+ persistent_keepalive_interval && -+ netif_running(wg->dev); -+ -+ peer->persistent_keepalive_interval = persistent_keepalive_interval; -+ if (send_keepalive) -+ wg_packet_send_keepalive(peer); -+ } -+ -+ if (netif_running(wg->dev)) -+ wg_packet_send_staged_packets(peer); -+ -+out: -+ wg_peer_put(peer); -+ if (attrs[WGPEER_A_PRESHARED_KEY]) -+ memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), -+ nla_len(attrs[WGPEER_A_PRESHARED_KEY])); -+ return ret; -+} -+ -+static 
int wg_set_device(struct sk_buff *skb, struct genl_info *info) -+{ -+ struct wg_device *wg = lookup_interface(info->attrs, skb); -+ u32 flags = 0; -+ int ret; -+ -+ if (IS_ERR(wg)) { -+ ret = PTR_ERR(wg); -+ goto out_nodev; -+ } -+ -+ rtnl_lock(); -+ mutex_lock(&wg->device_update_lock); -+ -+ if (info->attrs[WGDEVICE_A_FLAGS]) -+ flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); -+ ret = -EOPNOTSUPP; -+ if (flags & ~__WGDEVICE_F_ALL) -+ goto out; -+ -+ ret = -EPERM; -+ if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || -+ info->attrs[WGDEVICE_A_FWMARK]) && -+ !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) -+ goto out; -+ -+ ++wg->device_update_gen; -+ -+ if (info->attrs[WGDEVICE_A_FWMARK]) { -+ struct wg_peer *peer; -+ -+ wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]); -+ list_for_each_entry(peer, &wg->peer_list, peer_list) -+ wg_socket_clear_peer_endpoint_src(peer); -+ } -+ -+ if (info->attrs[WGDEVICE_A_LISTEN_PORT]) { -+ ret = set_port(wg, -+ nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT])); -+ if (ret) -+ goto out; -+ } -+ -+ if (flags & WGDEVICE_F_REPLACE_PEERS) -+ wg_peer_remove_all(wg); -+ -+ if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && -+ nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == -+ NOISE_PUBLIC_KEY_LEN) { -+ u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); -+ u8 public_key[NOISE_PUBLIC_KEY_LEN]; -+ struct wg_peer *peer, *temp; -+ -+ if (!crypto_memneq(wg->static_identity.static_private, -+ private_key, NOISE_PUBLIC_KEY_LEN)) -+ goto skip_set_private_key; -+ -+ /* We remove before setting, to prevent race, which means doing -+ * two 25519-genpub ops. 
-+ */ -+ if (curve25519_generate_public(public_key, private_key)) { -+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, -+ public_key); -+ if (peer) { -+ wg_peer_put(peer); -+ wg_peer_remove(peer); -+ } -+ } -+ -+ down_write(&wg->static_identity.lock); -+ wg_noise_set_static_identity_private_key(&wg->static_identity, -+ private_key); -+ list_for_each_entry_safe(peer, temp, &wg->peer_list, -+ peer_list) { -+ if (wg_noise_precompute_static_static(peer)) -+ wg_noise_expire_current_peer_keypairs(peer); -+ else -+ wg_peer_remove(peer); -+ } -+ wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); -+ up_write(&wg->static_identity.lock); -+ } -+skip_set_private_key: -+ -+ if (info->attrs[WGDEVICE_A_PEERS]) { -+ struct nlattr *attr, *peer[WGPEER_A_MAX + 1]; -+ int rem; -+ -+ nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) { -+ ret = nla_parse_nested(peer, WGPEER_A_MAX, attr, -+ peer_policy, NULL); -+ if (ret < 0) -+ goto out; -+ ret = set_peer(wg, peer); -+ if (ret < 0) -+ goto out; -+ } -+ } -+ ret = 0; -+ -+out: -+ mutex_unlock(&wg->device_update_lock); -+ rtnl_unlock(); -+ dev_put(wg->dev); -+out_nodev: -+ if (info->attrs[WGDEVICE_A_PRIVATE_KEY]) -+ memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]), -+ nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY])); -+ return ret; -+} -+ -+static const struct genl_ops genl_ops[] = { -+ { -+ .cmd = WG_CMD_GET_DEVICE, -+ .start = wg_get_device_start, -+ .dumpit = wg_get_device_dump, -+ .done = wg_get_device_done, -+ .flags = GENL_UNS_ADMIN_PERM -+ }, { -+ .cmd = WG_CMD_SET_DEVICE, -+ .doit = wg_set_device, -+ .flags = GENL_UNS_ADMIN_PERM -+ } -+}; -+ -+static struct genl_family genl_family __ro_after_init = { -+ .ops = genl_ops, -+ .n_ops = ARRAY_SIZE(genl_ops), -+ .name = WG_GENL_NAME, -+ .version = WG_GENL_VERSION, -+ .maxattr = WGDEVICE_A_MAX, -+ .module = THIS_MODULE, -+ .policy = device_policy, -+ .netnsok = true -+}; -+ -+int __init wg_genetlink_init(void) -+{ -+ return 
genl_register_family(&genl_family); -+} -+ -+void __exit wg_genetlink_uninit(void) -+{ -+ genl_unregister_family(&genl_family); -+} ---- /dev/null -+++ b/drivers/net/wireguard/netlink.h -@@ -0,0 +1,12 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#ifndef _WG_NETLINK_H -+#define _WG_NETLINK_H -+ -+int wg_genetlink_init(void); -+void wg_genetlink_uninit(void); -+ -+#endif /* _WG_NETLINK_H */ ---- /dev/null -+++ b/drivers/net/wireguard/noise.c -@@ -0,0 +1,828 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include "noise.h" -+#include "device.h" -+#include "peer.h" -+#include "messages.h" -+#include "queueing.h" -+#include "peerlookup.h" -+ -+#include <linux/rcupdate.h> -+#include <linux/slab.h> -+#include <linux/bitmap.h> -+#include <linux/scatterlist.h> -+#include <linux/highmem.h> -+#include <crypto/algapi.h> -+ -+/* This implements Noise_IKpsk2: -+ * -+ * <- s -+ * ****** -+ * -> e, es, s, ss, {t} -+ * <- e, ee, se, psk, {} -+ */ -+ -+static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s"; -+static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com"; -+static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init; -+static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init; -+static atomic64_t keypair_counter = ATOMIC64_INIT(0); -+ -+void __init wg_noise_init(void) -+{ -+ struct blake2s_state blake; -+ -+ blake2s(handshake_init_chaining_key, handshake_name, NULL, -+ NOISE_HASH_LEN, sizeof(handshake_name), 0); -+ blake2s_init(&blake, NOISE_HASH_LEN); -+ blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN); -+ blake2s_update(&blake, identifier_name, sizeof(identifier_name)); -+ blake2s_final(&blake, handshake_init_hash); -+} -+ -+/* Must hold peer->handshake.static_identity->lock */ -+bool 
wg_noise_precompute_static_static(struct wg_peer *peer) -+{ -+ bool ret = true; -+ -+ down_write(&peer->handshake.lock); -+ if (peer->handshake.static_identity->has_identity) -+ ret = curve25519( -+ peer->handshake.precomputed_static_static, -+ peer->handshake.static_identity->static_private, -+ peer->handshake.remote_static); -+ else -+ memset(peer->handshake.precomputed_static_static, 0, -+ NOISE_PUBLIC_KEY_LEN); -+ up_write(&peer->handshake.lock); -+ return ret; -+} -+ -+bool wg_noise_handshake_init(struct noise_handshake *handshake, -+ struct noise_static_identity *static_identity, -+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], -+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], -+ struct wg_peer *peer) -+{ -+ memset(handshake, 0, sizeof(*handshake)); -+ init_rwsem(&handshake->lock); -+ handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE; -+ handshake->entry.peer = peer; -+ memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN); -+ if (peer_preshared_key) -+ memcpy(handshake->preshared_key, peer_preshared_key, -+ NOISE_SYMMETRIC_KEY_LEN); -+ handshake->static_identity = static_identity; -+ handshake->state = HANDSHAKE_ZEROED; -+ return wg_noise_precompute_static_static(peer); -+} -+ -+static void handshake_zero(struct noise_handshake *handshake) -+{ -+ memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN); -+ memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN); -+ memset(&handshake->hash, 0, NOISE_HASH_LEN); -+ memset(&handshake->chaining_key, 0, NOISE_HASH_LEN); -+ handshake->remote_index = 0; -+ handshake->state = HANDSHAKE_ZEROED; -+} -+ -+void wg_noise_handshake_clear(struct noise_handshake *handshake) -+{ -+ wg_index_hashtable_remove( -+ handshake->entry.peer->device->index_hashtable, -+ &handshake->entry); -+ down_write(&handshake->lock); -+ handshake_zero(handshake); -+ up_write(&handshake->lock); -+ wg_index_hashtable_remove( -+ handshake->entry.peer->device->index_hashtable, -+ &handshake->entry); -+} -+ 
-+static struct noise_keypair *keypair_create(struct wg_peer *peer) -+{ -+ struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL); -+ -+ if (unlikely(!keypair)) -+ return NULL; -+ keypair->internal_id = atomic64_inc_return(&keypair_counter); -+ keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; -+ keypair->entry.peer = peer; -+ kref_init(&keypair->refcount); -+ return keypair; -+} -+ -+static void keypair_free_rcu(struct rcu_head *rcu) -+{ -+ kzfree(container_of(rcu, struct noise_keypair, rcu)); -+} -+ -+static void keypair_free_kref(struct kref *kref) -+{ -+ struct noise_keypair *keypair = -+ container_of(kref, struct noise_keypair, refcount); -+ -+ net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", -+ keypair->entry.peer->device->dev->name, -+ keypair->internal_id, -+ keypair->entry.peer->internal_id); -+ wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable, -+ &keypair->entry); -+ call_rcu(&keypair->rcu, keypair_free_rcu); -+} -+ -+void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now) -+{ -+ if (unlikely(!keypair)) -+ return; -+ if (unlikely(unreference_now)) -+ wg_index_hashtable_remove( -+ keypair->entry.peer->device->index_hashtable, -+ &keypair->entry); -+ kref_put(&keypair->refcount, keypair_free_kref); -+} -+ -+struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair) -+{ -+ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), -+ "Taking noise keypair reference without holding the RCU BH read lock"); -+ if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount))) -+ return NULL; -+ return keypair; -+} -+ -+void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) -+{ -+ struct noise_keypair *old; -+ -+ spin_lock_bh(&keypairs->keypair_update_lock); -+ -+ /* We zero the next_keypair before zeroing the others, so that -+ * wg_noise_received_with_keypair returns early before subsequent ones -+ * are zeroed. 
-+ */ -+ old = rcu_dereference_protected(keypairs->next_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)); -+ RCU_INIT_POINTER(keypairs->next_keypair, NULL); -+ wg_noise_keypair_put(old, true); -+ -+ old = rcu_dereference_protected(keypairs->previous_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)); -+ RCU_INIT_POINTER(keypairs->previous_keypair, NULL); -+ wg_noise_keypair_put(old, true); -+ -+ old = rcu_dereference_protected(keypairs->current_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)); -+ RCU_INIT_POINTER(keypairs->current_keypair, NULL); -+ wg_noise_keypair_put(old, true); -+ -+ spin_unlock_bh(&keypairs->keypair_update_lock); -+} -+ -+void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) -+{ -+ struct noise_keypair *keypair; -+ -+ wg_noise_handshake_clear(&peer->handshake); -+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); -+ -+ spin_lock_bh(&peer->keypairs.keypair_update_lock); -+ keypair = rcu_dereference_protected(peer->keypairs.next_keypair, -+ lockdep_is_held(&peer->keypairs.keypair_update_lock)); -+ if (keypair) -+ keypair->sending.is_valid = false; -+ keypair = rcu_dereference_protected(peer->keypairs.current_keypair, -+ lockdep_is_held(&peer->keypairs.keypair_update_lock)); -+ if (keypair) -+ keypair->sending.is_valid = false; -+ spin_unlock_bh(&peer->keypairs.keypair_update_lock); -+} -+ -+static void add_new_keypair(struct noise_keypairs *keypairs, -+ struct noise_keypair *new_keypair) -+{ -+ struct noise_keypair *previous_keypair, *next_keypair, *current_keypair; -+ -+ spin_lock_bh(&keypairs->keypair_update_lock); -+ previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)); -+ next_keypair = rcu_dereference_protected(keypairs->next_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)); -+ current_keypair = rcu_dereference_protected(keypairs->current_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)); 
-+ if (new_keypair->i_am_the_initiator) { -+ /* If we're the initiator, it means we've sent a handshake, and -+ * received a confirmation response, which means this new -+ * keypair can now be used. -+ */ -+ if (next_keypair) { -+ /* If there already was a next keypair pending, we -+ * demote it to be the previous keypair, and free the -+ * existing current. Note that this means KCI can result -+ * in this transition. It would perhaps be more sound to -+ * always just get rid of the unused next keypair -+ * instead of putting it in the previous slot, but this -+ * might be a bit less robust. Something to think about -+ * for the future. -+ */ -+ RCU_INIT_POINTER(keypairs->next_keypair, NULL); -+ rcu_assign_pointer(keypairs->previous_keypair, -+ next_keypair); -+ wg_noise_keypair_put(current_keypair, true); -+ } else /* If there wasn't an existing next keypair, we replace -+ * the previous with the current one. -+ */ -+ rcu_assign_pointer(keypairs->previous_keypair, -+ current_keypair); -+ /* At this point we can get rid of the old previous keypair, and -+ * set up the new keypair. -+ */ -+ wg_noise_keypair_put(previous_keypair, true); -+ rcu_assign_pointer(keypairs->current_keypair, new_keypair); -+ } else { -+ /* If we're the responder, it means we can't use the new keypair -+ * until we receive confirmation via the first data packet, so -+ * we get rid of the existing previous one, the possibly -+ * existing next one, and slide in the new next one. -+ */ -+ rcu_assign_pointer(keypairs->next_keypair, new_keypair); -+ wg_noise_keypair_put(next_keypair, true); -+ RCU_INIT_POINTER(keypairs->previous_keypair, NULL); -+ wg_noise_keypair_put(previous_keypair, true); -+ } -+ spin_unlock_bh(&keypairs->keypair_update_lock); -+} -+ -+bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, -+ struct noise_keypair *received_keypair) -+{ -+ struct noise_keypair *old_keypair; -+ bool key_is_new; -+ -+ /* We first check without taking the spinlock. 
*/ -+ key_is_new = received_keypair == -+ rcu_access_pointer(keypairs->next_keypair); -+ if (likely(!key_is_new)) -+ return false; -+ -+ spin_lock_bh(&keypairs->keypair_update_lock); -+ /* After locking, we double check that things didn't change from -+ * beneath us. -+ */ -+ if (unlikely(received_keypair != -+ rcu_dereference_protected(keypairs->next_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)))) { -+ spin_unlock_bh(&keypairs->keypair_update_lock); -+ return false; -+ } -+ -+ /* When we've finally received the confirmation, we slide the next -+ * into the current, the current into the previous, and get rid of -+ * the old previous. -+ */ -+ old_keypair = rcu_dereference_protected(keypairs->previous_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock)); -+ rcu_assign_pointer(keypairs->previous_keypair, -+ rcu_dereference_protected(keypairs->current_keypair, -+ lockdep_is_held(&keypairs->keypair_update_lock))); -+ wg_noise_keypair_put(old_keypair, true); -+ rcu_assign_pointer(keypairs->current_keypair, received_keypair); -+ RCU_INIT_POINTER(keypairs->next_keypair, NULL); -+ -+ spin_unlock_bh(&keypairs->keypair_update_lock); -+ return true; -+} -+ -+/* Must hold static_identity->lock */ -+void wg_noise_set_static_identity_private_key( -+ struct noise_static_identity *static_identity, -+ const u8 private_key[NOISE_PUBLIC_KEY_LEN]) -+{ -+ memcpy(static_identity->static_private, private_key, -+ NOISE_PUBLIC_KEY_LEN); -+ curve25519_clamp_secret(static_identity->static_private); -+ static_identity->has_identity = curve25519_generate_public( -+ static_identity->static_public, private_key); -+} -+ -+/* This is Hugo Krawczyk's HKDF: -+ * - https://eprint.iacr.org/2010/264.pdf -+ * - https://tools.ietf.org/html/rfc5869 -+ */ -+static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, -+ size_t first_len, size_t second_len, size_t third_len, -+ size_t data_len, const u8 chaining_key[NOISE_HASH_LEN]) -+{ -+ u8 output[BLAKE2S_HASH_SIZE 
+ 1]; -+ u8 secret[BLAKE2S_HASH_SIZE]; -+ -+ WARN_ON(IS_ENABLED(DEBUG) && -+ (first_len > BLAKE2S_HASH_SIZE || -+ second_len > BLAKE2S_HASH_SIZE || -+ third_len > BLAKE2S_HASH_SIZE || -+ ((second_len || second_dst || third_len || third_dst) && -+ (!first_len || !first_dst)) || -+ ((third_len || third_dst) && (!second_len || !second_dst)))); -+ -+ /* Extract entropy from data into secret */ -+ blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); -+ -+ if (!first_dst || !first_len) -+ goto out; -+ -+ /* Expand first key: key = secret, data = 0x1 */ -+ output[0] = 1; -+ blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); -+ memcpy(first_dst, output, first_len); -+ -+ if (!second_dst || !second_len) -+ goto out; -+ -+ /* Expand second key: key = secret, data = first-key || 0x2 */ -+ output[BLAKE2S_HASH_SIZE] = 2; -+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, -+ BLAKE2S_HASH_SIZE); -+ memcpy(second_dst, output, second_len); -+ -+ if (!third_dst || !third_len) -+ goto out; -+ -+ /* Expand third key: key = secret, data = second-key || 0x3 */ -+ output[BLAKE2S_HASH_SIZE] = 3; -+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, -+ BLAKE2S_HASH_SIZE); -+ memcpy(third_dst, output, third_len); -+ -+out: -+ /* Clear sensitive data from stack */ -+ memzero_explicit(secret, BLAKE2S_HASH_SIZE); -+ memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); -+} -+ -+static void symmetric_key_init(struct noise_symmetric_key *key) -+{ -+ spin_lock_init(&key->counter.receive.lock); -+ atomic64_set(&key->counter.counter, 0); -+ memset(key->counter.receive.backtrack, 0, -+ sizeof(key->counter.receive.backtrack)); -+ key->birthdate = ktime_get_coarse_boottime_ns(); -+ key->is_valid = true; -+} -+ -+static void derive_keys(struct noise_symmetric_key *first_dst, -+ struct noise_symmetric_key *second_dst, -+ const u8 chaining_key[NOISE_HASH_LEN]) -+{ -+ kdf(first_dst->key, second_dst->key, NULL, NULL, -+ NOISE_SYMMETRIC_KEY_LEN, 
NOISE_SYMMETRIC_KEY_LEN, 0, 0, -+ chaining_key); -+ symmetric_key_init(first_dst); -+ symmetric_key_init(second_dst); -+} -+ -+static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], -+ u8 key[NOISE_SYMMETRIC_KEY_LEN], -+ const u8 private[NOISE_PUBLIC_KEY_LEN], -+ const u8 public[NOISE_PUBLIC_KEY_LEN]) -+{ -+ u8 dh_calculation[NOISE_PUBLIC_KEY_LEN]; -+ -+ if (unlikely(!curve25519(dh_calculation, private, public))) -+ return false; -+ kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, -+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); -+ memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN); -+ return true; -+} -+ -+static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) -+{ -+ struct blake2s_state blake; -+ -+ blake2s_init(&blake, NOISE_HASH_LEN); -+ blake2s_update(&blake, hash, NOISE_HASH_LEN); -+ blake2s_update(&blake, src, src_len); -+ blake2s_final(&blake, hash); -+} -+ -+static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], -+ u8 key[NOISE_SYMMETRIC_KEY_LEN], -+ const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) -+{ -+ u8 temp_hash[NOISE_HASH_LEN]; -+ -+ kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, -+ NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key); -+ mix_hash(hash, temp_hash, NOISE_HASH_LEN); -+ memzero_explicit(temp_hash, NOISE_HASH_LEN); -+} -+ -+static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], -+ u8 hash[NOISE_HASH_LEN], -+ const u8 remote_static[NOISE_PUBLIC_KEY_LEN]) -+{ -+ memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); -+ memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); -+ mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN); -+} -+ -+static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, -+ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], -+ u8 hash[NOISE_HASH_LEN]) -+{ -+ chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, -+ NOISE_HASH_LEN, -+ 0 /* Always zero for 
Noise_IK */, key); -+ mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); -+} -+ -+static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, -+ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], -+ u8 hash[NOISE_HASH_LEN]) -+{ -+ if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, -+ hash, NOISE_HASH_LEN, -+ 0 /* Always zero for Noise_IK */, key)) -+ return false; -+ mix_hash(hash, src_ciphertext, src_len); -+ return true; -+} -+ -+static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], -+ const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], -+ u8 chaining_key[NOISE_HASH_LEN], -+ u8 hash[NOISE_HASH_LEN]) -+{ -+ if (ephemeral_dst != ephemeral_src) -+ memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN); -+ mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN); -+ kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, -+ NOISE_PUBLIC_KEY_LEN, chaining_key); -+} -+ -+static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN]) -+{ -+ struct timespec64 now; -+ -+ ktime_get_real_ts64(&now); -+ -+ /* In order to prevent some sort of infoleak from precise timers, we -+ * round down the nanoseconds part to the closest rounded-down power of -+ * two to the maximum initiations per second allowed anyway by the -+ * implementation. -+ */ -+ now.tv_nsec = ALIGN_DOWN(now.tv_nsec, -+ rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND)); -+ -+ /* https://cr.yp.to/libtai/tai64.html */ -+ *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); -+ *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); -+} -+ -+bool -+wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, -+ struct noise_handshake *handshake) -+{ -+ u8 timestamp[NOISE_TIMESTAMP_LEN]; -+ u8 key[NOISE_SYMMETRIC_KEY_LEN]; -+ bool ret = false; -+ -+ /* We need to wait for crng _before_ taking any locks, since -+ * curve25519_generate_secret uses get_random_bytes_wait. 
-+ */ -+ wait_for_random_bytes(); -+ -+ down_read(&handshake->static_identity->lock); -+ down_write(&handshake->lock); -+ -+ if (unlikely(!handshake->static_identity->has_identity)) -+ goto out; -+ -+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION); -+ -+ handshake_init(handshake->chaining_key, handshake->hash, -+ handshake->remote_static); -+ -+ /* e */ -+ curve25519_generate_secret(handshake->ephemeral_private); -+ if (!curve25519_generate_public(dst->unencrypted_ephemeral, -+ handshake->ephemeral_private)) -+ goto out; -+ message_ephemeral(dst->unencrypted_ephemeral, -+ dst->unencrypted_ephemeral, handshake->chaining_key, -+ handshake->hash); -+ -+ /* es */ -+ if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, -+ handshake->remote_static)) -+ goto out; -+ -+ /* s */ -+ message_encrypt(dst->encrypted_static, -+ handshake->static_identity->static_public, -+ NOISE_PUBLIC_KEY_LEN, key, handshake->hash); -+ -+ /* ss */ -+ kdf(handshake->chaining_key, key, NULL, -+ handshake->precomputed_static_static, NOISE_HASH_LEN, -+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, -+ handshake->chaining_key); -+ -+ /* {t} */ -+ tai64n_now(timestamp); -+ message_encrypt(dst->encrypted_timestamp, timestamp, -+ NOISE_TIMESTAMP_LEN, key, handshake->hash); -+ -+ dst->sender_index = wg_index_hashtable_insert( -+ handshake->entry.peer->device->index_hashtable, -+ &handshake->entry); -+ -+ handshake->state = HANDSHAKE_CREATED_INITIATION; -+ ret = true; -+ -+out: -+ up_write(&handshake->lock); -+ up_read(&handshake->static_identity->lock); -+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); -+ return ret; -+} -+ -+struct wg_peer * -+wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, -+ struct wg_device *wg) -+{ -+ struct wg_peer *peer = NULL, *ret_peer = NULL; -+ struct noise_handshake *handshake; -+ bool replay_attack, flood_attack; -+ u8 key[NOISE_SYMMETRIC_KEY_LEN]; -+ u8 chaining_key[NOISE_HASH_LEN]; -+ u8 
hash[NOISE_HASH_LEN]; -+ u8 s[NOISE_PUBLIC_KEY_LEN]; -+ u8 e[NOISE_PUBLIC_KEY_LEN]; -+ u8 t[NOISE_TIMESTAMP_LEN]; -+ u64 initiation_consumption; -+ -+ down_read(&wg->static_identity.lock); -+ if (unlikely(!wg->static_identity.has_identity)) -+ goto out; -+ -+ handshake_init(chaining_key, hash, wg->static_identity.static_public); -+ -+ /* e */ -+ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); -+ -+ /* es */ -+ if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e)) -+ goto out; -+ -+ /* s */ -+ if (!message_decrypt(s, src->encrypted_static, -+ sizeof(src->encrypted_static), key, hash)) -+ goto out; -+ -+ /* Lookup which peer we're actually talking to */ -+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); -+ if (!peer) -+ goto out; -+ handshake = &peer->handshake; -+ -+ /* ss */ -+ kdf(chaining_key, key, NULL, handshake->precomputed_static_static, -+ NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, -+ chaining_key); -+ -+ /* {t} */ -+ if (!message_decrypt(t, src->encrypted_timestamp, -+ sizeof(src->encrypted_timestamp), key, hash)) -+ goto out; -+ -+ down_read(&handshake->lock); -+ replay_attack = memcmp(t, handshake->latest_timestamp, -+ NOISE_TIMESTAMP_LEN) <= 0; -+ flood_attack = (s64)handshake->last_initiation_consumption + -+ NSEC_PER_SEC / INITIATIONS_PER_SECOND > -+ (s64)ktime_get_coarse_boottime_ns(); -+ up_read(&handshake->lock); -+ if (replay_attack || flood_attack) -+ goto out; -+ -+ /* Success! 
Copy everything to peer */ -+ down_write(&handshake->lock); -+ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); -+ if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) -+ memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); -+ memcpy(handshake->hash, hash, NOISE_HASH_LEN); -+ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); -+ handshake->remote_index = src->sender_index; -+ if ((s64)(handshake->last_initiation_consumption - -+ (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) -+ handshake->last_initiation_consumption = initiation_consumption; -+ handshake->state = HANDSHAKE_CONSUMED_INITIATION; -+ up_write(&handshake->lock); -+ ret_peer = peer; -+ -+out: -+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); -+ memzero_explicit(hash, NOISE_HASH_LEN); -+ memzero_explicit(chaining_key, NOISE_HASH_LEN); -+ up_read(&wg->static_identity.lock); -+ if (!ret_peer) -+ wg_peer_put(peer); -+ return ret_peer; -+} -+ -+bool wg_noise_handshake_create_response(struct message_handshake_response *dst, -+ struct noise_handshake *handshake) -+{ -+ u8 key[NOISE_SYMMETRIC_KEY_LEN]; -+ bool ret = false; -+ -+ /* We need to wait for crng _before_ taking any locks, since -+ * curve25519_generate_secret uses get_random_bytes_wait. 
-+ */ -+ wait_for_random_bytes(); -+ -+ down_read(&handshake->static_identity->lock); -+ down_write(&handshake->lock); -+ -+ if (handshake->state != HANDSHAKE_CONSUMED_INITIATION) -+ goto out; -+ -+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE); -+ dst->receiver_index = handshake->remote_index; -+ -+ /* e */ -+ curve25519_generate_secret(handshake->ephemeral_private); -+ if (!curve25519_generate_public(dst->unencrypted_ephemeral, -+ handshake->ephemeral_private)) -+ goto out; -+ message_ephemeral(dst->unencrypted_ephemeral, -+ dst->unencrypted_ephemeral, handshake->chaining_key, -+ handshake->hash); -+ -+ /* ee */ -+ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, -+ handshake->remote_ephemeral)) -+ goto out; -+ -+ /* se */ -+ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, -+ handshake->remote_static)) -+ goto out; -+ -+ /* psk */ -+ mix_psk(handshake->chaining_key, handshake->hash, key, -+ handshake->preshared_key); -+ -+ /* {} */ -+ message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); -+ -+ dst->sender_index = wg_index_hashtable_insert( -+ handshake->entry.peer->device->index_hashtable, -+ &handshake->entry); -+ -+ handshake->state = HANDSHAKE_CREATED_RESPONSE; -+ ret = true; -+ -+out: -+ up_write(&handshake->lock); -+ up_read(&handshake->static_identity->lock); -+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); -+ return ret; -+} -+ -+struct wg_peer * -+wg_noise_handshake_consume_response(struct message_handshake_response *src, -+ struct wg_device *wg) -+{ -+ enum noise_handshake_state state = HANDSHAKE_ZEROED; -+ struct wg_peer *peer = NULL, *ret_peer = NULL; -+ struct noise_handshake *handshake; -+ u8 key[NOISE_SYMMETRIC_KEY_LEN]; -+ u8 hash[NOISE_HASH_LEN]; -+ u8 chaining_key[NOISE_HASH_LEN]; -+ u8 e[NOISE_PUBLIC_KEY_LEN]; -+ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; -+ u8 static_private[NOISE_PUBLIC_KEY_LEN]; -+ -+ down_read(&wg->static_identity.lock); -+ -+ if 
(unlikely(!wg->static_identity.has_identity)) -+ goto out; -+ -+ handshake = (struct noise_handshake *)wg_index_hashtable_lookup( -+ wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, -+ src->receiver_index, &peer); -+ if (unlikely(!handshake)) -+ goto out; -+ -+ down_read(&handshake->lock); -+ state = handshake->state; -+ memcpy(hash, handshake->hash, NOISE_HASH_LEN); -+ memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); -+ memcpy(ephemeral_private, handshake->ephemeral_private, -+ NOISE_PUBLIC_KEY_LEN); -+ up_read(&handshake->lock); -+ -+ if (state != HANDSHAKE_CREATED_INITIATION) -+ goto fail; -+ -+ /* e */ -+ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); -+ -+ /* ee */ -+ if (!mix_dh(chaining_key, NULL, ephemeral_private, e)) -+ goto fail; -+ -+ /* se */ -+ if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e)) -+ goto fail; -+ -+ /* psk */ -+ mix_psk(chaining_key, hash, key, handshake->preshared_key); -+ -+ /* {} */ -+ if (!message_decrypt(NULL, src->encrypted_nothing, -+ sizeof(src->encrypted_nothing), key, hash)) -+ goto fail; -+ -+ /* Success! Copy everything to peer */ -+ down_write(&handshake->lock); -+ /* It's important to check that the state is still the same, while we -+ * have an exclusive lock. 
-+ */ -+ if (handshake->state != state) { -+ up_write(&handshake->lock); -+ goto fail; -+ } -+ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); -+ memcpy(handshake->hash, hash, NOISE_HASH_LEN); -+ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); -+ handshake->remote_index = src->sender_index; -+ handshake->state = HANDSHAKE_CONSUMED_RESPONSE; -+ up_write(&handshake->lock); -+ ret_peer = peer; -+ goto out; -+ -+fail: -+ wg_peer_put(peer); -+out: -+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); -+ memzero_explicit(hash, NOISE_HASH_LEN); -+ memzero_explicit(chaining_key, NOISE_HASH_LEN); -+ memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); -+ memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); -+ up_read(&wg->static_identity.lock); -+ return ret_peer; -+} -+ -+bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, -+ struct noise_keypairs *keypairs) -+{ -+ struct noise_keypair *new_keypair; -+ bool ret = false; -+ -+ down_write(&handshake->lock); -+ if (handshake->state != HANDSHAKE_CREATED_RESPONSE && -+ handshake->state != HANDSHAKE_CONSUMED_RESPONSE) -+ goto out; -+ -+ new_keypair = keypair_create(handshake->entry.peer); -+ if (!new_keypair) -+ goto out; -+ new_keypair->i_am_the_initiator = handshake->state == -+ HANDSHAKE_CONSUMED_RESPONSE; -+ new_keypair->remote_index = handshake->remote_index; -+ -+ if (new_keypair->i_am_the_initiator) -+ derive_keys(&new_keypair->sending, &new_keypair->receiving, -+ handshake->chaining_key); -+ else -+ derive_keys(&new_keypair->receiving, &new_keypair->sending, -+ handshake->chaining_key); -+ -+ handshake_zero(handshake); -+ rcu_read_lock_bh(); -+ if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, -+ handshake)->is_dead))) { -+ add_new_keypair(keypairs, new_keypair); -+ net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", -+ handshake->entry.peer->device->dev->name, -+ new_keypair->internal_id, -+ handshake->entry.peer->internal_id); -+ ret = 
wg_index_hashtable_replace( -+ handshake->entry.peer->device->index_hashtable, -+ &handshake->entry, &new_keypair->entry); -+ } else { -+ kzfree(new_keypair); -+ } -+ rcu_read_unlock_bh(); -+ -+out: -+ up_write(&handshake->lock); -+ return ret; -+} ---- /dev/null -+++ b/drivers/net/wireguard/noise.h -@@ -0,0 +1,137 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+#ifndef _WG_NOISE_H -+#define _WG_NOISE_H -+ -+#include "messages.h" -+#include "peerlookup.h" -+ -+#include <linux/types.h> -+#include <linux/spinlock.h> -+#include <linux/atomic.h> -+#include <linux/rwsem.h> -+#include <linux/mutex.h> -+#include <linux/kref.h> -+ -+union noise_counter { -+ struct { -+ u64 counter; -+ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; -+ spinlock_t lock; -+ } receive; -+ atomic64_t counter; -+}; -+ -+struct noise_symmetric_key { -+ u8 key[NOISE_SYMMETRIC_KEY_LEN]; -+ union noise_counter counter; -+ u64 birthdate; -+ bool is_valid; -+}; -+ -+struct noise_keypair { -+ struct index_hashtable_entry entry; -+ struct noise_symmetric_key sending; -+ struct noise_symmetric_key receiving; -+ __le32 remote_index; -+ bool i_am_the_initiator; -+ struct kref refcount; -+ struct rcu_head rcu; -+ u64 internal_id; -+}; -+ -+struct noise_keypairs { -+ struct noise_keypair __rcu *current_keypair; -+ struct noise_keypair __rcu *previous_keypair; -+ struct noise_keypair __rcu *next_keypair; -+ spinlock_t keypair_update_lock; -+}; -+ -+struct noise_static_identity { -+ u8 static_public[NOISE_PUBLIC_KEY_LEN]; -+ u8 static_private[NOISE_PUBLIC_KEY_LEN]; -+ struct rw_semaphore lock; -+ bool has_identity; -+}; -+ -+enum noise_handshake_state { -+ HANDSHAKE_ZEROED, -+ HANDSHAKE_CREATED_INITIATION, -+ HANDSHAKE_CONSUMED_INITIATION, -+ HANDSHAKE_CREATED_RESPONSE, -+ HANDSHAKE_CONSUMED_RESPONSE -+}; -+ -+struct noise_handshake { -+ struct index_hashtable_entry entry; -+ -+ enum 
noise_handshake_state state; -+ u64 last_initiation_consumption; -+ -+ struct noise_static_identity *static_identity; -+ -+ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; -+ u8 remote_static[NOISE_PUBLIC_KEY_LEN]; -+ u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN]; -+ u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN]; -+ -+ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; -+ -+ u8 hash[NOISE_HASH_LEN]; -+ u8 chaining_key[NOISE_HASH_LEN]; -+ -+ u8 latest_timestamp[NOISE_TIMESTAMP_LEN]; -+ __le32 remote_index; -+ -+ /* Protects all members except the immutable (after noise_handshake_ -+ * init): remote_static, precomputed_static_static, static_identity. -+ */ -+ struct rw_semaphore lock; -+}; -+ -+struct wg_device; -+ -+void wg_noise_init(void); -+bool wg_noise_handshake_init(struct noise_handshake *handshake, -+ struct noise_static_identity *static_identity, -+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], -+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], -+ struct wg_peer *peer); -+void wg_noise_handshake_clear(struct noise_handshake *handshake); -+static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) -+{ -+ atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - -+ (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); -+} -+ -+void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now); -+struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair); -+void wg_noise_keypairs_clear(struct noise_keypairs *keypairs); -+bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, -+ struct noise_keypair *received_keypair); -+void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); -+ -+void wg_noise_set_static_identity_private_key( -+ struct noise_static_identity *static_identity, -+ const u8 private_key[NOISE_PUBLIC_KEY_LEN]); -+bool wg_noise_precompute_static_static(struct wg_peer *peer); -+ -+bool -+wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, -+ struct noise_handshake 
*handshake); -+struct wg_peer * -+wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, -+ struct wg_device *wg); -+ -+bool wg_noise_handshake_create_response(struct message_handshake_response *dst, -+ struct noise_handshake *handshake); -+struct wg_peer * -+wg_noise_handshake_consume_response(struct message_handshake_response *src, -+ struct wg_device *wg); -+ -+bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, -+ struct noise_keypairs *keypairs); -+ -+#endif /* _WG_NOISE_H */ ---- /dev/null -+++ b/drivers/net/wireguard/peer.c -@@ -0,0 +1,240 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include "peer.h" -+#include "device.h" -+#include "queueing.h" -+#include "timers.h" -+#include "peerlookup.h" -+#include "noise.h" -+ -+#include <linux/kref.h> -+#include <linux/lockdep.h> -+#include <linux/rcupdate.h> -+#include <linux/list.h> -+ -+static atomic64_t peer_counter = ATOMIC64_INIT(0); -+ -+struct wg_peer *wg_peer_create(struct wg_device *wg, -+ const u8 public_key[NOISE_PUBLIC_KEY_LEN], -+ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]) -+{ -+ struct wg_peer *peer; -+ int ret = -ENOMEM; -+ -+ lockdep_assert_held(&wg->device_update_lock); -+ -+ if (wg->num_peers >= MAX_PEERS_PER_DEVICE) -+ return ERR_PTR(ret); -+ -+ peer = kzalloc(sizeof(*peer), GFP_KERNEL); -+ if (unlikely(!peer)) -+ return ERR_PTR(ret); -+ peer->device = wg; -+ -+ if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, -+ public_key, preshared_key, peer)) { -+ ret = -EKEYREJECTED; -+ goto err_1; -+ } -+ if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) -+ goto err_1; -+ if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, -+ MAX_QUEUED_PACKETS)) -+ goto err_2; -+ if (wg_packet_queue_init(&peer->rx_queue, NULL, false, -+ MAX_QUEUED_PACKETS)) -+ goto err_3; -+ -+ peer->internal_id = atomic64_inc_return(&peer_counter); 
-+ peer->serial_work_cpu = nr_cpumask_bits; -+ wg_cookie_init(&peer->latest_cookie); -+ wg_timers_init(peer); -+ wg_cookie_checker_precompute_peer_keys(peer); -+ spin_lock_init(&peer->keypairs.keypair_update_lock); -+ INIT_WORK(&peer->transmit_handshake_work, -+ wg_packet_handshake_send_worker); -+ rwlock_init(&peer->endpoint_lock); -+ kref_init(&peer->refcount); -+ skb_queue_head_init(&peer->staged_packet_queue); -+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); -+ set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state); -+ netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll, -+ NAPI_POLL_WEIGHT); -+ napi_enable(&peer->napi); -+ list_add_tail(&peer->peer_list, &wg->peer_list); -+ INIT_LIST_HEAD(&peer->allowedips_list); -+ wg_pubkey_hashtable_add(wg->peer_hashtable, peer); -+ ++wg->num_peers; -+ pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); -+ return peer; -+ -+err_3: -+ wg_packet_queue_free(&peer->tx_queue, false); -+err_2: -+ dst_cache_destroy(&peer->endpoint_cache); -+err_1: -+ kfree(peer); -+ return ERR_PTR(ret); -+} -+ -+struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer) -+{ -+ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), -+ "Taking peer reference without holding the RCU read lock"); -+ if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount))) -+ return NULL; -+ return peer; -+} -+ -+static void peer_make_dead(struct wg_peer *peer) -+{ -+ /* Remove from configuration-time lookup structures. */ -+ list_del_init(&peer->peer_list); -+ wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer, -+ &peer->device->device_update_lock); -+ wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer); -+ -+ /* Mark as dead, so that we don't allow jumping contexts after. */ -+ WRITE_ONCE(peer->is_dead, true); -+ -+ /* The caller must now synchronize_rcu() for this to take effect. 
*/ -+} -+ -+static void peer_remove_after_dead(struct wg_peer *peer) -+{ -+ WARN_ON(!peer->is_dead); -+ -+ /* No more keypairs can be created for this peer, since is_dead protects -+ * add_new_keypair, so we can now destroy existing ones. -+ */ -+ wg_noise_keypairs_clear(&peer->keypairs); -+ -+ /* Destroy all ongoing timers that were in-flight at the beginning of -+ * this function. -+ */ -+ wg_timers_stop(peer); -+ -+ /* The transition between packet encryption/decryption queues isn't -+ * guarded by is_dead, but each reference's life is strictly bounded by -+ * two generations: once for parallel crypto and once for serial -+ * ingestion, so we can simply flush twice, and be sure that we no -+ * longer have references inside these queues. -+ */ -+ -+ /* a) For encrypt/decrypt. */ -+ flush_workqueue(peer->device->packet_crypt_wq); -+ /* b.1) For send (but not receive, since that's napi). */ -+ flush_workqueue(peer->device->packet_crypt_wq); -+ /* b.2.1) For receive (but not send, since that's wq). */ -+ napi_disable(&peer->napi); -+ /* b.2.1) It's now safe to remove the napi struct, which must be done -+ * here from process context. -+ */ -+ netif_napi_del(&peer->napi); -+ -+ /* Ensure any workstructs we own (like transmit_handshake_work or -+ * clear_peer_work) no longer are in use. -+ */ -+ flush_workqueue(peer->device->handshake_send_wq); -+ -+ /* After the above flushes, a peer might still be active in a few -+ * different contexts: 1) from xmit(), before hitting is_dead and -+ * returning, 2) from wg_packet_consume_data(), before hitting is_dead -+ * and returning, 3) from wg_receive_handshake_packet() after a point -+ * where it has processed an incoming handshake packet, but where -+ * all calls to pass it off to timers fails because of is_dead. 
We won't -+ * have new references in (1) eventually, because we're removed from -+ * allowedips; we won't have new references in (2) eventually, because -+ * wg_index_hashtable_lookup will always return NULL, since we removed -+ * all existing keypairs and no more can be created; we won't have new -+ * references in (3) eventually, because we're removed from the pubkey -+ * hash table, which allows for a maximum of one handshake response, -+ * via the still-uncleared index hashtable entry, but not more than one, -+ * and in wg_cookie_message_consume, the lookup eventually gets a peer -+ * with a refcount of zero, so no new reference is taken. -+ */ -+ -+ --peer->device->num_peers; -+ wg_peer_put(peer); -+} -+ -+/* We have a separate "remove" function make sure that all active places where -+ * a peer is currently operating will eventually come to an end and not pass -+ * their reference onto another context. -+ */ -+void wg_peer_remove(struct wg_peer *peer) -+{ -+ if (unlikely(!peer)) -+ return; -+ lockdep_assert_held(&peer->device->device_update_lock); -+ -+ peer_make_dead(peer); -+ synchronize_rcu(); -+ peer_remove_after_dead(peer); -+} -+ -+void wg_peer_remove_all(struct wg_device *wg) -+{ -+ struct wg_peer *peer, *temp; -+ LIST_HEAD(dead_peers); -+ -+ lockdep_assert_held(&wg->device_update_lock); -+ -+ /* Avoid having to traverse individually for each one. 
*/ -+ wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock); -+ -+ list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { -+ peer_make_dead(peer); -+ list_add_tail(&peer->peer_list, &dead_peers); -+ } -+ synchronize_rcu(); -+ list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) -+ peer_remove_after_dead(peer); -+} -+ -+static void rcu_release(struct rcu_head *rcu) -+{ -+ struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); -+ -+ dst_cache_destroy(&peer->endpoint_cache); -+ wg_packet_queue_free(&peer->rx_queue, false); -+ wg_packet_queue_free(&peer->tx_queue, false); -+ -+ /* The final zeroing takes care of clearing any remaining handshake key -+ * material and other potentially sensitive information. -+ */ -+ kzfree(peer); -+} -+ -+static void kref_release(struct kref *refcount) -+{ -+ struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount); -+ -+ pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ -+ /* Remove ourself from dynamic runtime lookup structures, now that the -+ * last reference is gone. -+ */ -+ wg_index_hashtable_remove(peer->device->index_hashtable, -+ &peer->handshake.entry); -+ -+ /* Remove any lingering packets that didn't have a chance to be -+ * transmitted. -+ */ -+ wg_packet_purge_staged_packets(peer); -+ -+ /* Free the memory used. */ -+ call_rcu(&peer->rcu, rcu_release); -+} -+ -+void wg_peer_put(struct wg_peer *peer) -+{ -+ if (unlikely(!peer)) -+ return; -+ kref_put(&peer->refcount, kref_release); -+} ---- /dev/null -+++ b/drivers/net/wireguard/peer.h -@@ -0,0 +1,83 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_PEER_H -+#define _WG_PEER_H -+ -+#include "device.h" -+#include "noise.h" -+#include "cookie.h" -+ -+#include <linux/types.h> -+#include <linux/netfilter.h> -+#include <linux/spinlock.h> -+#include <linux/kref.h> -+#include <net/dst_cache.h> -+ -+struct wg_device; -+ -+struct endpoint { -+ union { -+ struct sockaddr addr; -+ struct sockaddr_in addr4; -+ struct sockaddr_in6 addr6; -+ }; -+ union { -+ struct { -+ struct in_addr src4; -+ /* Essentially the same as addr6->scope_id */ -+ int src_if4; -+ }; -+ struct in6_addr src6; -+ }; -+}; -+ -+struct wg_peer { -+ struct wg_device *device; -+ struct crypt_queue tx_queue, rx_queue; -+ struct sk_buff_head staged_packet_queue; -+ int serial_work_cpu; -+ struct noise_keypairs keypairs; -+ struct endpoint endpoint; -+ struct dst_cache endpoint_cache; -+ rwlock_t endpoint_lock; -+ struct noise_handshake handshake; -+ atomic64_t last_sent_handshake; -+ struct work_struct transmit_handshake_work, clear_peer_work; -+ struct cookie latest_cookie; -+ struct hlist_node pubkey_hash; -+ u64 rx_bytes, tx_bytes; -+ struct timer_list timer_retransmit_handshake, timer_send_keepalive; -+ struct timer_list timer_new_handshake, timer_zero_key_material; -+ struct timer_list timer_persistent_keepalive; -+ unsigned int timer_handshake_attempts; -+ u16 persistent_keepalive_interval; -+ bool timer_need_another_keepalive; -+ bool sent_lastminute_handshake; -+ struct timespec64 walltime_last_handshake; -+ struct kref refcount; -+ struct rcu_head rcu; -+ struct list_head peer_list; -+ struct list_head allowedips_list; -+ u64 internal_id; -+ struct napi_struct napi; -+ bool is_dead; -+}; -+ -+struct wg_peer *wg_peer_create(struct wg_device *wg, -+ const u8 public_key[NOISE_PUBLIC_KEY_LEN], -+ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]); -+ -+struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer); -+static inline struct wg_peer *wg_peer_get(struct wg_peer *peer) -+{ -+ kref_get(&peer->refcount); -+ 
return peer; -+} -+void wg_peer_put(struct wg_peer *peer); -+void wg_peer_remove(struct wg_peer *peer); -+void wg_peer_remove_all(struct wg_device *wg); -+ -+#endif /* _WG_PEER_H */ ---- /dev/null -+++ b/drivers/net/wireguard/peerlookup.c -@@ -0,0 +1,221 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include "peerlookup.h" -+#include "peer.h" -+#include "noise.h" -+ -+static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, -+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) -+{ -+ /* siphash gives us a secure 64bit number based on a random key. Since -+ * the bits are uniformly distributed, we can then mask off to get the -+ * bits we need. -+ */ -+ const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key); -+ -+ return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)]; -+} -+ -+struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void) -+{ -+ struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); -+ -+ if (!table) -+ return NULL; -+ -+ get_random_bytes(&table->key, sizeof(table->key)); -+ hash_init(table->hashtable); -+ mutex_init(&table->lock); -+ return table; -+} -+ -+void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, -+ struct wg_peer *peer) -+{ -+ mutex_lock(&table->lock); -+ hlist_add_head_rcu(&peer->pubkey_hash, -+ pubkey_bucket(table, peer->handshake.remote_static)); -+ mutex_unlock(&table->lock); -+} -+ -+void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, -+ struct wg_peer *peer) -+{ -+ mutex_lock(&table->lock); -+ hlist_del_init_rcu(&peer->pubkey_hash); -+ mutex_unlock(&table->lock); -+} -+ -+/* Returns a strong reference to a peer */ -+struct wg_peer * -+wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, -+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) -+{ -+ struct wg_peer *iter_peer, *peer = NULL; -+ -+ rcu_read_lock_bh(); -+ hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, 
pubkey), -+ pubkey_hash) { -+ if (!memcmp(pubkey, iter_peer->handshake.remote_static, -+ NOISE_PUBLIC_KEY_LEN)) { -+ peer = iter_peer; -+ break; -+ } -+ } -+ peer = wg_peer_get_maybe_zero(peer); -+ rcu_read_unlock_bh(); -+ return peer; -+} -+ -+static struct hlist_head *index_bucket(struct index_hashtable *table, -+ const __le32 index) -+{ -+ /* Since the indices are random and thus all bits are uniformly -+ * distributed, we can find its bucket simply by masking. -+ */ -+ return &table->hashtable[(__force u32)index & -+ (HASH_SIZE(table->hashtable) - 1)]; -+} -+ -+struct index_hashtable *wg_index_hashtable_alloc(void) -+{ -+ struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); -+ -+ if (!table) -+ return NULL; -+ -+ hash_init(table->hashtable); -+ spin_lock_init(&table->lock); -+ return table; -+} -+ -+/* At the moment, we limit ourselves to 2^20 total peers, which generally might -+ * amount to 2^20*3 items in this hashtable. The algorithm below works by -+ * picking a random number and testing it. We can see that these limits mean we -+ * usually succeed pretty quickly: -+ * -+ * >>> def calculation(tries, size): -+ * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32)) -+ * ... -+ * >>> calculation(1, 2**20 * 3) -+ * 0.999267578125 -+ * >>> calculation(2, 2**20 * 3) -+ * 0.0007318854331970215 -+ * >>> calculation(3, 2**20 * 3) -+ * 5.360489012673497e-07 -+ * >>> calculation(4, 2**20 * 3) -+ * 3.9261394135792216e-10 -+ * -+ * At the moment, we don't do any masking, so this algorithm isn't exactly -+ * constant time in either the random guessing or in the hash list lookup. We -+ * could require a minimum of 3 tries, which would successfully mask the -+ * guessing. this would not, however, help with the growing hash lengths, which -+ * is another thing to consider moving forward. 
-+ */ -+ -+__le32 wg_index_hashtable_insert(struct index_hashtable *table, -+ struct index_hashtable_entry *entry) -+{ -+ struct index_hashtable_entry *existing_entry; -+ -+ spin_lock_bh(&table->lock); -+ hlist_del_init_rcu(&entry->index_hash); -+ spin_unlock_bh(&table->lock); -+ -+ rcu_read_lock_bh(); -+ -+search_unused_slot: -+ /* First we try to find an unused slot, randomly, while unlocked. */ -+ entry->index = (__force __le32)get_random_u32(); -+ hlist_for_each_entry_rcu_bh(existing_entry, -+ index_bucket(table, entry->index), -+ index_hash) { -+ if (existing_entry->index == entry->index) -+ /* If it's already in use, we continue searching. */ -+ goto search_unused_slot; -+ } -+ -+ /* Once we've found an unused slot, we lock it, and then double-check -+ * that nobody else stole it from us. -+ */ -+ spin_lock_bh(&table->lock); -+ hlist_for_each_entry_rcu_bh(existing_entry, -+ index_bucket(table, entry->index), -+ index_hash) { -+ if (existing_entry->index == entry->index) { -+ spin_unlock_bh(&table->lock); -+ /* If it was stolen, we start over. */ -+ goto search_unused_slot; -+ } -+ } -+ /* Otherwise, we know we have it exclusively (since we're locked), -+ * so we insert. -+ */ -+ hlist_add_head_rcu(&entry->index_hash, -+ index_bucket(table, entry->index)); -+ spin_unlock_bh(&table->lock); -+ -+ rcu_read_unlock_bh(); -+ -+ return entry->index; -+} -+ -+bool wg_index_hashtable_replace(struct index_hashtable *table, -+ struct index_hashtable_entry *old, -+ struct index_hashtable_entry *new) -+{ -+ if (unlikely(hlist_unhashed(&old->index_hash))) -+ return false; -+ spin_lock_bh(&table->lock); -+ new->index = old->index; -+ hlist_replace_rcu(&old->index_hash, &new->index_hash); -+ -+ /* Calling init here NULLs out index_hash, and in fact after this -+ * function returns, it's theoretically possible for this to get -+ * reinserted elsewhere. 
That means the RCU lookup below might either -+ * terminate early or jump between buckets, in which case the packet -+ * simply gets dropped, which isn't terrible. -+ */ -+ INIT_HLIST_NODE(&old->index_hash); -+ spin_unlock_bh(&table->lock); -+ return true; -+} -+ -+void wg_index_hashtable_remove(struct index_hashtable *table, -+ struct index_hashtable_entry *entry) -+{ -+ spin_lock_bh(&table->lock); -+ hlist_del_init_rcu(&entry->index_hash); -+ spin_unlock_bh(&table->lock); -+} -+ -+/* Returns a strong reference to a entry->peer */ -+struct index_hashtable_entry * -+wg_index_hashtable_lookup(struct index_hashtable *table, -+ const enum index_hashtable_type type_mask, -+ const __le32 index, struct wg_peer **peer) -+{ -+ struct index_hashtable_entry *iter_entry, *entry = NULL; -+ -+ rcu_read_lock_bh(); -+ hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index), -+ index_hash) { -+ if (iter_entry->index == index) { -+ if (likely(iter_entry->type & type_mask)) -+ entry = iter_entry; -+ break; -+ } -+ } -+ if (likely(entry)) { -+ entry->peer = wg_peer_get_maybe_zero(entry->peer); -+ if (likely(entry->peer)) -+ *peer = entry->peer; -+ else -+ entry = NULL; -+ } -+ rcu_read_unlock_bh(); -+ return entry; -+} ---- /dev/null -+++ b/drivers/net/wireguard/peerlookup.h -@@ -0,0 +1,64 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_PEERLOOKUP_H -+#define _WG_PEERLOOKUP_H -+ -+#include "messages.h" -+ -+#include <linux/hashtable.h> -+#include <linux/mutex.h> -+#include <linux/siphash.h> -+ -+struct wg_peer; -+ -+struct pubkey_hashtable { -+ /* TODO: move to rhashtable */ -+ DECLARE_HASHTABLE(hashtable, 11); -+ siphash_key_t key; -+ struct mutex lock; -+}; -+ -+struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void); -+void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, -+ struct wg_peer *peer); -+void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, -+ struct wg_peer *peer); -+struct wg_peer * -+wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, -+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN]); -+ -+struct index_hashtable { -+ /* TODO: move to rhashtable */ -+ DECLARE_HASHTABLE(hashtable, 13); -+ spinlock_t lock; -+}; -+ -+enum index_hashtable_type { -+ INDEX_HASHTABLE_HANDSHAKE = 1U << 0, -+ INDEX_HASHTABLE_KEYPAIR = 1U << 1 -+}; -+ -+struct index_hashtable_entry { -+ struct wg_peer *peer; -+ struct hlist_node index_hash; -+ enum index_hashtable_type type; -+ __le32 index; -+}; -+ -+struct index_hashtable *wg_index_hashtable_alloc(void); -+__le32 wg_index_hashtable_insert(struct index_hashtable *table, -+ struct index_hashtable_entry *entry); -+bool wg_index_hashtable_replace(struct index_hashtable *table, -+ struct index_hashtable_entry *old, -+ struct index_hashtable_entry *new); -+void wg_index_hashtable_remove(struct index_hashtable *table, -+ struct index_hashtable_entry *entry); -+struct index_hashtable_entry * -+wg_index_hashtable_lookup(struct index_hashtable *table, -+ const enum index_hashtable_type type_mask, -+ const __le32 index, struct wg_peer **peer); -+ -+#endif /* _WG_PEERLOOKUP_H */ ---- /dev/null -+++ b/drivers/net/wireguard/queueing.c -@@ -0,0 +1,53 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "queueing.h" -+ -+struct multicore_worker __percpu * -+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) -+{ -+ int cpu; -+ struct multicore_worker __percpu *worker = -+ alloc_percpu(struct multicore_worker); -+ -+ if (!worker) -+ return NULL; -+ -+ for_each_possible_cpu(cpu) { -+ per_cpu_ptr(worker, cpu)->ptr = ptr; -+ INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function); -+ } -+ return worker; -+} -+ -+int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, -+ bool multicore, unsigned int len) -+{ -+ int ret; -+ -+ memset(queue, 0, sizeof(*queue)); -+ ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); -+ if (ret) -+ return ret; -+ if (function) { -+ if (multicore) { -+ queue->worker = wg_packet_percpu_multicore_worker_alloc( -+ function, queue); -+ if (!queue->worker) -+ return -ENOMEM; -+ } else { -+ INIT_WORK(&queue->work, function); -+ } -+ } -+ return 0; -+} -+ -+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) -+{ -+ if (multicore) -+ free_percpu(queue->worker); -+ WARN_ON(!__ptr_ring_empty(&queue->ring)); -+ ptr_ring_cleanup(&queue->ring, NULL); -+} ---- /dev/null -+++ b/drivers/net/wireguard/queueing.h -@@ -0,0 +1,197 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_QUEUEING_H -+#define _WG_QUEUEING_H -+ -+#include "peer.h" -+#include <linux/types.h> -+#include <linux/skbuff.h> -+#include <linux/ip.h> -+#include <linux/ipv6.h> -+ -+struct wg_device; -+struct wg_peer; -+struct multicore_worker; -+struct crypt_queue; -+struct sk_buff; -+ -+/* queueing.c APIs: */ -+int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, -+ bool multicore, unsigned int len); -+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); -+struct multicore_worker __percpu * -+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); -+ -+/* receive.c APIs: */ -+void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb); -+void wg_packet_handshake_receive_worker(struct work_struct *work); -+/* NAPI poll function: */ -+int wg_packet_rx_poll(struct napi_struct *napi, int budget); -+/* Workqueue worker: */ -+void wg_packet_decrypt_worker(struct work_struct *work); -+ -+/* send.c APIs: */ -+void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, -+ bool is_retry); -+void wg_packet_send_handshake_response(struct wg_peer *peer); -+void wg_packet_send_handshake_cookie(struct wg_device *wg, -+ struct sk_buff *initiating_skb, -+ __le32 sender_index); -+void wg_packet_send_keepalive(struct wg_peer *peer); -+void wg_packet_purge_staged_packets(struct wg_peer *peer); -+void wg_packet_send_staged_packets(struct wg_peer *peer); -+/* Workqueue workers: */ -+void wg_packet_handshake_send_worker(struct work_struct *work); -+void wg_packet_tx_worker(struct work_struct *work); -+void wg_packet_encrypt_worker(struct work_struct *work); -+ -+enum packet_state { -+ PACKET_STATE_UNCRYPTED, -+ PACKET_STATE_CRYPTED, -+ PACKET_STATE_DEAD -+}; -+ -+struct packet_cb { -+ u64 nonce; -+ struct noise_keypair *keypair; -+ atomic_t state; -+ u32 mtu; -+ u8 ds; -+}; -+ -+#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) -+#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) -+ 
-+/* Returns either the correct skb->protocol value, or 0 if invalid. */ -+static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) -+{ -+ if (skb_network_header(skb) >= skb->head && -+ (skb_network_header(skb) + sizeof(struct iphdr)) <= -+ skb_tail_pointer(skb) && -+ ip_hdr(skb)->version == 4) -+ return htons(ETH_P_IP); -+ if (skb_network_header(skb) >= skb->head && -+ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= -+ skb_tail_pointer(skb) && -+ ipv6_hdr(skb)->version == 6) -+ return htons(ETH_P_IPV6); -+ return 0; -+} -+ -+static inline void wg_reset_packet(struct sk_buff *skb) -+{ -+ const int pfmemalloc = skb->pfmemalloc; -+ -+ skb_scrub_packet(skb, true); -+ memset(&skb->headers_start, 0, -+ offsetof(struct sk_buff, headers_end) - -+ offsetof(struct sk_buff, headers_start)); -+ skb->pfmemalloc = pfmemalloc; -+ skb->queue_mapping = 0; -+ skb->nohdr = 0; -+ skb->peeked = 0; -+ skb->mac_len = 0; -+ skb->dev = NULL; -+#ifdef CONFIG_NET_SCHED -+ skb->tc_index = 0; -+#endif -+ skb_reset_redirect(skb); -+ skb->hdr_len = skb_headroom(skb); -+ skb_reset_mac_header(skb); -+ skb_reset_network_header(skb); -+ skb_reset_transport_header(skb); -+ skb_probe_transport_header(skb); -+ skb_reset_inner_headers(skb); -+} -+ -+static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) -+{ -+ unsigned int cpu = *stored_cpu, cpu_index, i; -+ -+ if (unlikely(cpu == nr_cpumask_bits || -+ !cpumask_test_cpu(cpu, cpu_online_mask))) { -+ cpu_index = id % cpumask_weight(cpu_online_mask); -+ cpu = cpumask_first(cpu_online_mask); -+ for (i = 0; i < cpu_index; ++i) -+ cpu = cpumask_next(cpu, cpu_online_mask); -+ *stored_cpu = cpu; -+ } -+ return cpu; -+} -+ -+/* This function is racy, in the sense that next is unlocked, so it could return -+ * the same CPU twice. 
A race-free version of this would be to instead store an -+ * atomic sequence number, do an increment-and-return, and then iterate through -+ * every possible CPU until we get to that index -- choose_cpu. However that's -+ * a bit slower, and it doesn't seem like this potential race actually -+ * introduces any performance loss, so we live with it. -+ */ -+static inline int wg_cpumask_next_online(int *next) -+{ -+ int cpu = *next; -+ -+ while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) -+ cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; -+ *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; -+ return cpu; -+} -+ -+static inline int wg_queue_enqueue_per_device_and_peer( -+ struct crypt_queue *device_queue, struct crypt_queue *peer_queue, -+ struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) -+{ -+ int cpu; -+ -+ atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED); -+ /* We first queue this up for the peer ingestion, but the consumer -+ * will wait for the state to change to CRYPTED or DEAD before. -+ */ -+ if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) -+ return -ENOSPC; -+ /* Then we queue it up in the device queue, which consumes the -+ * packet as soon as it can. -+ */ -+ cpu = wg_cpumask_next_online(next_cpu); -+ if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) -+ return -EPIPE; -+ queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); -+ return 0; -+} -+ -+static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, -+ struct sk_buff *skb, -+ enum packet_state state) -+{ -+ /* We take a reference, because as soon as we call atomic_set, the -+ * peer can be freed from below us. 
-+ */ -+ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); -+ -+ atomic_set_release(&PACKET_CB(skb)->state, state); -+ queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, -+ peer->internal_id), -+ peer->device->packet_crypt_wq, &queue->work); -+ wg_peer_put(peer); -+} -+ -+static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, -+ enum packet_state state) -+{ -+ /* We take a reference, because as soon as we call atomic_set, the -+ * peer can be freed from below us. -+ */ -+ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); -+ -+ atomic_set_release(&PACKET_CB(skb)->state, state); -+ napi_schedule(&peer->napi); -+ wg_peer_put(peer); -+} -+ -+#ifdef DEBUG -+bool wg_packet_counter_selftest(void); -+#endif -+ -+#endif /* _WG_QUEUEING_H */ ---- /dev/null -+++ b/drivers/net/wireguard/ratelimiter.c -@@ -0,0 +1,223 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include "ratelimiter.h" -+#include <linux/siphash.h> -+#include <linux/mm.h> -+#include <linux/slab.h> -+#include <net/ip.h> -+ -+static struct kmem_cache *entry_cache; -+static hsiphash_key_t key; -+static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock"); -+static DEFINE_MUTEX(init_lock); -+static u64 init_refcnt; /* Protected by init_lock, hence not atomic. 
*/ -+static atomic_t total_entries = ATOMIC_INIT(0); -+static unsigned int max_entries, table_size; -+static void wg_ratelimiter_gc_entries(struct work_struct *); -+static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries); -+static struct hlist_head *table_v4; -+#if IS_ENABLED(CONFIG_IPV6) -+static struct hlist_head *table_v6; -+#endif -+ -+struct ratelimiter_entry { -+ u64 last_time_ns, tokens, ip; -+ void *net; -+ spinlock_t lock; -+ struct hlist_node hash; -+ struct rcu_head rcu; -+}; -+ -+enum { -+ PACKETS_PER_SECOND = 20, -+ PACKETS_BURSTABLE = 5, -+ PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND, -+ TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE -+}; -+ -+static void entry_free(struct rcu_head *rcu) -+{ -+ kmem_cache_free(entry_cache, -+ container_of(rcu, struct ratelimiter_entry, rcu)); -+ atomic_dec(&total_entries); -+} -+ -+static void entry_uninit(struct ratelimiter_entry *entry) -+{ -+ hlist_del_rcu(&entry->hash); -+ call_rcu(&entry->rcu, entry_free); -+} -+ -+/* Calling this function with a NULL work uninits all entries. 
*/ -+static void wg_ratelimiter_gc_entries(struct work_struct *work) -+{ -+ const u64 now = ktime_get_coarse_boottime_ns(); -+ struct ratelimiter_entry *entry; -+ struct hlist_node *temp; -+ unsigned int i; -+ -+ for (i = 0; i < table_size; ++i) { -+ spin_lock(&table_lock); -+ hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) { -+ if (unlikely(!work) || -+ now - entry->last_time_ns > NSEC_PER_SEC) -+ entry_uninit(entry); -+ } -+#if IS_ENABLED(CONFIG_IPV6) -+ hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) { -+ if (unlikely(!work) || -+ now - entry->last_time_ns > NSEC_PER_SEC) -+ entry_uninit(entry); -+ } -+#endif -+ spin_unlock(&table_lock); -+ if (likely(work)) -+ cond_resched(); -+ } -+ if (likely(work)) -+ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); -+} -+ -+bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) -+{ -+ /* We only take the bottom half of the net pointer, so that we can hash -+ * 3 words in the end. This way, siphash's len param fits into the final -+ * u32, and we don't incur an extra round. -+ */ -+ const u32 net_word = (unsigned long)net; -+ struct ratelimiter_entry *entry; -+ struct hlist_head *bucket; -+ u64 ip; -+ -+ if (skb->protocol == htons(ETH_P_IP)) { -+ ip = (u64 __force)ip_hdr(skb)->saddr; -+ bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) & -+ (table_size - 1)]; -+ } -+#if IS_ENABLED(CONFIG_IPV6) -+ else if (skb->protocol == htons(ETH_P_IPV6)) { -+ /* Only use 64 bits, so as to ratelimit the whole /64. */ -+ memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip)); -+ bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) & -+ (table_size - 1)]; -+ } -+#endif -+ else -+ return false; -+ rcu_read_lock(); -+ hlist_for_each_entry_rcu(entry, bucket, hash) { -+ if (entry->net == net && entry->ip == ip) { -+ u64 now, tokens; -+ bool ret; -+ /* Quasi-inspired by nft_limit.c, but this is actually a -+ * slightly different algorithm. 
Namely, we incorporate -+ * the burst as part of the maximum tokens, rather than -+ * as part of the rate. -+ */ -+ spin_lock(&entry->lock); -+ now = ktime_get_coarse_boottime_ns(); -+ tokens = min_t(u64, TOKEN_MAX, -+ entry->tokens + now - -+ entry->last_time_ns); -+ entry->last_time_ns = now; -+ ret = tokens >= PACKET_COST; -+ entry->tokens = ret ? tokens - PACKET_COST : tokens; -+ spin_unlock(&entry->lock); -+ rcu_read_unlock(); -+ return ret; -+ } -+ } -+ rcu_read_unlock(); -+ -+ if (atomic_inc_return(&total_entries) > max_entries) -+ goto err_oom; -+ -+ entry = kmem_cache_alloc(entry_cache, GFP_KERNEL); -+ if (unlikely(!entry)) -+ goto err_oom; -+ -+ entry->net = net; -+ entry->ip = ip; -+ INIT_HLIST_NODE(&entry->hash); -+ spin_lock_init(&entry->lock); -+ entry->last_time_ns = ktime_get_coarse_boottime_ns(); -+ entry->tokens = TOKEN_MAX - PACKET_COST; -+ spin_lock(&table_lock); -+ hlist_add_head_rcu(&entry->hash, bucket); -+ spin_unlock(&table_lock); -+ return true; -+ -+err_oom: -+ atomic_dec(&total_entries); -+ return false; -+} -+ -+int wg_ratelimiter_init(void) -+{ -+ mutex_lock(&init_lock); -+ if (++init_refcnt != 1) -+ goto out; -+ -+ entry_cache = KMEM_CACHE(ratelimiter_entry, 0); -+ if (!entry_cache) -+ goto err; -+ -+ /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting, -+ * but what it shares in common is that it uses a massive hashtable. So, -+ * we borrow their wisdom about good table sizes on different systems -+ * dependent on RAM. This calculation here comes from there. -+ */ -+ table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 
8192 : -+ max_t(unsigned long, 16, roundup_pow_of_two( -+ (totalram_pages() << PAGE_SHIFT) / -+ (1U << 14) / sizeof(struct hlist_head))); -+ max_entries = table_size * 8; -+ -+ table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); -+ if (unlikely(!table_v4)) -+ goto err_kmemcache; -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); -+ if (unlikely(!table_v6)) { -+ kvfree(table_v4); -+ goto err_kmemcache; -+ } -+#endif -+ -+ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); -+ get_random_bytes(&key, sizeof(key)); -+out: -+ mutex_unlock(&init_lock); -+ return 0; -+ -+err_kmemcache: -+ kmem_cache_destroy(entry_cache); -+err: -+ --init_refcnt; -+ mutex_unlock(&init_lock); -+ return -ENOMEM; -+} -+ -+void wg_ratelimiter_uninit(void) -+{ -+ mutex_lock(&init_lock); -+ if (!init_refcnt || --init_refcnt) -+ goto out; -+ -+ cancel_delayed_work_sync(&gc_work); -+ wg_ratelimiter_gc_entries(NULL); -+ rcu_barrier(); -+ kvfree(table_v4); -+#if IS_ENABLED(CONFIG_IPV6) -+ kvfree(table_v6); -+#endif -+ kmem_cache_destroy(entry_cache); -+out: -+ mutex_unlock(&init_lock); -+} -+ -+#include "selftest/ratelimiter.c" ---- /dev/null -+++ b/drivers/net/wireguard/ratelimiter.h -@@ -0,0 +1,19 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#ifndef _WG_RATELIMITER_H -+#define _WG_RATELIMITER_H -+ -+#include <linux/skbuff.h> -+ -+int wg_ratelimiter_init(void); -+void wg_ratelimiter_uninit(void); -+bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net); -+ -+#ifdef DEBUG -+bool wg_ratelimiter_selftest(void); -+#endif -+ -+#endif /* _WG_RATELIMITER_H */ ---- /dev/null -+++ b/drivers/net/wireguard/receive.c -@@ -0,0 +1,595 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "queueing.h" -+#include "device.h" -+#include "peer.h" -+#include "timers.h" -+#include "messages.h" -+#include "cookie.h" -+#include "socket.h" -+ -+#include <linux/ip.h> -+#include <linux/ipv6.h> -+#include <linux/udp.h> -+#include <net/ip_tunnels.h> -+ -+/* Must be called with bh disabled. */ -+static void update_rx_stats(struct wg_peer *peer, size_t len) -+{ -+ struct pcpu_sw_netstats *tstats = -+ get_cpu_ptr(peer->device->dev->tstats); -+ -+ u64_stats_update_begin(&tstats->syncp); -+ ++tstats->rx_packets; -+ tstats->rx_bytes += len; -+ peer->rx_bytes += len; -+ u64_stats_update_end(&tstats->syncp); -+ put_cpu_ptr(tstats); -+} -+ -+#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type) -+ -+static size_t validate_header_len(struct sk_buff *skb) -+{ -+ if (unlikely(skb->len < sizeof(struct message_header))) -+ return 0; -+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) && -+ skb->len >= MESSAGE_MINIMUM_LENGTH) -+ return sizeof(struct message_data); -+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && -+ skb->len == sizeof(struct message_handshake_initiation)) -+ return sizeof(struct message_handshake_initiation); -+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && -+ skb->len == sizeof(struct message_handshake_response)) -+ return sizeof(struct message_handshake_response); -+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && -+ skb->len == sizeof(struct message_handshake_cookie)) -+ return sizeof(struct message_handshake_cookie); -+ return 0; -+} -+ -+static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) -+{ -+ size_t data_offset, data_len, header_len; -+ struct udphdr *udp; -+ -+ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || -+ skb_transport_header(skb) < skb->head || -+ (skb_transport_header(skb) + sizeof(struct udphdr)) > -+ skb_tail_pointer(skb))) -+ return -EINVAL; /* Bogus IP header */ -+ udp = udp_hdr(skb); -+ 
data_offset = (u8 *)udp - skb->data; -+ if (unlikely(data_offset > U16_MAX || -+ data_offset + sizeof(struct udphdr) > skb->len)) -+ /* Packet has offset at impossible location or isn't big enough -+ * to have UDP fields. -+ */ -+ return -EINVAL; -+ data_len = ntohs(udp->len); -+ if (unlikely(data_len < sizeof(struct udphdr) || -+ data_len > skb->len - data_offset)) -+ /* UDP packet is reporting too small of a size or lying about -+ * its size. -+ */ -+ return -EINVAL; -+ data_len -= sizeof(struct udphdr); -+ data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; -+ if (unlikely(!pskb_may_pull(skb, -+ data_offset + sizeof(struct message_header)) || -+ pskb_trim(skb, data_len + data_offset) < 0)) -+ return -EINVAL; -+ skb_pull(skb, data_offset); -+ if (unlikely(skb->len != data_len)) -+ /* Final len does not agree with calculated len */ -+ return -EINVAL; -+ header_len = validate_header_len(skb); -+ if (unlikely(!header_len)) -+ return -EINVAL; -+ __skb_push(skb, data_offset); -+ if (unlikely(!pskb_may_pull(skb, data_offset + header_len))) -+ return -EINVAL; -+ __skb_pull(skb, data_offset); -+ return 0; -+} -+ -+static void wg_receive_handshake_packet(struct wg_device *wg, -+ struct sk_buff *skb) -+{ -+ enum cookie_mac_state mac_state; -+ struct wg_peer *peer = NULL; -+ /* This is global, so that our load calculation applies to the whole -+ * system. We don't care about races with it at all. 
-+ */ -+ static u64 last_under_load; -+ bool packet_needs_cookie; -+ bool under_load; -+ -+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) { -+ net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", -+ wg->dev->name, skb); -+ wg_cookie_message_consume( -+ (struct message_handshake_cookie *)skb->data, wg); -+ return; -+ } -+ -+ under_load = skb_queue_len(&wg->incoming_handshakes) >= -+ MAX_QUEUED_INCOMING_HANDSHAKES / 8; -+ if (under_load) -+ last_under_load = ktime_get_coarse_boottime_ns(); -+ else if (last_under_load) -+ under_load = !wg_birthdate_has_expired(last_under_load, 1); -+ mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, -+ under_load); -+ if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || -+ (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) { -+ packet_needs_cookie = false; -+ } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) { -+ packet_needs_cookie = true; -+ } else { -+ net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", -+ wg->dev->name, skb); -+ return; -+ } -+ -+ switch (SKB_TYPE_LE32(skb)) { -+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): { -+ struct message_handshake_initiation *message = -+ (struct message_handshake_initiation *)skb->data; -+ -+ if (packet_needs_cookie) { -+ wg_packet_send_handshake_cookie(wg, skb, -+ message->sender_index); -+ return; -+ } -+ peer = wg_noise_handshake_consume_initiation(message, wg); -+ if (unlikely(!peer)) { -+ net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", -+ wg->dev->name, skb); -+ return; -+ } -+ wg_socket_set_peer_endpoint_from_skb(peer, skb); -+ net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n", -+ wg->dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ wg_packet_send_handshake_response(peer); -+ break; -+ } -+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): { -+ struct message_handshake_response *message = -+ (struct 
message_handshake_response *)skb->data; -+ -+ if (packet_needs_cookie) { -+ wg_packet_send_handshake_cookie(wg, skb, -+ message->sender_index); -+ return; -+ } -+ peer = wg_noise_handshake_consume_response(message, wg); -+ if (unlikely(!peer)) { -+ net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", -+ wg->dev->name, skb); -+ return; -+ } -+ wg_socket_set_peer_endpoint_from_skb(peer, skb); -+ net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n", -+ wg->dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ if (wg_noise_handshake_begin_session(&peer->handshake, -+ &peer->keypairs)) { -+ wg_timers_session_derived(peer); -+ wg_timers_handshake_complete(peer); -+ /* Calling this function will either send any existing -+ * packets in the queue and not send a keepalive, which -+ * is the best case, Or, if there's nothing in the -+ * queue, it will send a keepalive, in order to give -+ * immediate confirmation of the session. -+ */ -+ wg_packet_send_keepalive(peer); -+ } -+ break; -+ } -+ } -+ -+ if (unlikely(!peer)) { -+ WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n"); -+ return; -+ } -+ -+ local_bh_disable(); -+ update_rx_stats(peer, skb->len); -+ local_bh_enable(); -+ -+ wg_timers_any_authenticated_packet_received(peer); -+ wg_timers_any_authenticated_packet_traversal(peer); -+ wg_peer_put(peer); -+} -+ -+void wg_packet_handshake_receive_worker(struct work_struct *work) -+{ -+ struct wg_device *wg = container_of(work, struct multicore_worker, -+ work)->ptr; -+ struct sk_buff *skb; -+ -+ while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { -+ wg_receive_handshake_packet(wg, skb); -+ dev_kfree_skb(skb); -+ cond_resched(); -+ } -+} -+ -+static void keep_key_fresh(struct wg_peer *peer) -+{ -+ struct noise_keypair *keypair; -+ bool send = false; -+ -+ if (peer->sent_lastminute_handshake) -+ return; -+ -+ rcu_read_lock_bh(); -+ keypair = 
rcu_dereference_bh(peer->keypairs.current_keypair); -+ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && -+ keypair->i_am_the_initiator && -+ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, -+ REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT))) -+ send = true; -+ rcu_read_unlock_bh(); -+ -+ if (send) { -+ peer->sent_lastminute_handshake = true; -+ wg_packet_send_queued_handshake_initiation(peer, false); -+ } -+} -+ -+static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) -+{ -+ struct scatterlist sg[MAX_SKB_FRAGS + 8]; -+ struct sk_buff *trailer; -+ unsigned int offset; -+ int num_frags; -+ -+ if (unlikely(!key)) -+ return false; -+ -+ if (unlikely(!READ_ONCE(key->is_valid) || -+ wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || -+ key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { -+ WRITE_ONCE(key->is_valid, false); -+ return false; -+ } -+ -+ PACKET_CB(skb)->nonce = -+ le64_to_cpu(((struct message_data *)skb->data)->counter); -+ -+ /* We ensure that the network header is part of the packet before we -+ * call skb_cow_data, so that there's no chance that data is removed -+ * from the skb, so that later we can extract the original endpoint. -+ */ -+ offset = skb->data - skb_network_header(skb); -+ skb_push(skb, offset); -+ num_frags = skb_cow_data(skb, 0, &trailer); -+ offset += sizeof(struct message_data); -+ skb_pull(skb, offset); -+ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) -+ return false; -+ -+ sg_init_table(sg, num_frags); -+ if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) -+ return false; -+ -+ if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, -+ PACKET_CB(skb)->nonce, -+ key->key)) -+ return false; -+ -+ /* Another ugly situation of pushing and pulling the header so as to -+ * keep endpoint information intact. 
-+ */ -+ skb_push(skb, offset); -+ if (pskb_trim(skb, skb->len - noise_encrypted_len(0))) -+ return false; -+ skb_pull(skb, offset); -+ -+ return true; -+} -+ -+/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ -+static bool counter_validate(union noise_counter *counter, u64 their_counter) -+{ -+ unsigned long index, index_current, top, i; -+ bool ret = false; -+ -+ spin_lock_bh(&counter->receive.lock); -+ -+ if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || -+ their_counter >= REJECT_AFTER_MESSAGES)) -+ goto out; -+ -+ ++their_counter; -+ -+ if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < -+ counter->receive.counter)) -+ goto out; -+ -+ index = their_counter >> ilog2(BITS_PER_LONG); -+ -+ if (likely(their_counter > counter->receive.counter)) { -+ index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); -+ top = min_t(unsigned long, index - index_current, -+ COUNTER_BITS_TOTAL / BITS_PER_LONG); -+ for (i = 1; i <= top; ++i) -+ counter->receive.backtrack[(i + index_current) & -+ ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; -+ counter->receive.counter = their_counter; -+ } -+ -+ index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; -+ ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), -+ &counter->receive.backtrack[index]); -+ -+out: -+ spin_unlock_bh(&counter->receive.lock); -+ return ret; -+} -+ -+#include "selftest/counter.c" -+ -+static void wg_packet_consume_data_done(struct wg_peer *peer, -+ struct sk_buff *skb, -+ struct endpoint *endpoint) -+{ -+ struct net_device *dev = peer->device->dev; -+ unsigned int len, len_before_trim; -+ struct wg_peer *routed_peer; -+ -+ wg_socket_set_peer_endpoint(peer, endpoint); -+ -+ if (unlikely(wg_noise_received_with_keypair(&peer->keypairs, -+ PACKET_CB(skb)->keypair))) { -+ wg_timers_handshake_complete(peer); -+ wg_packet_send_staged_packets(peer); -+ } -+ -+ keep_key_fresh(peer); -+ -+ wg_timers_any_authenticated_packet_received(peer); -+ 
wg_timers_any_authenticated_packet_traversal(peer); -+ -+ /* A packet with length 0 is a keepalive packet */ -+ if (unlikely(!skb->len)) { -+ update_rx_stats(peer, message_data_len(0)); -+ net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n", -+ dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ goto packet_processed; -+ } -+ -+ wg_timers_data_received(peer); -+ -+ if (unlikely(skb_network_header(skb) < skb->head)) -+ goto dishonest_packet_size; -+ if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && -+ (ip_hdr(skb)->version == 4 || -+ (ip_hdr(skb)->version == 6 && -+ pskb_network_may_pull(skb, sizeof(struct ipv6hdr))))))) -+ goto dishonest_packet_type; -+ -+ skb->dev = dev; -+ /* We've already verified the Poly1305 auth tag, which means this packet -+ * was not modified in transit. We can therefore tell the networking -+ * stack that all checksums of every layer of encapsulation have already -+ * been checked "by the hardware" and therefore is unneccessary to check -+ * again in software. 
-+ */ -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ skb->csum_level = ~0; /* All levels */ -+ skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); -+ if (skb->protocol == htons(ETH_P_IP)) { -+ len = ntohs(ip_hdr(skb)->tot_len); -+ if (unlikely(len < sizeof(struct iphdr))) -+ goto dishonest_packet_size; -+ if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) -+ IP_ECN_set_ce(ip_hdr(skb)); -+ } else if (skb->protocol == htons(ETH_P_IPV6)) { -+ len = ntohs(ipv6_hdr(skb)->payload_len) + -+ sizeof(struct ipv6hdr); -+ if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) -+ IP6_ECN_set_ce(skb, ipv6_hdr(skb)); -+ } else { -+ goto dishonest_packet_type; -+ } -+ -+ if (unlikely(len > skb->len)) -+ goto dishonest_packet_size; -+ len_before_trim = skb->len; -+ if (unlikely(pskb_trim(skb, len))) -+ goto packet_processed; -+ -+ routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips, -+ skb); -+ wg_peer_put(routed_peer); /* We don't need the extra reference. */ -+ -+ if (unlikely(routed_peer != peer)) -+ goto dishonest_packet_peer; -+ -+ if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { -+ ++dev->stats.rx_dropped; -+ net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", -+ dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ } else { -+ update_rx_stats(peer, message_data_len(len_before_trim)); -+ } -+ return; -+ -+dishonest_packet_peer: -+ net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", -+ dev->name, skb, peer->internal_id, -+ &peer->endpoint.addr); -+ ++dev->stats.rx_errors; -+ ++dev->stats.rx_frame_errors; -+ goto packet_processed; -+dishonest_packet_type: -+ net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", -+ dev->name, peer->internal_id, &peer->endpoint.addr); -+ ++dev->stats.rx_errors; -+ ++dev->stats.rx_frame_errors; -+ goto packet_processed; -+dishonest_packet_size: -+ net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu 
(%pISpfsc)\n", -+ dev->name, peer->internal_id, &peer->endpoint.addr); -+ ++dev->stats.rx_errors; -+ ++dev->stats.rx_length_errors; -+ goto packet_processed; -+packet_processed: -+ dev_kfree_skb(skb); -+} -+ -+int wg_packet_rx_poll(struct napi_struct *napi, int budget) -+{ -+ struct wg_peer *peer = container_of(napi, struct wg_peer, napi); -+ struct crypt_queue *queue = &peer->rx_queue; -+ struct noise_keypair *keypair; -+ struct endpoint endpoint; -+ enum packet_state state; -+ struct sk_buff *skb; -+ int work_done = 0; -+ bool free; -+ -+ if (unlikely(budget <= 0)) -+ return 0; -+ -+ while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && -+ (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != -+ PACKET_STATE_UNCRYPTED) { -+ __ptr_ring_discard_one(&queue->ring); -+ peer = PACKET_PEER(skb); -+ keypair = PACKET_CB(skb)->keypair; -+ free = true; -+ -+ if (unlikely(state != PACKET_STATE_CRYPTED)) -+ goto next; -+ -+ if (unlikely(!counter_validate(&keypair->receiving.counter, -+ PACKET_CB(skb)->nonce))) { -+ net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", -+ peer->device->dev->name, -+ PACKET_CB(skb)->nonce, -+ keypair->receiving.counter.receive.counter); -+ goto next; -+ } -+ -+ if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) -+ goto next; -+ -+ wg_reset_packet(skb); -+ wg_packet_consume_data_done(peer, skb, &endpoint); -+ free = false; -+ -+next: -+ wg_noise_keypair_put(keypair, false); -+ wg_peer_put(peer); -+ if (unlikely(free)) -+ dev_kfree_skb(skb); -+ -+ if (++work_done >= budget) -+ break; -+ } -+ -+ if (work_done < budget) -+ napi_complete_done(napi, work_done); -+ -+ return work_done; -+} -+ -+void wg_packet_decrypt_worker(struct work_struct *work) -+{ -+ struct crypt_queue *queue = container_of(work, struct multicore_worker, -+ work)->ptr; -+ struct sk_buff *skb; -+ -+ while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { -+ enum packet_state state = likely(decrypt_packet(skb, -+ 
&PACKET_CB(skb)->keypair->receiving)) ? -+ PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; -+ wg_queue_enqueue_per_peer_napi(skb, state); -+ } -+} -+ -+static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) -+{ -+ __le32 idx = ((struct message_data *)skb->data)->key_idx; -+ struct wg_peer *peer = NULL; -+ int ret; -+ -+ rcu_read_lock_bh(); -+ PACKET_CB(skb)->keypair = -+ (struct noise_keypair *)wg_index_hashtable_lookup( -+ wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx, -+ &peer); -+ if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair))) -+ goto err_keypair; -+ -+ if (unlikely(READ_ONCE(peer->is_dead))) -+ goto err; -+ -+ ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, -+ &peer->rx_queue, skb, -+ wg->packet_crypt_wq, -+ &wg->decrypt_queue.last_cpu); -+ if (unlikely(ret == -EPIPE)) -+ wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); -+ if (likely(!ret || ret == -EPIPE)) { -+ rcu_read_unlock_bh(); -+ return; -+ } -+err: -+ wg_noise_keypair_put(PACKET_CB(skb)->keypair, false); -+err_keypair: -+ rcu_read_unlock_bh(); -+ wg_peer_put(peer); -+ dev_kfree_skb(skb); -+} -+ -+void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) -+{ -+ if (unlikely(prepare_skb_header(skb, wg) < 0)) -+ goto err; -+ switch (SKB_TYPE_LE32(skb)) { -+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): -+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): -+ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { -+ int cpu; -+ -+ if (skb_queue_len(&wg->incoming_handshakes) > -+ MAX_QUEUED_INCOMING_HANDSHAKES || -+ unlikely(!rng_is_initialized())) { -+ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", -+ wg->dev->name, skb); -+ goto err; -+ } -+ skb_queue_tail(&wg->incoming_handshakes, skb); -+ /* Queues up a call to packet_process_queued_handshake_ -+ * packets(skb): -+ */ -+ cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); -+ queue_work_on(cpu, wg->handshake_receive_wq, -+ &per_cpu_ptr(wg->incoming_handshakes_worker, 
cpu)->work); -+ break; -+ } -+ case cpu_to_le32(MESSAGE_DATA): -+ PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); -+ wg_packet_consume_data(wg, skb); -+ break; -+ default: -+ net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", -+ wg->dev->name, skb); -+ goto err; -+ } -+ return; -+ -+err: -+ dev_kfree_skb(skb); -+} ---- /dev/null -+++ b/drivers/net/wireguard/selftest/allowedips.c -@@ -0,0 +1,683 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * This contains some basic static unit tests for the allowedips data structure. -+ * It also has two additional modes that are disabled and meant to be used by -+ * folks directly playing with this file. If you define the macro -+ * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in -+ * memory, it will be printed out as KERN_DEBUG in a format that can be passed -+ * to graphviz (the dot command) to visualize it. If you define the macro -+ * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of -+ * randomized tests done against a trivial implementation, which may take -+ * upwards of a half-hour to complete. There's no set of users who should be -+ * enabling these, and the only developers that should go anywhere near these -+ * nobs are the ones who are reading this comment. 
-+ */ -+ -+#ifdef DEBUG -+ -+#include <linux/siphash.h> -+ -+static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, -+ u8 cidr) -+{ -+ swap_endian(dst, src, bits); -+ memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); -+ if (cidr) -+ dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); -+} -+ -+static __init void print_node(struct allowedips_node *node, u8 bits) -+{ -+ char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; -+ char *fmt_declaration = KERN_DEBUG -+ "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; -+ char *style = "dotted"; -+ u8 ip1[16], ip2[16]; -+ u32 color = 0; -+ -+ if (bits == 32) { -+ fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; -+ fmt_declaration = KERN_DEBUG -+ "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; -+ } else if (bits == 128) { -+ fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; -+ fmt_declaration = KERN_DEBUG -+ "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; -+ } -+ if (node->peer) { -+ hsiphash_key_t key = { { 0 } }; -+ -+ memcpy(&key, &node->peer, sizeof(node->peer)); -+ color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 | -+ hsiphash_1u32(0xbabecafe, &key) % 200 << 8 | -+ hsiphash_1u32(0xabad1dea, &key) % 200; -+ style = "bold"; -+ } -+ swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); -+ printk(fmt_declaration, ip1, node->cidr, style, color); -+ if (node->bit[0]) { -+ swap_endian_and_apply_cidr(ip2, -+ rcu_dereference_raw(node->bit[0])->bits, bits, -+ node->cidr); -+ printk(fmt_connection, ip1, node->cidr, ip2, -+ rcu_dereference_raw(node->bit[0])->cidr); -+ print_node(rcu_dereference_raw(node->bit[0]), bits); -+ } -+ if (node->bit[1]) { -+ swap_endian_and_apply_cidr(ip2, -+ rcu_dereference_raw(node->bit[1])->bits, -+ bits, node->cidr); -+ printk(fmt_connection, ip1, node->cidr, ip2, -+ rcu_dereference_raw(node->bit[1])->cidr); -+ print_node(rcu_dereference_raw(node->bit[1]), bits); -+ } -+} -+ -+static __init void print_tree(struct 
allowedips_node __rcu *top, u8 bits) -+{ -+ printk(KERN_DEBUG "digraph trie {\n"); -+ print_node(rcu_dereference_raw(top), bits); -+ printk(KERN_DEBUG "}\n"); -+} -+ -+enum { -+ NUM_PEERS = 2000, -+ NUM_RAND_ROUTES = 400, -+ NUM_MUTATED_ROUTES = 100, -+ NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30 -+}; -+ -+struct horrible_allowedips { -+ struct hlist_head head; -+}; -+ -+struct horrible_allowedips_node { -+ struct hlist_node table; -+ union nf_inet_addr ip; -+ union nf_inet_addr mask; -+ u8 ip_version; -+ void *value; -+}; -+ -+static __init void horrible_allowedips_init(struct horrible_allowedips *table) -+{ -+ INIT_HLIST_HEAD(&table->head); -+} -+ -+static __init void horrible_allowedips_free(struct horrible_allowedips *table) -+{ -+ struct horrible_allowedips_node *node; -+ struct hlist_node *h; -+ -+ hlist_for_each_entry_safe(node, h, &table->head, table) { -+ hlist_del(&node->table); -+ kfree(node); -+ } -+} -+ -+static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr) -+{ -+ union nf_inet_addr mask; -+ -+ memset(&mask, 0x00, 128 / 8); -+ memset(&mask, 0xff, cidr / 8); -+ if (cidr % 32) -+ mask.all[cidr / 32] = (__force u32)htonl( -+ (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); -+ return mask; -+} -+ -+static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet) -+{ -+ return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) + -+ hweight32(subnet.all[2]) + hweight32(subnet.all[3]); -+} -+ -+static __init inline void -+horrible_mask_self(struct horrible_allowedips_node *node) -+{ -+ if (node->ip_version == 4) { -+ node->ip.ip &= node->mask.ip; -+ } else if (node->ip_version == 6) { -+ node->ip.ip6[0] &= node->mask.ip6[0]; -+ node->ip.ip6[1] &= node->mask.ip6[1]; -+ node->ip.ip6[2] &= node->mask.ip6[2]; -+ node->ip.ip6[3] &= node->mask.ip6[3]; -+ } -+} -+ -+static __init inline bool -+horrible_match_v4(const struct horrible_allowedips_node *node, -+ struct in_addr *ip) -+{ -+ return (ip->s_addr & node->mask.ip) == 
node->ip.ip; -+} -+ -+static __init inline bool -+horrible_match_v6(const struct horrible_allowedips_node *node, -+ struct in6_addr *ip) -+{ -+ return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == -+ node->ip.ip6[0] && -+ (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == -+ node->ip.ip6[1] && -+ (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == -+ node->ip.ip6[2] && -+ (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; -+} -+ -+static __init void -+horrible_insert_ordered(struct horrible_allowedips *table, -+ struct horrible_allowedips_node *node) -+{ -+ struct horrible_allowedips_node *other = NULL, *where = NULL; -+ u8 my_cidr = horrible_mask_to_cidr(node->mask); -+ -+ hlist_for_each_entry(other, &table->head, table) { -+ if (!memcmp(&other->mask, &node->mask, -+ sizeof(union nf_inet_addr)) && -+ !memcmp(&other->ip, &node->ip, -+ sizeof(union nf_inet_addr)) && -+ other->ip_version == node->ip_version) { -+ other->value = node->value; -+ kfree(node); -+ return; -+ } -+ where = other; -+ if (horrible_mask_to_cidr(other->mask) <= my_cidr) -+ break; -+ } -+ if (!other && !where) -+ hlist_add_head(&node->table, &table->head); -+ else if (!other) -+ hlist_add_behind(&node->table, &where->table); -+ else -+ hlist_add_before(&node->table, &where->table); -+} -+ -+static __init int -+horrible_allowedips_insert_v4(struct horrible_allowedips *table, -+ struct in_addr *ip, u8 cidr, void *value) -+{ -+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node), -+ GFP_KERNEL); -+ -+ if (unlikely(!node)) -+ return -ENOMEM; -+ node->ip.in = *ip; -+ node->mask = horrible_cidr_to_mask(cidr); -+ node->ip_version = 4; -+ node->value = value; -+ horrible_mask_self(node); -+ horrible_insert_ordered(table, node); -+ return 0; -+} -+ -+static __init int -+horrible_allowedips_insert_v6(struct horrible_allowedips *table, -+ struct in6_addr *ip, u8 cidr, void *value) -+{ -+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node), -+ GFP_KERNEL); -+ -+ if 
(unlikely(!node)) -+ return -ENOMEM; -+ node->ip.in6 = *ip; -+ node->mask = horrible_cidr_to_mask(cidr); -+ node->ip_version = 6; -+ node->value = value; -+ horrible_mask_self(node); -+ horrible_insert_ordered(table, node); -+ return 0; -+} -+ -+static __init void * -+horrible_allowedips_lookup_v4(struct horrible_allowedips *table, -+ struct in_addr *ip) -+{ -+ struct horrible_allowedips_node *node; -+ void *ret = NULL; -+ -+ hlist_for_each_entry(node, &table->head, table) { -+ if (node->ip_version != 4) -+ continue; -+ if (horrible_match_v4(node, ip)) { -+ ret = node->value; -+ break; -+ } -+ } -+ return ret; -+} -+ -+static __init void * -+horrible_allowedips_lookup_v6(struct horrible_allowedips *table, -+ struct in6_addr *ip) -+{ -+ struct horrible_allowedips_node *node; -+ void *ret = NULL; -+ -+ hlist_for_each_entry(node, &table->head, table) { -+ if (node->ip_version != 6) -+ continue; -+ if (horrible_match_v6(node, ip)) { -+ ret = node->value; -+ break; -+ } -+ } -+ return ret; -+} -+ -+static __init bool randomized_test(void) -+{ -+ unsigned int i, j, k, mutate_amount, cidr; -+ u8 ip[16], mutate_mask[16], mutated[16]; -+ struct wg_peer **peers, *peer; -+ struct horrible_allowedips h; -+ DEFINE_MUTEX(mutex); -+ struct allowedips t; -+ bool ret = false; -+ -+ mutex_init(&mutex); -+ -+ wg_allowedips_init(&t); -+ horrible_allowedips_init(&h); -+ -+ peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL); -+ if (unlikely(!peers)) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free; -+ } -+ for (i = 0; i < NUM_PEERS; ++i) { -+ peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL); -+ if (unlikely(!peers[i])) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free; -+ } -+ kref_init(&peers[i]->refcount); -+ } -+ -+ mutex_lock(&mutex); -+ -+ for (i = 0; i < NUM_RAND_ROUTES; ++i) { -+ prandom_bytes(ip, 4); -+ cidr = prandom_u32_max(32) + 1; -+ peer = peers[prandom_u32_max(NUM_PEERS)]; -+ if (wg_allowedips_insert_v4(&t, (struct in_addr 
*)ip, cidr, -+ peer, &mutex) < 0) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free_locked; -+ } -+ if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, -+ cidr, peer) < 0) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free_locked; -+ } -+ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { -+ memcpy(mutated, ip, 4); -+ prandom_bytes(mutate_mask, 4); -+ mutate_amount = prandom_u32_max(32); -+ for (k = 0; k < mutate_amount / 8; ++k) -+ mutate_mask[k] = 0xff; -+ mutate_mask[k] = 0xff -+ << ((8 - (mutate_amount % 8)) % 8); -+ for (; k < 4; ++k) -+ mutate_mask[k] = 0; -+ for (k = 0; k < 4; ++k) -+ mutated[k] = (mutated[k] & mutate_mask[k]) | -+ (~mutate_mask[k] & -+ prandom_u32_max(256)); -+ cidr = prandom_u32_max(32) + 1; -+ peer = peers[prandom_u32_max(NUM_PEERS)]; -+ if (wg_allowedips_insert_v4(&t, -+ (struct in_addr *)mutated, -+ cidr, peer, &mutex) < 0) { -+ pr_err("allowedips random malloc: FAIL\n"); -+ goto free_locked; -+ } -+ if (horrible_allowedips_insert_v4(&h, -+ (struct in_addr *)mutated, cidr, peer)) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free_locked; -+ } -+ } -+ } -+ -+ for (i = 0; i < NUM_RAND_ROUTES; ++i) { -+ prandom_bytes(ip, 16); -+ cidr = prandom_u32_max(128) + 1; -+ peer = peers[prandom_u32_max(NUM_PEERS)]; -+ if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, -+ peer, &mutex) < 0) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free_locked; -+ } -+ if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, -+ cidr, peer) < 0) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free_locked; -+ } -+ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { -+ memcpy(mutated, ip, 16); -+ prandom_bytes(mutate_mask, 16); -+ mutate_amount = prandom_u32_max(128); -+ for (k = 0; k < mutate_amount / 8; ++k) -+ mutate_mask[k] = 0xff; -+ mutate_mask[k] = 0xff -+ << ((8 - (mutate_amount % 8)) % 8); -+ for (; k < 4; ++k) -+ mutate_mask[k] = 0; -+ for (k = 0; k 
< 4; ++k) -+ mutated[k] = (mutated[k] & mutate_mask[k]) | -+ (~mutate_mask[k] & -+ prandom_u32_max(256)); -+ cidr = prandom_u32_max(128) + 1; -+ peer = peers[prandom_u32_max(NUM_PEERS)]; -+ if (wg_allowedips_insert_v6(&t, -+ (struct in6_addr *)mutated, -+ cidr, peer, &mutex) < 0) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free_locked; -+ } -+ if (horrible_allowedips_insert_v6( -+ &h, (struct in6_addr *)mutated, cidr, -+ peer)) { -+ pr_err("allowedips random self-test malloc: FAIL\n"); -+ goto free_locked; -+ } -+ } -+ } -+ -+ mutex_unlock(&mutex); -+ -+ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { -+ print_tree(t.root4, 32); -+ print_tree(t.root6, 128); -+ } -+ -+ for (i = 0; i < NUM_QUERIES; ++i) { -+ prandom_bytes(ip, 4); -+ if (lookup(t.root4, 32, ip) != -+ horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { -+ pr_err("allowedips random self-test: FAIL\n"); -+ goto free; -+ } -+ } -+ -+ for (i = 0; i < NUM_QUERIES; ++i) { -+ prandom_bytes(ip, 16); -+ if (lookup(t.root6, 128, ip) != -+ horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { -+ pr_err("allowedips random self-test: FAIL\n"); -+ goto free; -+ } -+ } -+ ret = true; -+ -+free: -+ mutex_lock(&mutex); -+free_locked: -+ wg_allowedips_free(&t, &mutex); -+ mutex_unlock(&mutex); -+ horrible_allowedips_free(&h); -+ if (peers) { -+ for (i = 0; i < NUM_PEERS; ++i) -+ kfree(peers[i]); -+ } -+ kfree(peers); -+ return ret; -+} -+ -+static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d) -+{ -+ static struct in_addr ip; -+ u8 *split = (u8 *)&ip; -+ -+ split[0] = a; -+ split[1] = b; -+ split[2] = c; -+ split[3] = d; -+ return &ip; -+} -+ -+static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d) -+{ -+ static struct in6_addr ip; -+ __be32 *split = (__be32 *)&ip; -+ -+ split[0] = cpu_to_be32(a); -+ split[1] = cpu_to_be32(b); -+ split[2] = cpu_to_be32(c); -+ split[3] = cpu_to_be32(d); -+ return &ip; -+} -+ -+static __init struct wg_peer *init_peer(void) -+{ -+ 
struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL); -+ -+ if (!peer) -+ return NULL; -+ kref_init(&peer->refcount); -+ INIT_LIST_HEAD(&peer->allowedips_list); -+ return peer; -+} -+ -+#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \ -+ wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ -+ cidr, mem, &mutex) -+ -+#define maybe_fail() do { \ -+ ++i; \ -+ if (!_s) { \ -+ pr_info("allowedips self-test %zu: FAIL\n", i); \ -+ success = false; \ -+ } \ -+ } while (0) -+ -+#define test(version, mem, ipa, ipb, ipc, ipd) do { \ -+ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ -+ ip##version(ipa, ipb, ipc, ipd)) == (mem); \ -+ maybe_fail(); \ -+ } while (0) -+ -+#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \ -+ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ -+ ip##version(ipa, ipb, ipc, ipd)) != (mem); \ -+ maybe_fail(); \ -+ } while (0) -+ -+#define test_boolean(cond) do { \ -+ bool _s = (cond); \ -+ maybe_fail(); \ -+ } while (0) -+ -+bool __init wg_allowedips_selftest(void) -+{ -+ bool found_a = false, found_b = false, found_c = false, found_d = false, -+ found_e = false, found_other = false; -+ struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(), -+ *d = init_peer(), *e = init_peer(), *f = init_peer(), -+ *g = init_peer(), *h = init_peer(); -+ struct allowedips_node *iter_node; -+ bool success = false; -+ struct allowedips t; -+ DEFINE_MUTEX(mutex); -+ struct in6_addr ip; -+ size_t i = 0, count = 0; -+ __be64 part; -+ -+ mutex_init(&mutex); -+ mutex_lock(&mutex); -+ wg_allowedips_init(&t); -+ -+ if (!a || !b || !c || !d || !e || !f || !g || !h) { -+ pr_err("allowedips self-test malloc: FAIL\n"); -+ goto free; -+ } -+ -+ insert(4, a, 192, 168, 4, 0, 24); -+ insert(4, b, 192, 168, 4, 4, 32); -+ insert(4, c, 192, 168, 0, 0, 16); -+ insert(4, d, 192, 95, 5, 64, 27); -+ /* replaces previous entry, and maskself is required */ -+ insert(4, c, 192, 95, 5, 65, 27); -+ insert(6, d, 
0x26075300, 0x60006b00, 0, 0xc05f0543, 128); -+ insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64); -+ insert(4, e, 0, 0, 0, 0, 0); -+ insert(6, e, 0, 0, 0, 0, 0); -+ /* replaces previous entry */ -+ insert(6, f, 0, 0, 0, 0, 0); -+ insert(6, g, 0x24046800, 0, 0, 0, 32); -+ /* maskself is required */ -+ insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); -+ insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128); -+ insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); -+ insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); -+ insert(4, g, 64, 15, 112, 0, 20); -+ /* maskself is required */ -+ insert(4, h, 64, 15, 123, 211, 25); -+ insert(4, a, 10, 0, 0, 0, 25); -+ insert(4, b, 10, 0, 0, 128, 25); -+ insert(4, a, 10, 1, 0, 0, 30); -+ insert(4, b, 10, 1, 0, 4, 30); -+ insert(4, c, 10, 1, 0, 8, 29); -+ insert(4, d, 10, 1, 0, 16, 29); -+ -+ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { -+ print_tree(t.root4, 32); -+ print_tree(t.root6, 128); -+ } -+ -+ success = true; -+ -+ test(4, a, 192, 168, 4, 20); -+ test(4, a, 192, 168, 4, 0); -+ test(4, b, 192, 168, 4, 4); -+ test(4, c, 192, 168, 200, 182); -+ test(4, c, 192, 95, 5, 68); -+ test(4, e, 192, 95, 5, 96); -+ test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543); -+ test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee); -+ test(6, f, 0x26075300, 0x60006b01, 0, 0); -+ test(6, g, 0x24046800, 0x40040806, 0, 0x1006); -+ test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678); -+ test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678); -+ test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678); -+ test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678); -+ test(6, h, 0x24046800, 0x40040800, 0, 0); -+ test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010); -+ test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef); -+ test(4, g, 64, 15, 116, 26); -+ test(4, g, 64, 15, 127, 3); -+ test(4, g, 64, 15, 123, 1); -+ test(4, h, 64, 15, 123, 128); -+ test(4, h, 64, 15, 123, 129); -+ test(4, a, 10, 0, 0, 52); -+ 
test(4, b, 10, 0, 0, 220); -+ test(4, a, 10, 1, 0, 2); -+ test(4, b, 10, 1, 0, 6); -+ test(4, c, 10, 1, 0, 10); -+ test(4, d, 10, 1, 0, 20); -+ -+ insert(4, a, 1, 0, 0, 0, 32); -+ insert(4, a, 64, 0, 0, 0, 32); -+ insert(4, a, 128, 0, 0, 0, 32); -+ insert(4, a, 192, 0, 0, 0, 32); -+ insert(4, a, 255, 0, 0, 0, 32); -+ wg_allowedips_remove_by_peer(&t, a, &mutex); -+ test_negative(4, a, 1, 0, 0, 0); -+ test_negative(4, a, 64, 0, 0, 0); -+ test_negative(4, a, 128, 0, 0, 0); -+ test_negative(4, a, 192, 0, 0, 0); -+ test_negative(4, a, 255, 0, 0, 0); -+ -+ wg_allowedips_free(&t, &mutex); -+ wg_allowedips_init(&t); -+ insert(4, a, 192, 168, 0, 0, 16); -+ insert(4, a, 192, 168, 0, 0, 24); -+ wg_allowedips_remove_by_peer(&t, a, &mutex); -+ test_negative(4, a, 192, 168, 0, 1); -+ -+ /* These will hit the WARN_ON(len >= 128) in free_node if something -+ * goes wrong. -+ */ -+ for (i = 0; i < 128; ++i) { -+ part = cpu_to_be64(~(1LLU << (i % 64))); -+ memset(&ip, 0xff, 16); -+ memcpy((u8 *)&ip + (i < 64) * 8, &part, 8); -+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex); -+ } -+ -+ wg_allowedips_free(&t, &mutex); -+ -+ wg_allowedips_init(&t); -+ insert(4, a, 192, 95, 5, 93, 27); -+ insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); -+ insert(4, a, 10, 1, 0, 20, 29); -+ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83); -+ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21); -+ list_for_each_entry(iter_node, &a->allowedips_list, peer_list) { -+ u8 cidr, ip[16] __aligned(__alignof(u64)); -+ int family = wg_allowedips_read_node(iter_node, ip, &cidr); -+ -+ count++; -+ -+ if (cidr == 27 && family == AF_INET && -+ !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr))) -+ found_a = true; -+ else if (cidr == 128 && family == AF_INET6 && -+ !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543), -+ sizeof(struct in6_addr))) -+ found_b = true; -+ else if (cidr == 29 && family == AF_INET && -+ !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr))) 
-+ found_c = true; -+ else if (cidr == 83 && family == AF_INET6 && -+ !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0), -+ sizeof(struct in6_addr))) -+ found_d = true; -+ else if (cidr == 21 && family == AF_INET6 && -+ !memcmp(ip, ip6(0x26075000, 0, 0, 0), -+ sizeof(struct in6_addr))) -+ found_e = true; -+ else -+ found_other = true; -+ } -+ test_boolean(count == 5); -+ test_boolean(found_a); -+ test_boolean(found_b); -+ test_boolean(found_c); -+ test_boolean(found_d); -+ test_boolean(found_e); -+ test_boolean(!found_other); -+ -+ if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success) -+ success = randomized_test(); -+ -+ if (success) -+ pr_info("allowedips self-tests: pass\n"); -+ -+free: -+ wg_allowedips_free(&t, &mutex); -+ kfree(a); -+ kfree(b); -+ kfree(c); -+ kfree(d); -+ kfree(e); -+ kfree(f); -+ kfree(g); -+ kfree(h); -+ mutex_unlock(&mutex); -+ -+ return success; -+} -+ -+#undef test_negative -+#undef test -+#undef remove -+#undef insert -+#undef init_peer -+ -+#endif ---- /dev/null -+++ b/drivers/net/wireguard/selftest/counter.c -@@ -0,0 +1,104 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifdef DEBUG -+bool __init wg_packet_counter_selftest(void) -+{ -+ unsigned int test_num = 0, i; -+ union noise_counter counter; -+ bool success = true; -+ -+#define T_INIT do { \ -+ memset(&counter, 0, sizeof(union noise_counter)); \ -+ spin_lock_init(&counter.receive.lock); \ -+ } while (0) -+#define T_LIM (COUNTER_WINDOW_SIZE + 1) -+#define T(n, v) do { \ -+ ++test_num; \ -+ if (counter_validate(&counter, n) != (v)) { \ -+ pr_err("nonce counter self-test %u: FAIL\n", \ -+ test_num); \ -+ success = false; \ -+ } \ -+ } while (0) -+ -+ T_INIT; -+ /* 1 */ T(0, true); -+ /* 2 */ T(1, true); -+ /* 3 */ T(1, false); -+ /* 4 */ T(9, true); -+ /* 5 */ T(8, true); -+ /* 6 */ T(7, true); -+ /* 7 */ T(7, false); -+ /* 8 */ T(T_LIM, true); -+ /* 9 */ T(T_LIM - 1, true); -+ /* 10 */ T(T_LIM - 1, false); -+ /* 11 */ T(T_LIM - 2, true); -+ /* 12 */ T(2, true); -+ /* 13 */ T(2, false); -+ /* 14 */ T(T_LIM + 16, true); -+ /* 15 */ T(3, false); -+ /* 16 */ T(T_LIM + 16, false); -+ /* 17 */ T(T_LIM * 4, true); -+ /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true); -+ /* 19 */ T(10, false); -+ /* 20 */ T(T_LIM * 4 - T_LIM, false); -+ /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false); -+ /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true); -+ /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false); -+ /* 24 */ T(0, false); -+ /* 25 */ T(REJECT_AFTER_MESSAGES, false); -+ /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true); -+ /* 27 */ T(REJECT_AFTER_MESSAGES, false); -+ /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false); -+ /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true); -+ /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false); -+ /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false); -+ /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false); -+ /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true); -+ /* 34 */ T(0, false); -+ -+ T_INIT; -+ for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i) -+ T(i, true); -+ T(0, true); -+ T(0, false); -+ -+ T_INIT; -+ for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i) -+ T(i, true); -+ T(1, true); -+ T(0, false); -+ -+ T_INIT; -+ for (i = 
COUNTER_WINDOW_SIZE + 1; i-- > 0;) -+ T(i, true); -+ -+ T_INIT; -+ for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;) -+ T(i, true); -+ T(0, false); -+ -+ T_INIT; -+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) -+ T(i, true); -+ T(COUNTER_WINDOW_SIZE + 1, true); -+ T(0, false); -+ -+ T_INIT; -+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) -+ T(i, true); -+ T(0, true); -+ T(COUNTER_WINDOW_SIZE + 1, true); -+ -+#undef T -+#undef T_LIM -+#undef T_INIT -+ -+ if (success) -+ pr_info("nonce counter self-tests: pass\n"); -+ return success; -+} -+#endif ---- /dev/null -+++ b/drivers/net/wireguard/selftest/ratelimiter.c -@@ -0,0 +1,226 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#ifdef DEBUG -+ -+#include <linux/jiffies.h> -+ -+static const struct { -+ bool result; -+ unsigned int msec_to_sleep_before; -+} expected_results[] __initconst = { -+ [0 ... PACKETS_BURSTABLE - 1] = { true, 0 }, -+ [PACKETS_BURSTABLE] = { false, 0 }, -+ [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, -+ [PACKETS_BURSTABLE + 2] = { false, 0 }, -+ [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, -+ [PACKETS_BURSTABLE + 4] = { true, 0 }, -+ [PACKETS_BURSTABLE + 5] = { false, 0 } -+}; -+ -+static __init unsigned int maximum_jiffies_at_index(int index) -+{ -+ unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; -+ int i; -+ -+ for (i = 0; i <= index; ++i) -+ total_msecs += expected_results[i].msec_to_sleep_before; -+ return msecs_to_jiffies(total_msecs); -+} -+ -+static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, -+ struct sk_buff *skb6, struct ipv6hdr *hdr6, -+ int *test) -+{ -+ unsigned long loop_start_time; -+ int i; -+ -+ wg_ratelimiter_gc_entries(NULL); -+ rcu_barrier(); -+ loop_start_time = jiffies; -+ -+ for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { -+ if (expected_results[i].msec_to_sleep_before) -+ 
msleep(expected_results[i].msec_to_sleep_before); -+ -+ if (time_is_before_jiffies(loop_start_time + -+ maximum_jiffies_at_index(i))) -+ return -ETIMEDOUT; -+ if (wg_ratelimiter_allow(skb4, &init_net) != -+ expected_results[i].result) -+ return -EXFULL; -+ ++(*test); -+ -+ hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1); -+ if (time_is_before_jiffies(loop_start_time + -+ maximum_jiffies_at_index(i))) -+ return -ETIMEDOUT; -+ if (!wg_ratelimiter_allow(skb4, &init_net)) -+ return -EXFULL; -+ ++(*test); -+ -+ hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1); -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ hdr6->saddr.in6_u.u6_addr32[2] = htonl(i); -+ hdr6->saddr.in6_u.u6_addr32[3] = htonl(i); -+ if (time_is_before_jiffies(loop_start_time + -+ maximum_jiffies_at_index(i))) -+ return -ETIMEDOUT; -+ if (wg_ratelimiter_allow(skb6, &init_net) != -+ expected_results[i].result) -+ return -EXFULL; -+ ++(*test); -+ -+ hdr6->saddr.in6_u.u6_addr32[0] = -+ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1); -+ if (time_is_before_jiffies(loop_start_time + -+ maximum_jiffies_at_index(i))) -+ return -ETIMEDOUT; -+ if (!wg_ratelimiter_allow(skb6, &init_net)) -+ return -EXFULL; -+ ++(*test); -+ -+ hdr6->saddr.in6_u.u6_addr32[0] = -+ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1); -+ -+ if (time_is_before_jiffies(loop_start_time + -+ maximum_jiffies_at_index(i))) -+ return -ETIMEDOUT; -+#endif -+ } -+ return 0; -+} -+ -+static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4, -+ int *test) -+{ -+ int i; -+ -+ wg_ratelimiter_gc_entries(NULL); -+ rcu_barrier(); -+ -+ if (atomic_read(&total_entries)) -+ return -EXFULL; -+ ++(*test); -+ -+ for (i = 0; i <= max_entries; ++i) { -+ hdr4->saddr = htonl(i); -+ if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries)) -+ return -EXFULL; -+ ++(*test); -+ } -+ return 0; -+} -+ -+bool __init wg_ratelimiter_selftest(void) -+{ -+ enum { TRIALS_BEFORE_GIVING_UP = 5000 }; -+ bool success = false; -+ int test = 0, trials; -+ struct 
sk_buff *skb4, *skb6; -+ struct iphdr *hdr4; -+ struct ipv6hdr *hdr6; -+ -+ if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) -+ return true; -+ -+ BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); -+ -+ if (wg_ratelimiter_init()) -+ goto out; -+ ++test; -+ if (wg_ratelimiter_init()) { -+ wg_ratelimiter_uninit(); -+ goto out; -+ } -+ ++test; -+ if (wg_ratelimiter_init()) { -+ wg_ratelimiter_uninit(); -+ wg_ratelimiter_uninit(); -+ goto out; -+ } -+ ++test; -+ -+ skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL); -+ if (unlikely(!skb4)) -+ goto err_nofree; -+ skb4->protocol = htons(ETH_P_IP); -+ hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4)); -+ hdr4->saddr = htonl(8182); -+ skb_reset_network_header(skb4); -+ ++test; -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL); -+ if (unlikely(!skb6)) { -+ kfree_skb(skb4); -+ goto err_nofree; -+ } -+ skb6->protocol = htons(ETH_P_IPV6); -+ hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6)); -+ hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212); -+ hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188); -+ skb_reset_network_header(skb6); -+ ++test; -+#endif -+ -+ for (trials = TRIALS_BEFORE_GIVING_UP;;) { -+ int test_count = 0, ret; -+ -+ ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); -+ if (ret == -ETIMEDOUT) { -+ if (!trials--) { -+ test += test_count; -+ goto err; -+ } -+ msleep(500); -+ continue; -+ } else if (ret < 0) { -+ test += test_count; -+ goto err; -+ } else { -+ test += test_count; -+ break; -+ } -+ } -+ -+ for (trials = TRIALS_BEFORE_GIVING_UP;;) { -+ int test_count = 0; -+ -+ if (capacity_test(skb4, hdr4, &test_count) < 0) { -+ if (!trials--) { -+ test += test_count; -+ goto err; -+ } -+ msleep(50); -+ continue; -+ } -+ test += test_count; -+ break; -+ } -+ -+ success = true; -+ -+err: -+ kfree_skb(skb4); -+#if IS_ENABLED(CONFIG_IPV6) -+ kfree_skb(skb6); -+#endif -+err_nofree: -+ wg_ratelimiter_uninit(); -+ wg_ratelimiter_uninit(); -+ wg_ratelimiter_uninit(); 
-+ /* Uninit one extra time to check underflow detection. */ -+ wg_ratelimiter_uninit(); -+out: -+ if (success) -+ pr_info("ratelimiter self-tests: pass\n"); -+ else -+ pr_err("ratelimiter self-test %d: FAIL\n", test); -+ -+ return success; -+} -+#endif ---- /dev/null -+++ b/drivers/net/wireguard/send.c -@@ -0,0 +1,413 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include "queueing.h" -+#include "timers.h" -+#include "device.h" -+#include "peer.h" -+#include "socket.h" -+#include "messages.h" -+#include "cookie.h" -+ -+#include <linux/uio.h> -+#include <linux/inetdevice.h> -+#include <linux/socket.h> -+#include <net/ip_tunnels.h> -+#include <net/udp.h> -+#include <net/sock.h> -+ -+static void wg_packet_send_handshake_initiation(struct wg_peer *peer) -+{ -+ struct message_handshake_initiation packet; -+ -+ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), -+ REKEY_TIMEOUT)) -+ return; /* This function is rate limited. 
*/ -+ -+ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); -+ net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ -+ if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) { -+ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); -+ wg_timers_any_authenticated_packet_traversal(peer); -+ wg_timers_any_authenticated_packet_sent(peer); -+ atomic64_set(&peer->last_sent_handshake, -+ ktime_get_coarse_boottime_ns()); -+ wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), -+ HANDSHAKE_DSCP); -+ wg_timers_handshake_initiated(peer); -+ } -+} -+ -+void wg_packet_handshake_send_worker(struct work_struct *work) -+{ -+ struct wg_peer *peer = container_of(work, struct wg_peer, -+ transmit_handshake_work); -+ -+ wg_packet_send_handshake_initiation(peer); -+ wg_peer_put(peer); -+} -+ -+void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, -+ bool is_retry) -+{ -+ if (!is_retry) -+ peer->timer_handshake_attempts = 0; -+ -+ rcu_read_lock_bh(); -+ /* We check last_sent_handshake here in addition to the actual function -+ * we're queueing up, so that we don't queue things if not strictly -+ * necessary: -+ */ -+ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), -+ REKEY_TIMEOUT) || -+ unlikely(READ_ONCE(peer->is_dead))) -+ goto out; -+ -+ wg_peer_get(peer); -+ /* Queues up calling packet_send_queued_handshakes(peer), where we do a -+ * peer_put(peer) after: -+ */ -+ if (!queue_work(peer->device->handshake_send_wq, -+ &peer->transmit_handshake_work)) -+ /* If the work was already queued, we want to drop the -+ * extra reference: -+ */ -+ wg_peer_put(peer); -+out: -+ rcu_read_unlock_bh(); -+} -+ -+void wg_packet_send_handshake_response(struct wg_peer *peer) -+{ -+ struct message_handshake_response packet; -+ -+ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); -+ 
net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ -+ if (wg_noise_handshake_create_response(&packet, &peer->handshake)) { -+ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); -+ if (wg_noise_handshake_begin_session(&peer->handshake, -+ &peer->keypairs)) { -+ wg_timers_session_derived(peer); -+ wg_timers_any_authenticated_packet_traversal(peer); -+ wg_timers_any_authenticated_packet_sent(peer); -+ atomic64_set(&peer->last_sent_handshake, -+ ktime_get_coarse_boottime_ns()); -+ wg_socket_send_buffer_to_peer(peer, &packet, -+ sizeof(packet), -+ HANDSHAKE_DSCP); -+ } -+ } -+} -+ -+void wg_packet_send_handshake_cookie(struct wg_device *wg, -+ struct sk_buff *initiating_skb, -+ __le32 sender_index) -+{ -+ struct message_handshake_cookie packet; -+ -+ net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", -+ wg->dev->name, initiating_skb); -+ wg_cookie_message_create(&packet, initiating_skb, sender_index, -+ &wg->cookie_checker); -+ wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, -+ sizeof(packet)); -+} -+ -+static void keep_key_fresh(struct wg_peer *peer) -+{ -+ struct noise_keypair *keypair; -+ bool send = false; -+ -+ rcu_read_lock_bh(); -+ keypair = rcu_dereference_bh(peer->keypairs.current_keypair); -+ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && -+ (unlikely(atomic64_read(&keypair->sending.counter.counter) > -+ REKEY_AFTER_MESSAGES) || -+ (keypair->i_am_the_initiator && -+ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, -+ REKEY_AFTER_TIME))))) -+ send = true; -+ rcu_read_unlock_bh(); -+ -+ if (send) -+ wg_packet_send_queued_handshake_initiation(peer, false); -+} -+ -+static unsigned int calculate_skb_padding(struct sk_buff *skb) -+{ -+ /* We do this modulo business with the MTU, just in case the networking -+ * layer gives us a packet that's bigger than the 
MTU. In that case, we -+ * wouldn't want the final subtraction to overflow in the case of the -+ * padded_size being clamped. -+ */ -+ unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; -+ unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); -+ -+ if (padded_size > PACKET_CB(skb)->mtu) -+ padded_size = PACKET_CB(skb)->mtu; -+ return padded_size - last_unit; -+} -+ -+static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) -+{ -+ unsigned int padding_len, plaintext_len, trailer_len; -+ struct scatterlist sg[MAX_SKB_FRAGS + 8]; -+ struct message_data *header; -+ struct sk_buff *trailer; -+ int num_frags; -+ -+ /* Calculate lengths. */ -+ padding_len = calculate_skb_padding(skb); -+ trailer_len = padding_len + noise_encrypted_len(0); -+ plaintext_len = skb->len + padding_len; -+ -+ /* Expand data section to have room for padding and auth tag. */ -+ num_frags = skb_cow_data(skb, trailer_len, &trailer); -+ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) -+ return false; -+ -+ /* Set the padding to zeros, and make sure it and the auth tag are part -+ * of the skb. -+ */ -+ memset(skb_tail_pointer(trailer), 0, padding_len); -+ -+ /* Expand head section to have room for our header and the network -+ * stack's headers. -+ */ -+ if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) -+ return false; -+ -+ /* Finalize checksum calculation for the inner packet, if required. */ -+ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL && -+ skb_checksum_help(skb))) -+ return false; -+ -+ /* Only after checksumming can we safely add on the padding at the end -+ * and the header. 
-+ */ -+ skb_set_inner_network_header(skb, 0); -+ header = (struct message_data *)skb_push(skb, sizeof(*header)); -+ header->header.type = cpu_to_le32(MESSAGE_DATA); -+ header->key_idx = keypair->remote_index; -+ header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); -+ pskb_put(skb, trailer, trailer_len); -+ -+ /* Now we can encrypt the scattergather segments */ -+ sg_init_table(sg, num_frags); -+ if (skb_to_sgvec(skb, sg, sizeof(struct message_data), -+ noise_encrypted_len(plaintext_len)) <= 0) -+ return false; -+ return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, -+ PACKET_CB(skb)->nonce, -+ keypair->sending.key); -+} -+ -+void wg_packet_send_keepalive(struct wg_peer *peer) -+{ -+ struct sk_buff *skb; -+ -+ if (skb_queue_empty(&peer->staged_packet_queue)) { -+ skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, -+ GFP_ATOMIC); -+ if (unlikely(!skb)) -+ return; -+ skb_reserve(skb, DATA_PACKET_HEAD_ROOM); -+ skb->dev = peer->device->dev; -+ PACKET_CB(skb)->mtu = skb->dev->mtu; -+ skb_queue_tail(&peer->staged_packet_queue, skb); -+ net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr); -+ } -+ -+ wg_packet_send_staged_packets(peer); -+} -+ -+static void wg_packet_create_data_done(struct sk_buff *first, -+ struct wg_peer *peer) -+{ -+ struct sk_buff *skb, *next; -+ bool is_keepalive, data_sent = false; -+ -+ wg_timers_any_authenticated_packet_traversal(peer); -+ wg_timers_any_authenticated_packet_sent(peer); -+ skb_list_walk_safe(first, skb, next) { -+ is_keepalive = skb->len == message_data_len(0); -+ if (likely(!wg_socket_send_skb_to_peer(peer, skb, -+ PACKET_CB(skb)->ds) && !is_keepalive)) -+ data_sent = true; -+ } -+ -+ if (likely(data_sent)) -+ wg_timers_data_sent(peer); -+ -+ keep_key_fresh(peer); -+} -+ -+void wg_packet_tx_worker(struct work_struct *work) -+{ -+ struct crypt_queue *queue = container_of(work, struct crypt_queue, -+ 
work); -+ struct noise_keypair *keypair; -+ enum packet_state state; -+ struct sk_buff *first; -+ struct wg_peer *peer; -+ -+ while ((first = __ptr_ring_peek(&queue->ring)) != NULL && -+ (state = atomic_read_acquire(&PACKET_CB(first)->state)) != -+ PACKET_STATE_UNCRYPTED) { -+ __ptr_ring_discard_one(&queue->ring); -+ peer = PACKET_PEER(first); -+ keypair = PACKET_CB(first)->keypair; -+ -+ if (likely(state == PACKET_STATE_CRYPTED)) -+ wg_packet_create_data_done(first, peer); -+ else -+ kfree_skb_list(first); -+ -+ wg_noise_keypair_put(keypair, false); -+ wg_peer_put(peer); -+ } -+} -+ -+void wg_packet_encrypt_worker(struct work_struct *work) -+{ -+ struct crypt_queue *queue = container_of(work, struct multicore_worker, -+ work)->ptr; -+ struct sk_buff *first, *skb, *next; -+ -+ while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { -+ enum packet_state state = PACKET_STATE_CRYPTED; -+ -+ skb_list_walk_safe(first, skb, next) { -+ if (likely(encrypt_packet(skb, -+ PACKET_CB(first)->keypair))) { -+ wg_reset_packet(skb); -+ } else { -+ state = PACKET_STATE_DEAD; -+ break; -+ } -+ } -+ wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, -+ state); -+ -+ } -+} -+ -+static void wg_packet_create_data(struct sk_buff *first) -+{ -+ struct wg_peer *peer = PACKET_PEER(first); -+ struct wg_device *wg = peer->device; -+ int ret = -EINVAL; -+ -+ rcu_read_lock_bh(); -+ if (unlikely(READ_ONCE(peer->is_dead))) -+ goto err; -+ -+ ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, -+ &peer->tx_queue, first, -+ wg->packet_crypt_wq, -+ &wg->encrypt_queue.last_cpu); -+ if (unlikely(ret == -EPIPE)) -+ wg_queue_enqueue_per_peer(&peer->tx_queue, first, -+ PACKET_STATE_DEAD); -+err: -+ rcu_read_unlock_bh(); -+ if (likely(!ret || ret == -EPIPE)) -+ return; -+ wg_noise_keypair_put(PACKET_CB(first)->keypair, false); -+ wg_peer_put(peer); -+ kfree_skb_list(first); -+} -+ -+void wg_packet_purge_staged_packets(struct wg_peer *peer) -+{ -+ 
spin_lock_bh(&peer->staged_packet_queue.lock); -+ peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; -+ __skb_queue_purge(&peer->staged_packet_queue); -+ spin_unlock_bh(&peer->staged_packet_queue.lock); -+} -+ -+void wg_packet_send_staged_packets(struct wg_peer *peer) -+{ -+ struct noise_symmetric_key *key; -+ struct noise_keypair *keypair; -+ struct sk_buff_head packets; -+ struct sk_buff *skb; -+ -+ /* Steal the current queue into our local one. */ -+ __skb_queue_head_init(&packets); -+ spin_lock_bh(&peer->staged_packet_queue.lock); -+ skb_queue_splice_init(&peer->staged_packet_queue, &packets); -+ spin_unlock_bh(&peer->staged_packet_queue.lock); -+ if (unlikely(skb_queue_empty(&packets))) -+ return; -+ -+ /* First we make sure we have a valid reference to a valid key. */ -+ rcu_read_lock_bh(); -+ keypair = wg_noise_keypair_get( -+ rcu_dereference_bh(peer->keypairs.current_keypair)); -+ rcu_read_unlock_bh(); -+ if (unlikely(!keypair)) -+ goto out_nokey; -+ key = &keypair->sending; -+ if (unlikely(!READ_ONCE(key->is_valid))) -+ goto out_nokey; -+ if (unlikely(wg_birthdate_has_expired(key->birthdate, -+ REJECT_AFTER_TIME))) -+ goto out_invalid; -+ -+ /* After we know we have a somewhat valid key, we now try to assign -+ * nonces to all of the packets in the queue. If we can't assign nonces -+ * for all of them, we just consider it a failure and wait for the next -+ * handshake. -+ */ -+ skb_queue_walk(&packets, skb) { -+ /* 0 for no outer TOS: no leak. TODO: at some later point, we -+ * might consider using flowi->tos as outer instead. 
-+ */ -+ PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); -+ PACKET_CB(skb)->nonce = -+ atomic64_inc_return(&key->counter.counter) - 1; -+ if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) -+ goto out_invalid; -+ } -+ -+ packets.prev->next = NULL; -+ wg_peer_get(keypair->entry.peer); -+ PACKET_CB(packets.next)->keypair = keypair; -+ wg_packet_create_data(packets.next); -+ return; -+ -+out_invalid: -+ WRITE_ONCE(key->is_valid, false); -+out_nokey: -+ wg_noise_keypair_put(keypair, false); -+ -+ /* We orphan the packets if we're waiting on a handshake, so that they -+ * don't block a socket's pool. -+ */ -+ skb_queue_walk(&packets, skb) -+ skb_orphan(skb); -+ /* Then we put them back on the top of the queue. We're not too -+ * concerned about accidentally getting things a little out of order if -+ * packets are being added really fast, because this queue is for before -+ * packets can even be sent and it's small anyway. -+ */ -+ spin_lock_bh(&peer->staged_packet_queue.lock); -+ skb_queue_splice(&packets, &peer->staged_packet_queue); -+ spin_unlock_bh(&peer->staged_packet_queue.lock); -+ -+ /* If we're exiting because there's something wrong with the key, it -+ * means we should initiate a new handshake. -+ */ -+ wg_packet_send_queued_handshake_initiation(peer, false); -+} ---- /dev/null -+++ b/drivers/net/wireguard/socket.c -@@ -0,0 +1,437 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#include "device.h" -+#include "peer.h" -+#include "socket.h" -+#include "queueing.h" -+#include "messages.h" -+ -+#include <linux/ctype.h> -+#include <linux/net.h> -+#include <linux/if_vlan.h> -+#include <linux/if_ether.h> -+#include <linux/inetdevice.h> -+#include <net/udp_tunnel.h> -+#include <net/ipv6.h> -+ -+static int send4(struct wg_device *wg, struct sk_buff *skb, -+ struct endpoint *endpoint, u8 ds, struct dst_cache *cache) -+{ -+ struct flowi4 fl = { -+ .saddr = endpoint->src4.s_addr, -+ .daddr = endpoint->addr4.sin_addr.s_addr, -+ .fl4_dport = endpoint->addr4.sin_port, -+ .flowi4_mark = wg->fwmark, -+ .flowi4_proto = IPPROTO_UDP -+ }; -+ struct rtable *rt = NULL; -+ struct sock *sock; -+ int ret = 0; -+ -+ skb_mark_not_on_list(skb); -+ skb->dev = wg->dev; -+ skb->mark = wg->fwmark; -+ -+ rcu_read_lock_bh(); -+ sock = rcu_dereference_bh(wg->sock4); -+ -+ if (unlikely(!sock)) { -+ ret = -ENONET; -+ goto err; -+ } -+ -+ fl.fl4_sport = inet_sk(sock)->inet_sport; -+ -+ if (cache) -+ rt = dst_cache_get_ip4(cache, &fl.saddr); -+ -+ if (!rt) { -+ security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); -+ if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, -+ fl.saddr, RT_SCOPE_HOST))) { -+ endpoint->src4.s_addr = 0; -+ *(__force __be32 *)&endpoint->src_if4 = 0; -+ fl.saddr = 0; -+ if (cache) -+ dst_cache_reset(cache); -+ } -+ rt = ip_route_output_flow(sock_net(sock), &fl, sock); -+ if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && -+ PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && -+ rt->dst.dev->ifindex != endpoint->src_if4)))) { -+ endpoint->src4.s_addr = 0; -+ *(__force __be32 *)&endpoint->src_if4 = 0; -+ fl.saddr = 0; -+ if (cache) -+ dst_cache_reset(cache); -+ if (!IS_ERR(rt)) -+ ip_rt_put(rt); -+ rt = ip_route_output_flow(sock_net(sock), &fl, sock); -+ } -+ if (unlikely(IS_ERR(rt))) { -+ ret = PTR_ERR(rt); -+ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", -+ wg->dev->name, &endpoint->addr, ret); -+ goto err; -+ } else if 
(unlikely(rt->dst.dev == skb->dev)) { -+ ip_rt_put(rt); -+ ret = -ELOOP; -+ net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", -+ wg->dev->name, &endpoint->addr); -+ goto err; -+ } -+ if (cache) -+ dst_cache_set_ip4(cache, &rt->dst, fl.saddr); -+ } -+ -+ skb->ignore_df = 1; -+ udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, -+ ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, -+ fl.fl4_dport, false, false); -+ goto out; -+ -+err: -+ kfree_skb(skb); -+out: -+ rcu_read_unlock_bh(); -+ return ret; -+} -+ -+static int send6(struct wg_device *wg, struct sk_buff *skb, -+ struct endpoint *endpoint, u8 ds, struct dst_cache *cache) -+{ -+#if IS_ENABLED(CONFIG_IPV6) -+ struct flowi6 fl = { -+ .saddr = endpoint->src6, -+ .daddr = endpoint->addr6.sin6_addr, -+ .fl6_dport = endpoint->addr6.sin6_port, -+ .flowi6_mark = wg->fwmark, -+ .flowi6_oif = endpoint->addr6.sin6_scope_id, -+ .flowi6_proto = IPPROTO_UDP -+ /* TODO: addr->sin6_flowinfo */ -+ }; -+ struct dst_entry *dst = NULL; -+ struct sock *sock; -+ int ret = 0; -+ -+ skb_mark_not_on_list(skb); -+ skb->dev = wg->dev; -+ skb->mark = wg->fwmark; -+ -+ rcu_read_lock_bh(); -+ sock = rcu_dereference_bh(wg->sock6); -+ -+ if (unlikely(!sock)) { -+ ret = -ENONET; -+ goto err; -+ } -+ -+ fl.fl6_sport = inet_sk(sock)->inet_sport; -+ -+ if (cache) -+ dst = dst_cache_get_ip6(cache, &fl.saddr); -+ -+ if (!dst) { -+ security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); -+ if (unlikely(!ipv6_addr_any(&fl.saddr) && -+ !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { -+ endpoint->src6 = fl.saddr = in6addr_any; -+ if (cache) -+ dst_cache_reset(cache); -+ } -+ dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, -+ NULL); -+ if (unlikely(IS_ERR(dst))) { -+ ret = PTR_ERR(dst); -+ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", -+ wg->dev->name, &endpoint->addr, ret); -+ goto err; -+ } else if (unlikely(dst->dev == skb->dev)) { -+ dst_release(dst); -+ ret = -ELOOP; -+ net_dbg_ratelimited("%s: 
Avoiding routing loop to %pISpfsc\n", -+ wg->dev->name, &endpoint->addr); -+ goto err; -+ } -+ if (cache) -+ dst_cache_set_ip6(cache, dst, &fl.saddr); -+ } -+ -+ skb->ignore_df = 1; -+ udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, -+ ip6_dst_hoplimit(dst), 0, fl.fl6_sport, -+ fl.fl6_dport, false); -+ goto out; -+ -+err: -+ kfree_skb(skb); -+out: -+ rcu_read_unlock_bh(); -+ return ret; -+#else -+ return -EAFNOSUPPORT; -+#endif -+} -+ -+int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) -+{ -+ size_t skb_len = skb->len; -+ int ret = -EAFNOSUPPORT; -+ -+ read_lock_bh(&peer->endpoint_lock); -+ if (peer->endpoint.addr.sa_family == AF_INET) -+ ret = send4(peer->device, skb, &peer->endpoint, ds, -+ &peer->endpoint_cache); -+ else if (peer->endpoint.addr.sa_family == AF_INET6) -+ ret = send6(peer->device, skb, &peer->endpoint, ds, -+ &peer->endpoint_cache); -+ else -+ dev_kfree_skb(skb); -+ if (likely(!ret)) -+ peer->tx_bytes += skb_len; -+ read_unlock_bh(&peer->endpoint_lock); -+ -+ return ret; -+} -+ -+int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, -+ size_t len, u8 ds) -+{ -+ struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); -+ -+ if (unlikely(!skb)) -+ return -ENOMEM; -+ -+ skb_reserve(skb, SKB_HEADER_LEN); -+ skb_set_inner_network_header(skb, 0); -+ skb_put_data(skb, buffer, len); -+ return wg_socket_send_skb_to_peer(peer, skb, ds); -+} -+ -+int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, -+ struct sk_buff *in_skb, void *buffer, -+ size_t len) -+{ -+ int ret = 0; -+ struct sk_buff *skb; -+ struct endpoint endpoint; -+ -+ if (unlikely(!in_skb)) -+ return -EINVAL; -+ ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); -+ if (unlikely(ret < 0)) -+ return ret; -+ -+ skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); -+ if (unlikely(!skb)) -+ return -ENOMEM; -+ skb_reserve(skb, SKB_HEADER_LEN); -+ skb_set_inner_network_header(skb, 0); -+ skb_put_data(skb, 
buffer, len); -+ -+ if (endpoint.addr.sa_family == AF_INET) -+ ret = send4(wg, skb, &endpoint, 0, NULL); -+ else if (endpoint.addr.sa_family == AF_INET6) -+ ret = send6(wg, skb, &endpoint, 0, NULL); -+ /* No other possibilities if the endpoint is valid, which it is, -+ * as we checked above. -+ */ -+ -+ return ret; -+} -+ -+int wg_socket_endpoint_from_skb(struct endpoint *endpoint, -+ const struct sk_buff *skb) -+{ -+ memset(endpoint, 0, sizeof(*endpoint)); -+ if (skb->protocol == htons(ETH_P_IP)) { -+ endpoint->addr4.sin_family = AF_INET; -+ endpoint->addr4.sin_port = udp_hdr(skb)->source; -+ endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; -+ endpoint->src4.s_addr = ip_hdr(skb)->daddr; -+ endpoint->src_if4 = skb->skb_iif; -+ } else if (skb->protocol == htons(ETH_P_IPV6)) { -+ endpoint->addr6.sin6_family = AF_INET6; -+ endpoint->addr6.sin6_port = udp_hdr(skb)->source; -+ endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; -+ endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( -+ &ipv6_hdr(skb)->saddr, skb->skb_iif); -+ endpoint->src6 = ipv6_hdr(skb)->daddr; -+ } else { -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) -+{ -+ return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && -+ a->addr4.sin_port == b->addr4.sin_port && -+ a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && -+ a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || -+ (a->addr.sa_family == AF_INET6 && -+ b->addr.sa_family == AF_INET6 && -+ a->addr6.sin6_port == b->addr6.sin6_port && -+ ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && -+ a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && -+ ipv6_addr_equal(&a->src6, &b->src6)) || -+ unlikely(!a->addr.sa_family && !b->addr.sa_family); -+} -+ -+void wg_socket_set_peer_endpoint(struct wg_peer *peer, -+ const struct endpoint *endpoint) -+{ -+ /* First we check unlocked, in order to optimize, since it's pretty rare -+ * that an 
endpoint will change. If we happen to be mid-write, and two -+ * CPUs wind up writing the same thing or something slightly different, -+ * it doesn't really matter much either. -+ */ -+ if (endpoint_eq(endpoint, &peer->endpoint)) -+ return; -+ write_lock_bh(&peer->endpoint_lock); -+ if (endpoint->addr.sa_family == AF_INET) { -+ peer->endpoint.addr4 = endpoint->addr4; -+ peer->endpoint.src4 = endpoint->src4; -+ peer->endpoint.src_if4 = endpoint->src_if4; -+ } else if (endpoint->addr.sa_family == AF_INET6) { -+ peer->endpoint.addr6 = endpoint->addr6; -+ peer->endpoint.src6 = endpoint->src6; -+ } else { -+ goto out; -+ } -+ dst_cache_reset(&peer->endpoint_cache); -+out: -+ write_unlock_bh(&peer->endpoint_lock); -+} -+ -+void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, -+ const struct sk_buff *skb) -+{ -+ struct endpoint endpoint; -+ -+ if (!wg_socket_endpoint_from_skb(&endpoint, skb)) -+ wg_socket_set_peer_endpoint(peer, &endpoint); -+} -+ -+void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) -+{ -+ write_lock_bh(&peer->endpoint_lock); -+ memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); -+ dst_cache_reset(&peer->endpoint_cache); -+ write_unlock_bh(&peer->endpoint_lock); -+} -+ -+static int wg_receive(struct sock *sk, struct sk_buff *skb) -+{ -+ struct wg_device *wg; -+ -+ if (unlikely(!sk)) -+ goto err; -+ wg = sk->sk_user_data; -+ if (unlikely(!wg)) -+ goto err; -+ wg_packet_receive(wg, skb); -+ return 0; -+ -+err: -+ kfree_skb(skb); -+ return 0; -+} -+ -+static void sock_free(struct sock *sock) -+{ -+ if (unlikely(!sock)) -+ return; -+ sk_clear_memalloc(sock); -+ udp_tunnel_sock_release(sock->sk_socket); -+} -+ -+static void set_sock_opts(struct socket *sock) -+{ -+ sock->sk->sk_allocation = GFP_ATOMIC; -+ sock->sk->sk_sndbuf = INT_MAX; -+ sk_set_memalloc(sock->sk); -+} -+ -+int wg_socket_init(struct wg_device *wg, u16 port) -+{ -+ int ret; -+ struct udp_tunnel_sock_cfg cfg = { -+ .sk_user_data = wg, -+ .encap_type = 1, -+ 
.encap_rcv = wg_receive -+ }; -+ struct socket *new4 = NULL, *new6 = NULL; -+ struct udp_port_cfg port4 = { -+ .family = AF_INET, -+ .local_ip.s_addr = htonl(INADDR_ANY), -+ .local_udp_port = htons(port), -+ .use_udp_checksums = true -+ }; -+#if IS_ENABLED(CONFIG_IPV6) -+ int retries = 0; -+ struct udp_port_cfg port6 = { -+ .family = AF_INET6, -+ .local_ip6 = IN6ADDR_ANY_INIT, -+ .use_udp6_tx_checksums = true, -+ .use_udp6_rx_checksums = true, -+ .ipv6_v6only = true -+ }; -+#endif -+ -+#if IS_ENABLED(CONFIG_IPV6) -+retry: -+#endif -+ -+ ret = udp_sock_create(wg->creating_net, &port4, &new4); -+ if (ret < 0) { -+ pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); -+ return ret; -+ } -+ set_sock_opts(new4); -+ setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ if (ipv6_mod_enabled()) { -+ port6.local_udp_port = inet_sk(new4->sk)->inet_sport; -+ ret = udp_sock_create(wg->creating_net, &port6, &new6); -+ if (ret < 0) { -+ udp_tunnel_sock_release(new4); -+ if (ret == -EADDRINUSE && !port && retries++ < 100) -+ goto retry; -+ pr_err("%s: Could not create IPv6 socket\n", -+ wg->dev->name); -+ return ret; -+ } -+ set_sock_opts(new6); -+ setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); -+ } -+#endif -+ -+ wg_socket_reinit(wg, new4->sk, new6 ? 
new6->sk : NULL); -+ return 0; -+} -+ -+void wg_socket_reinit(struct wg_device *wg, struct sock *new4, -+ struct sock *new6) -+{ -+ struct sock *old4, *old6; -+ -+ mutex_lock(&wg->socket_update_lock); -+ old4 = rcu_dereference_protected(wg->sock4, -+ lockdep_is_held(&wg->socket_update_lock)); -+ old6 = rcu_dereference_protected(wg->sock6, -+ lockdep_is_held(&wg->socket_update_lock)); -+ rcu_assign_pointer(wg->sock4, new4); -+ rcu_assign_pointer(wg->sock6, new6); -+ if (new4) -+ wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); -+ mutex_unlock(&wg->socket_update_lock); -+ synchronize_rcu(); -+ synchronize_net(); -+ sock_free(old4); -+ sock_free(old6); -+} ---- /dev/null -+++ b/drivers/net/wireguard/socket.h -@@ -0,0 +1,44 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#ifndef _WG_SOCKET_H -+#define _WG_SOCKET_H -+ -+#include <linux/netdevice.h> -+#include <linux/udp.h> -+#include <linux/if_vlan.h> -+#include <linux/if_ether.h> -+ -+int wg_socket_init(struct wg_device *wg, u16 port); -+void wg_socket_reinit(struct wg_device *wg, struct sock *new4, -+ struct sock *new6); -+int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data, -+ size_t len, u8 ds); -+int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, -+ u8 ds); -+int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, -+ struct sk_buff *in_skb, -+ void *out_buffer, size_t len); -+ -+int wg_socket_endpoint_from_skb(struct endpoint *endpoint, -+ const struct sk_buff *skb); -+void wg_socket_set_peer_endpoint(struct wg_peer *peer, -+ const struct endpoint *endpoint); -+void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, -+ const struct sk_buff *skb); -+void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer); -+ -+#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG) -+#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) 
do { \ -+ struct endpoint __endpoint; \ -+ wg_socket_endpoint_from_skb(&__endpoint, skb); \ -+ net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \ -+ ##__VA_ARGS__); \ -+ } while (0) -+#else -+#define net_dbg_skb_ratelimited(fmt, skb, ...) -+#endif -+ -+#endif /* _WG_SOCKET_H */ ---- /dev/null -+++ b/drivers/net/wireguard/timers.c -@@ -0,0 +1,243 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include "timers.h" -+#include "device.h" -+#include "peer.h" -+#include "queueing.h" -+#include "socket.h" -+ -+/* -+ * - Timer for retransmitting the handshake if we don't hear back after -+ * `REKEY_TIMEOUT + jitter` ms. -+ * -+ * - Timer for sending empty packet if we have received a packet but after have -+ * not sent one for `KEEPALIVE_TIMEOUT` ms. -+ * -+ * - Timer for initiating new handshake if we have sent a packet but after have -+ * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + -+ * jitter` ms. -+ * -+ * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms -+ * if no new keys have been received. -+ * -+ * - Timer for, if enabled, sending an empty authenticated packet every user- -+ * specified seconds. 
-+ */ -+ -+static inline void mod_peer_timer(struct wg_peer *peer, -+ struct timer_list *timer, -+ unsigned long expires) -+{ -+ rcu_read_lock_bh(); -+ if (likely(netif_running(peer->device->dev) && -+ !READ_ONCE(peer->is_dead))) -+ mod_timer(timer, expires); -+ rcu_read_unlock_bh(); -+} -+ -+static void wg_expired_retransmit_handshake(struct timer_list *timer) -+{ -+ struct wg_peer *peer = from_timer(peer, timer, -+ timer_retransmit_handshake); -+ -+ if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { -+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); -+ -+ del_timer(&peer->timer_send_keepalive); -+ /* We drop all packets without a keypair and don't try again, -+ * if we try unsuccessfully for too long to make a handshake. -+ */ -+ wg_packet_purge_staged_packets(peer); -+ -+ /* We set a timer for destroying any residue that might be left -+ * of a partial exchange. -+ */ -+ if (!timer_pending(&peer->timer_zero_key_material)) -+ mod_peer_timer(peer, &peer->timer_zero_key_material, -+ jiffies + REJECT_AFTER_TIME * 3 * HZ); -+ } else { -+ ++peer->timer_handshake_attempts; -+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr, REKEY_TIMEOUT, -+ peer->timer_handshake_attempts + 1); -+ -+ /* We clear the endpoint address src address, in case this is -+ * the cause of trouble. 
-+ */ -+ wg_socket_clear_peer_endpoint_src(peer); -+ -+ wg_packet_send_queued_handshake_initiation(peer, true); -+ } -+} -+ -+static void wg_expired_send_keepalive(struct timer_list *timer) -+{ -+ struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive); -+ -+ wg_packet_send_keepalive(peer); -+ if (peer->timer_need_another_keepalive) { -+ peer->timer_need_another_keepalive = false; -+ mod_peer_timer(peer, &peer->timer_send_keepalive, -+ jiffies + KEEPALIVE_TIMEOUT * HZ); -+ } -+} -+ -+static void wg_expired_new_handshake(struct timer_list *timer) -+{ -+ struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake); -+ -+ pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT); -+ /* We clear the endpoint address src address, in case this is the cause -+ * of trouble. -+ */ -+ wg_socket_clear_peer_endpoint_src(peer); -+ wg_packet_send_queued_handshake_initiation(peer, false); -+} -+ -+static void wg_expired_zero_key_material(struct timer_list *timer) -+{ -+ struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material); -+ -+ rcu_read_lock_bh(); -+ if (!READ_ONCE(peer->is_dead)) { -+ wg_peer_get(peer); -+ if (!queue_work(peer->device->handshake_send_wq, -+ &peer->clear_peer_work)) -+ /* If the work was already on the queue, we want to drop -+ * the extra reference. 
-+ */ -+ wg_peer_put(peer); -+ } -+ rcu_read_unlock_bh(); -+} -+ -+static void wg_queued_expired_zero_key_material(struct work_struct *work) -+{ -+ struct wg_peer *peer = container_of(work, struct wg_peer, -+ clear_peer_work); -+ -+ pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", -+ peer->device->dev->name, peer->internal_id, -+ &peer->endpoint.addr, REJECT_AFTER_TIME * 3); -+ wg_noise_handshake_clear(&peer->handshake); -+ wg_noise_keypairs_clear(&peer->keypairs); -+ wg_peer_put(peer); -+} -+ -+static void wg_expired_send_persistent_keepalive(struct timer_list *timer) -+{ -+ struct wg_peer *peer = from_timer(peer, timer, -+ timer_persistent_keepalive); -+ -+ if (likely(peer->persistent_keepalive_interval)) -+ wg_packet_send_keepalive(peer); -+} -+ -+/* Should be called after an authenticated data packet is sent. */ -+void wg_timers_data_sent(struct wg_peer *peer) -+{ -+ if (!timer_pending(&peer->timer_new_handshake)) -+ mod_peer_timer(peer, &peer->timer_new_handshake, -+ jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + -+ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); -+} -+ -+/* Should be called after an authenticated data packet is received. */ -+void wg_timers_data_received(struct wg_peer *peer) -+{ -+ if (likely(netif_running(peer->device->dev))) { -+ if (!timer_pending(&peer->timer_send_keepalive)) -+ mod_peer_timer(peer, &peer->timer_send_keepalive, -+ jiffies + KEEPALIVE_TIMEOUT * HZ); -+ else -+ peer->timer_need_another_keepalive = true; -+ } -+} -+ -+/* Should be called after any type of authenticated packet is sent, whether -+ * keepalive, data, or handshake. -+ */ -+void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) -+{ -+ del_timer(&peer->timer_send_keepalive); -+} -+ -+/* Should be called after any type of authenticated packet is received, whether -+ * keepalive, data, or handshake. 
-+ */ -+void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) -+{ -+ del_timer(&peer->timer_new_handshake); -+} -+ -+/* Should be called after a handshake initiation message is sent. */ -+void wg_timers_handshake_initiated(struct wg_peer *peer) -+{ -+ mod_peer_timer(peer, &peer->timer_retransmit_handshake, -+ jiffies + REKEY_TIMEOUT * HZ + -+ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); -+} -+ -+/* Should be called after a handshake response message is received and processed -+ * or when getting key confirmation via the first data message. -+ */ -+void wg_timers_handshake_complete(struct wg_peer *peer) -+{ -+ del_timer(&peer->timer_retransmit_handshake); -+ peer->timer_handshake_attempts = 0; -+ peer->sent_lastminute_handshake = false; -+ ktime_get_real_ts64(&peer->walltime_last_handshake); -+} -+ -+/* Should be called after an ephemeral key is created, which is before sending a -+ * handshake response or after receiving a handshake response. -+ */ -+void wg_timers_session_derived(struct wg_peer *peer) -+{ -+ mod_peer_timer(peer, &peer->timer_zero_key_material, -+ jiffies + REJECT_AFTER_TIME * 3 * HZ); -+} -+ -+/* Should be called before a packet with authentication, whether -+ * keepalive, data, or handshakem is sent, or after one is received. 
-+ */ -+void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) -+{ -+ if (peer->persistent_keepalive_interval) -+ mod_peer_timer(peer, &peer->timer_persistent_keepalive, -+ jiffies + peer->persistent_keepalive_interval * HZ); -+} -+ -+void wg_timers_init(struct wg_peer *peer) -+{ -+ timer_setup(&peer->timer_retransmit_handshake, -+ wg_expired_retransmit_handshake, 0); -+ timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); -+ timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); -+ timer_setup(&peer->timer_zero_key_material, -+ wg_expired_zero_key_material, 0); -+ timer_setup(&peer->timer_persistent_keepalive, -+ wg_expired_send_persistent_keepalive, 0); -+ INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); -+ peer->timer_handshake_attempts = 0; -+ peer->sent_lastminute_handshake = false; -+ peer->timer_need_another_keepalive = false; -+} -+ -+void wg_timers_stop(struct wg_peer *peer) -+{ -+ del_timer_sync(&peer->timer_retransmit_handshake); -+ del_timer_sync(&peer->timer_send_keepalive); -+ del_timer_sync(&peer->timer_new_handshake); -+ del_timer_sync(&peer->timer_zero_key_material); -+ del_timer_sync(&peer->timer_persistent_keepalive); -+ flush_work(&peer->clear_peer_work); -+} ---- /dev/null -+++ b/drivers/net/wireguard/timers.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#ifndef _WG_TIMERS_H -+#define _WG_TIMERS_H -+ -+#include <linux/ktime.h> -+ -+struct wg_peer; -+ -+void wg_timers_init(struct wg_peer *peer); -+void wg_timers_stop(struct wg_peer *peer); -+void wg_timers_data_sent(struct wg_peer *peer); -+void wg_timers_data_received(struct wg_peer *peer); -+void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer); -+void wg_timers_any_authenticated_packet_received(struct wg_peer *peer); -+void wg_timers_handshake_initiated(struct wg_peer *peer); -+void wg_timers_handshake_complete(struct wg_peer *peer); -+void wg_timers_session_derived(struct wg_peer *peer); -+void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer); -+ -+static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds, -+ u64 expiration_seconds) -+{ -+ return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC) -+ <= (s64)ktime_get_coarse_boottime_ns(); -+} -+ -+#endif /* _WG_TIMERS_H */ ---- /dev/null -+++ b/drivers/net/wireguard/version.h -@@ -0,0 +1 @@ -+#define WIREGUARD_VERSION "1.0.0" ---- /dev/null -+++ b/include/uapi/linux/wireguard.h -@@ -0,0 +1,196 @@ -+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * -+ * Documentation -+ * ============= -+ * -+ * The below enums and macros are for interfacing with WireGuard, using generic -+ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two -+ * methods: get and set. Note that while they share many common attributes, -+ * these two functions actually accept a slightly different set of inputs and -+ * outputs. -+ * -+ * WG_CMD_GET_DEVICE -+ * ----------------- -+ * -+ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. 
The command should contain -+ * one but not both of: -+ * -+ * WGDEVICE_A_IFINDEX: NLA_U32 -+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 -+ * -+ * The kernel will then return several messages (NLM_F_MULTI) containing the -+ * following tree of nested items: -+ * -+ * WGDEVICE_A_IFINDEX: NLA_U32 -+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 -+ * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN -+ * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN -+ * WGDEVICE_A_LISTEN_PORT: NLA_U16 -+ * WGDEVICE_A_FWMARK: NLA_U32 -+ * WGDEVICE_A_PEERS: NLA_NESTED -+ * 0: NLA_NESTED -+ * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN -+ * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN -+ * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 -+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 -+ * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec -+ * WGPEER_A_RX_BYTES: NLA_U64 -+ * WGPEER_A_TX_BYTES: NLA_U64 -+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED -+ * 0: NLA_NESTED -+ * WGALLOWEDIP_A_FAMILY: NLA_U16 -+ * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr -+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 -+ * 0: NLA_NESTED -+ * ... -+ * 0: NLA_NESTED -+ * ... -+ * ... -+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32 -+ * 0: NLA_NESTED -+ * ... -+ * ... -+ * -+ * It is possible that all of the allowed IPs of a single peer will not -+ * fit within a single netlink message. In that case, the same peer will -+ * be written in the following message, except it will only contain -+ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several -+ * times in a row for the same peer. It is then up to the receiver to -+ * coalesce adjacent peers. Likewise, it is possible that all peers will -+ * not fit within a single message. So, subsequent peers will be sent -+ * in following messages, except those will only contain WGDEVICE_A_IFNAME -+ * and WGDEVICE_A_PEERS. 
It is then up to the receiver to coalesce these -+ * messages to form the complete list of peers. -+ * -+ * Since this is an NLM_F_DUMP command, the final message will always be -+ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message -+ * contains an integer error code. It is either zero or a negative error -+ * code corresponding to the errno. -+ * -+ * WG_CMD_SET_DEVICE -+ * ----------------- -+ * -+ * May only be called via NLM_F_REQUEST. The command should contain the -+ * following tree of nested items, containing one but not both of -+ * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: -+ * -+ * WGDEVICE_A_IFINDEX: NLA_U32 -+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 -+ * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current -+ * peers should be removed prior to adding the list below. -+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove -+ * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly -+ * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable -+ * WGDEVICE_A_PEERS: NLA_NESTED -+ * 0: NLA_NESTED -+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN -+ * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the -+ * specified peer should not exist at the end of the -+ * operation, rather than added/updated and/or -+ * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed -+ * IPs of this peer should be removed prior to adding -+ * the list below and/or WGPEER_F_UPDATE_ONLY if the -+ * peer should only be set if it already exists. -+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove -+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 -+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable -+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED -+ * 0: NLA_NESTED -+ * WGALLOWEDIP_A_FAMILY: NLA_U16 -+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr -+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 -+ * 0: NLA_NESTED -+ * ... -+ * 0: NLA_NESTED -+ * ... -+ * ... 
-+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at -+ * all by most users of this API, as the -+ * most recent protocol will be used when -+ * this is unset. Otherwise, must be set -+ * to 1. -+ * 0: NLA_NESTED -+ * ... -+ * ... -+ * -+ * It is possible that the amount of configuration data exceeds that of -+ * the maximum message length accepted by the kernel. In that case, several -+ * messages should be sent one after another, with each successive one -+ * filling in information not contained in the prior. Note that if -+ * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably -+ * should not be specified in fragments that come after, so that the list -+ * of peers is only cleared the first time but appended after. Likewise for -+ * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message -+ * of a peer, it likely should not be specified in subsequent fragments. -+ * -+ * If an error occurs, NLMSG_ERROR will reply containing an errno. 
-+ */ -+ -+#ifndef _WG_UAPI_WIREGUARD_H -+#define _WG_UAPI_WIREGUARD_H -+ -+#define WG_GENL_NAME "wireguard" -+#define WG_GENL_VERSION 1 -+ -+#define WG_KEY_LEN 32 -+ -+enum wg_cmd { -+ WG_CMD_GET_DEVICE, -+ WG_CMD_SET_DEVICE, -+ __WG_CMD_MAX -+}; -+#define WG_CMD_MAX (__WG_CMD_MAX - 1) -+ -+enum wgdevice_flag { -+ WGDEVICE_F_REPLACE_PEERS = 1U << 0, -+ __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS -+}; -+enum wgdevice_attribute { -+ WGDEVICE_A_UNSPEC, -+ WGDEVICE_A_IFINDEX, -+ WGDEVICE_A_IFNAME, -+ WGDEVICE_A_PRIVATE_KEY, -+ WGDEVICE_A_PUBLIC_KEY, -+ WGDEVICE_A_FLAGS, -+ WGDEVICE_A_LISTEN_PORT, -+ WGDEVICE_A_FWMARK, -+ WGDEVICE_A_PEERS, -+ __WGDEVICE_A_LAST -+}; -+#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) -+ -+enum wgpeer_flag { -+ WGPEER_F_REMOVE_ME = 1U << 0, -+ WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, -+ WGPEER_F_UPDATE_ONLY = 1U << 2, -+ __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | -+ WGPEER_F_UPDATE_ONLY -+}; -+enum wgpeer_attribute { -+ WGPEER_A_UNSPEC, -+ WGPEER_A_PUBLIC_KEY, -+ WGPEER_A_PRESHARED_KEY, -+ WGPEER_A_FLAGS, -+ WGPEER_A_ENDPOINT, -+ WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, -+ WGPEER_A_LAST_HANDSHAKE_TIME, -+ WGPEER_A_RX_BYTES, -+ WGPEER_A_TX_BYTES, -+ WGPEER_A_ALLOWEDIPS, -+ WGPEER_A_PROTOCOL_VERSION, -+ __WGPEER_A_LAST -+}; -+#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) -+ -+enum wgallowedip_attribute { -+ WGALLOWEDIP_A_UNSPEC, -+ WGALLOWEDIP_A_FAMILY, -+ WGALLOWEDIP_A_IPADDR, -+ WGALLOWEDIP_A_CIDR_MASK, -+ __WGALLOWEDIP_A_LAST -+}; -+#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) -+ -+#endif /* _WG_UAPI_WIREGUARD_H */ ---- /dev/null -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -0,0 +1,537 @@ -+#!/bin/bash -+# SPDX-License-Identifier: GPL-2.0 -+# -+# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+# -+# This script tests the below topology: -+# -+# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐ -+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ -+# │ │ │ │ │ │ -+# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│ -+# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││ -+# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│ -+# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││ -+# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││ -+# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│ -+# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘ -+# └──────────────────────────────────┘ -+# -+# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the -+# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0 -+# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further -+# details on how this is accomplished. 
-+set -e -+ -+exec 3>&1 -+export WG_HIDE_KEYS=never -+netns0="wg-test-$$-0" -+netns1="wg-test-$$-1" -+netns2="wg-test-$$-2" -+pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; } -+pp() { pretty "" "$*"; "$@"; } -+maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; } -+n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; } -+n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; } -+n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } -+ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } -+ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } -+ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } -+sleep() { read -t "$1" -N 0 || true; } -+waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; } -+waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } -+waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } -+waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } -+ -+cleanup() { -+ set +e -+ exec 2>/dev/null -+ printf "$orig_message_cost" > /proc/sys/net/core/message_cost -+ ip0 link del dev wg0 -+ ip1 link del dev wg0 -+ ip2 link del dev wg0 -+ local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" -+ [[ -n $to_kill ]] && kill $to_kill -+ pp ip netns del $netns1 -+ pp ip netns del $netns2 -+ pp ip netns del $netns0 -+ exit -+} -+ -+orig_message_cost="$(< /proc/sys/net/core/message_cost)" -+trap cleanup EXIT -+printf 0 > /proc/sys/net/core/message_cost -+ -+ip netns del $netns0 2>/dev/null || true -+ip netns del $netns1 2>/dev/null || true -+ip netns del $netns2 2>/dev/null || true -+pp ip netns add $netns0 -+pp ip netns add 
$netns1 -+pp ip netns add $netns2 -+ip0 link set up dev lo -+ -+ip0 link add dev wg0 type wireguard -+ip0 link set wg0 netns $netns1 -+ip0 link add dev wg0 type wireguard -+ip0 link set wg0 netns $netns2 -+key1="$(pp wg genkey)" -+key2="$(pp wg genkey)" -+key3="$(pp wg genkey)" -+pub1="$(pp wg pubkey <<<"$key1")" -+pub2="$(pp wg pubkey <<<"$key2")" -+pub3="$(pp wg pubkey <<<"$key3")" -+psk="$(pp wg genpsk)" -+[[ -n $key1 && -n $key2 && -n $psk ]] -+ -+configure_peers() { -+ ip1 addr add 192.168.241.1/24 dev wg0 -+ ip1 addr add fd00::1/24 dev wg0 -+ -+ ip2 addr add 192.168.241.2/24 dev wg0 -+ ip2 addr add fd00::2/24 dev wg0 -+ -+ n1 wg set wg0 \ -+ private-key <(echo "$key1") \ -+ listen-port 1 \ -+ peer "$pub2" \ -+ preshared-key <(echo "$psk") \ -+ allowed-ips 192.168.241.2/32,fd00::2/128 -+ n2 wg set wg0 \ -+ private-key <(echo "$key2") \ -+ listen-port 2 \ -+ peer "$pub1" \ -+ preshared-key <(echo "$psk") \ -+ allowed-ips 192.168.241.1/32,fd00::1/128 -+ -+ ip1 link set up dev wg0 -+ ip2 link set up dev wg0 -+} -+configure_peers -+ -+tests() { -+ # Ping over IPv4 -+ n2 ping -c 10 -f -W 1 192.168.241.1 -+ n1 ping -c 10 -f -W 1 192.168.241.2 -+ -+ # Ping over IPv6 -+ n2 ping6 -c 10 -f -W 1 fd00::1 -+ n1 ping6 -c 10 -f -W 1 fd00::2 -+ -+ # TCP over IPv4 -+ n2 iperf3 -s -1 -B 192.168.241.2 & -+ waitiperf $netns2 -+ n1 iperf3 -Z -t 3 -c 192.168.241.2 -+ -+ # TCP over IPv6 -+ n1 iperf3 -s -1 -B fd00::1 & -+ waitiperf $netns1 -+ n2 iperf3 -Z -t 3 -c fd00::1 -+ -+ # UDP over IPv4 -+ n1 iperf3 -s -1 -B 192.168.241.1 & -+ waitiperf $netns1 -+ n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 -+ -+ # UDP over IPv6 -+ n2 iperf3 -s -1 -B fd00::2 & -+ waitiperf $netns2 -+ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 -+} -+ -+[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" -+big_mtu=$(( 34816 - 1500 + $orig_mtu )) -+ -+# Test using IPv4 as outer transport -+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 -+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 -+# 
Before calling tests, we first make sure that the stats counters and timestamper are working -+n2 ping -c 10 -f -W 1 192.168.241.1 -+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0) -+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) -+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0) -+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) -+read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer) -+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) -+read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer) -+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) -+read _ timestamp < <(n1 wg show wg0 latest-handshakes) -+(( timestamp != 0 )) -+ -+tests -+ip1 link set wg0 mtu $big_mtu -+ip2 link set wg0 mtu $big_mtu -+tests -+ -+ip1 link set wg0 mtu $orig_mtu -+ip2 link set wg0 mtu $orig_mtu -+ -+# Test using IPv6 as outer transport -+n1 wg set wg0 peer "$pub2" endpoint [::1]:2 -+n2 wg set wg0 peer "$pub1" endpoint [::1]:1 -+tests -+ip1 link set wg0 mtu $big_mtu -+ip2 link set wg0 mtu $big_mtu -+tests -+ -+# Test that route MTUs work with the padding -+ip1 link set wg0 mtu 1300 -+ip2 link set wg0 mtu 1300 -+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 -+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 -+n0 iptables -A INPUT -m length --length 1360 -j DROP -+n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299 -+n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299 -+n2 ping -c 1 -W 1 -s 1269 192.168.241.1 -+n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299 -+n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299 -+n0 iptables -F INPUT -+ -+ip1 link set wg0 mtu $orig_mtu -+ip2 link set wg0 mtu $orig_mtu -+ -+# Test using IPv4 that roaming works -+ip0 -4 addr del 127.0.0.1/8 dev lo -+ip0 -4 addr add 127.212.121.99/8 dev lo -+n1 wg set wg0 listen-port 9999 -+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 -+n1 ping6 -W 1 
-c 1 fd00::2 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]] -+ -+# Test using IPv6 that roaming works -+n1 wg set wg0 listen-port 9998 -+n1 wg set wg0 peer "$pub2" endpoint [::1]:2 -+n1 ping -W 1 -c 1 192.168.241.2 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]] -+ -+# Test that crypto-RP filter works -+n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 -+exec 4< <(n1 ncat -l -u -p 1111) -+ncat_pid=$! -+waitncatudp $netns1 -+n2 ncat -u 192.168.241.1 1111 <<<"X" -+read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] -+kill $ncat_pid -+more_specific_key="$(pp wg genkey | pp wg pubkey)" -+n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 -+n2 wg set wg0 listen-port 9997 -+exec 4< <(n1 ncat -l -u -p 1111) -+ncat_pid=$! -+waitncatudp $netns1 -+n2 ncat -u 192.168.241.1 1111 <<<"X" -+! read -r -N 1 -t 1 out <&4 || false -+kill $ncat_pid -+n1 wg set wg0 peer "$more_specific_key" remove -+[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] -+ -+# Test that we can change private keys keys and immediately handshake -+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 -+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 -+n1 ping -W 1 -c 1 192.168.241.2 -+n1 wg set wg0 private-key <(echo "$key3") -+n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove -+n1 ping -W 1 -c 1 192.168.241.2 -+ -+ip1 link del wg0 -+ip2 link del wg0 -+ -+# Test using NAT. 
We now change the topology to this: -+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ -+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ -+# │ │ │ │ │ │ -+# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │ -+# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │ -+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │ -+# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ -+# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │ -+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │ -+# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘ -+ -+ip1 link add dev wg0 type wireguard -+ip2 link add dev wg0 type wireguard -+configure_peers -+ -+ip0 link add vethrc type veth peer name vethc -+ip0 link add vethrs type veth peer name veths -+ip0 link set vethc netns $netns1 -+ip0 link set veths netns $netns2 -+ip0 link set vethrc up -+ip0 link set vethrs up -+ip0 addr add 192.168.1.1/24 dev vethrc -+ip0 addr add 10.0.0.1/24 dev vethrs -+ip1 addr add 192.168.1.100/24 dev vethc -+ip1 link set vethc up -+ip1 route add default via 192.168.1.1 -+ip2 addr add 10.0.0.100/24 dev veths -+ip2 link set veths up -+waitiface $netns0 vethrc -+waitiface $netns0 vethrs -+waitiface $netns1 vethc -+waitiface $netns2 veths -+ -+n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' -+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' -+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' -+n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 
10.0.0.0/24 -j SNAT --to 10.0.0.1 -+ -+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1 -+n1 ping -W 1 -c 1 192.168.241.2 -+n2 ping -W 1 -c 1 192.168.241.1 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] -+# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). -+pp sleep 3 -+n2 ping -W 1 -c 1 192.168.241.1 -+n1 wg set wg0 peer "$pub2" persistent-keepalive 0 -+ -+# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. -+ip1 -6 addr add fc00::9/96 dev vethc -+ip1 -6 route add default via fc00::1 -+ip2 -4 addr add 192.168.99.7/32 dev wg0 -+ip2 -6 addr add abab::1111/128 dev wg0 -+n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 -+ip1 -6 route add default dev wg0 table 51820 -+ip1 -6 rule add not fwmark 51820 table 51820 -+ip1 -6 rule add table main suppress_prefixlength 0 -+ip1 -4 route add default dev wg0 table 51820 -+ip1 -4 rule add not fwmark 51820 table 51820 -+ip1 -4 rule add table main suppress_prefixlength 0 -+# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. -+if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then -+ # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. 
-+ n1 ping -W 1 -c 100 -f 192.168.99.7 -+ n1 ping -W 1 -c 100 -f abab::1111 -+fi -+ -+n0 iptables -t nat -F -+ip0 link del vethrc -+ip0 link del vethrs -+ip1 link del wg0 -+ip2 link del wg0 -+ -+# Test that saddr routing is sticky but not too sticky, changing to this topology: -+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐ -+# │ $ns1 namespace │ │ $ns2 namespace │ -+# │ │ │ │ -+# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │ -+# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │ -+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │ -+# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │ -+# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │ -+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │ -+# └────────────────────────────────────────┘ └────────────────────────────────────────┘ -+ -+ip1 link add dev wg0 type wireguard -+ip2 link add dev wg0 type wireguard -+configure_peers -+ip1 link add veth1 type veth peer name veth2 -+ip1 link set veth2 netns $netns2 -+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' -+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' -+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad' -+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad' -+n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries' -+ -+# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed -+ip1 addr add 10.0.0.1/24 dev veth1 -+ip1 addr add fd00:aa::1/96 dev veth1 -+ip2 addr add 10.0.0.2/24 dev veth2 -+ip2 addr add fd00:aa::2/96 dev veth2 -+ip1 link set veth1 up -+ip2 link set veth2 up -+waitiface $netns1 veth1 -+waitiface $netns2 veth2 -+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 -+n1 ping -W 1 -c 1 192.168.241.2 -+ip1 addr add 10.0.0.10/24 dev veth1 -+ip1 addr del 10.0.0.1/24 dev veth1 
-+n1 ping -W 1 -c 1 192.168.241.2 -+n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 -+n1 ping -W 1 -c 1 192.168.241.2 -+ip1 addr add fd00:aa::10/96 dev veth1 -+ip1 addr del fd00:aa::1/96 dev veth1 -+n1 ping -W 1 -c 1 192.168.241.2 -+ -+# Now we show that we can successfully do reply to sender routing -+ip1 link set veth1 down -+ip2 link set veth2 down -+ip1 addr flush dev veth1 -+ip2 addr flush dev veth2 -+ip1 addr add 10.0.0.1/24 dev veth1 -+ip1 addr add 10.0.0.2/24 dev veth1 -+ip1 addr add fd00:aa::1/96 dev veth1 -+ip1 addr add fd00:aa::2/96 dev veth1 -+ip2 addr add 10.0.0.3/24 dev veth2 -+ip2 addr add fd00:aa::3/96 dev veth2 -+ip1 link set veth1 up -+ip2 link set veth2 up -+waitiface $netns1 veth1 -+waitiface $netns2 veth2 -+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1 -+n2 ping -W 1 -c 1 192.168.241.1 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] -+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 -+n2 ping -W 1 -c 1 192.168.241.1 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]] -+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1 -+n2 ping -W 1 -c 1 192.168.241.1 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]] -+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 -+n2 ping -W 1 -c 1 192.168.241.1 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] -+ -+# What happens if the inbound destination address belongs to a different interface as the default route? 
-+ip1 link add dummy0 type dummy -+ip1 addr add 10.50.0.1/24 dev dummy0 -+ip1 link set dummy0 up -+ip2 route add 10.50.0.0/24 dev veth2 -+n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 -+n2 ping -W 1 -c 1 192.168.241.1 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] -+ -+ip1 link del dummy0 -+ip1 addr flush dev veth1 -+ip2 addr flush dev veth2 -+ip1 route flush dev veth1 -+ip2 route flush dev veth2 -+ -+# Now we see what happens if another interface route takes precedence over an ongoing one -+ip1 link add veth3 type veth peer name veth4 -+ip1 link set veth4 netns $netns2 -+ip1 addr add 10.0.0.1/24 dev veth1 -+ip2 addr add 10.0.0.2/24 dev veth2 -+ip1 addr add 10.0.0.3/24 dev veth3 -+ip1 link set veth1 up -+ip2 link set veth2 up -+ip1 link set veth3 up -+ip2 link set veth4 up -+waitiface $netns1 veth1 -+waitiface $netns2 veth2 -+waitiface $netns1 veth3 -+waitiface $netns2 veth4 -+ip1 route flush dev veth1 -+ip1 route flush dev veth3 -+ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 -+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 -+n1 ping -W 1 -c 1 192.168.241.2 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] -+ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 -+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter' -+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter' -+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' -+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' -+n1 ping -W 1 -c 1 192.168.241.2 -+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] -+ -+ip1 link del veth1 -+ip1 link del veth3 -+ip1 link del wg0 -+ip2 link del wg0 -+ -+# We test that Netlink/IPC is working properly by doing things that usually cause split responses -+ip0 link add dev wg0 type wireguard -+config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" ) -+for a in {1..255}; do -+ for b in {0..255}; do -+ config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" ) -+ done -+done 
-+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") -+i=0 -+for ip in $(n0 wg show wg0 allowed-ips); do -+ ((++i)) -+done -+((i == 255*256*2+1)) -+ip0 link del wg0 -+ip0 link add dev wg0 type wireguard -+config=( "[Interface]" "PrivateKey=$(wg genkey)" ) -+for a in {1..40}; do -+ config+=( "[Peer]" "PublicKey=$(wg genkey)" ) -+ for b in {1..52}; do -+ config+=( "AllowedIPs=$a.$b.0.0/16" ) -+ done -+done -+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") -+i=0 -+while read -r line; do -+ j=0 -+ for ip in $line; do -+ ((++j)) -+ done -+ ((j == 53)) -+ ((++i)) -+done < <(n0 wg show wg0 allowed-ips) -+((i == 40)) -+ip0 link del wg0 -+ip0 link add wg0 type wireguard -+config=( ) -+for i in {1..29}; do -+ config+=( "[Peer]" "PublicKey=$(wg genkey)" ) -+done -+config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" ) -+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") -+n0 wg showconf wg0 > /dev/null -+ip0 link del wg0 -+ -+allowedips=( ) -+for i in {1..197}; do -+ allowedips+=( abcd::$i ) -+done -+saved_ifs="$IFS" -+IFS=, -+allowedips="${allowedips[*]}" -+IFS="$saved_ifs" -+ip0 link add wg0 type wireguard -+n0 wg set wg0 peer "$pub1" -+n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" -+{ -+ read -r pub allowedips -+ [[ $pub == "$pub1" && $allowedips == "(none)" ]] -+ read -r pub allowedips -+ [[ $pub == "$pub2" ]] -+ i=0 -+ for _ in $allowedips; do -+ ((++i)) -+ done -+ ((i == 197)) -+} < <(n0 wg show wg0 allowed-ips) -+ip0 link del wg0 -+ -+! 
n0 wg show doesnotexist || false -+ -+ip0 link add wg0 type wireguard -+n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") -+[[ $(n0 wg show wg0 private-key) == "$key1" ]] -+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]] -+n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null -+[[ $(n0 wg show wg0 private-key) == "(none)" ]] -+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]] -+n0 wg set wg0 peer "$pub2" -+n0 wg set wg0 private-key <(echo "$key2") -+[[ $(n0 wg show wg0 public-key) == "$pub2" ]] -+[[ -z $(n0 wg show wg0 peers) ]] -+n0 wg set wg0 peer "$pub2" -+[[ -z $(n0 wg show wg0 peers) ]] -+n0 wg set wg0 private-key <(echo "$key1") -+n0 wg set wg0 peer "$pub2" -+[[ $(n0 wg show wg0 peers) == "$pub2" ]] -+n0 wg set wg0 private-key <(echo "/${key1:1}") -+[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]] -+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16 -+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 -+n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 -+n0 wg set wg0 peer "$pub2" allowed-ips ::/0 -+ip0 link del wg0 -+ -+declare -A objects -+while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do -+ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue -+ objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" -+done < /dev/kmsg -+alldeleted=1 -+for object in "${!objects[@]}"; do -+ if [[ ${objects["$object"]} != *createddestroyed ]]; then -+ echo "Error: $object: merely ${objects["$object"]}" >&3 -+ alldeleted=0 -+ fi -+done -+[[ $alldeleted -eq 1 ]] -+pretty "" "Objects that were created were also destroyed." 
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0073-wireguard-selftests-import-harness-makefile-for-test.patch b/target/linux/generic/backport-5.4/080-wireguard-0073-wireguard-selftests-import-harness-makefile-for-test.patch deleted file mode 100644 index ca3853aa19..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0073-wireguard-selftests-import-harness-makefile-for-test.patch +++ /dev/null @@ -1,1078 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 15 Dec 2019 22:08:00 +0100 -Subject: [PATCH] wireguard: selftests: import harness makefile for test suite - -commit 65d88d04114bca7d85faebd5fed61069cb2b632c upstream. - -WireGuard has been using this on build.wireguard.com for the last -several years with considerable success. It allows for very quick and -iterative development cycles, and supports several platforms. - -To run the test suite on your current platform in QEMU: - - $ make -C tools/testing/selftests/wireguard/qemu -j$(nproc) - -To run it with KASAN and such turned on: - - $ DEBUG_KERNEL=yes make -C tools/testing/selftests/wireguard/qemu -j$(nproc) - -To run it emulated for another platform in QEMU: - - $ ARCH=arm make -C tools/testing/selftests/wireguard/qemu -j$(nproc) - -At the moment, we support aarch64_be, aarch64, arm, armeb, i686, m68k, -mips64, mips64el, mips, mipsel, powerpc64le, powerpc, and x86_64. - -The system supports incremental rebuilding, so it should be very fast to -change a single file and then test it out and have immediate feedback. - -This requires for the right toolchain and qemu to be installed prior. -I've had success with those from musl.cc. - -This is tailored for WireGuard at the moment, though later projects -might generalize it for other network testing. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - .../selftests/wireguard/qemu/.gitignore | 2 + - .../testing/selftests/wireguard/qemu/Makefile | 385 ++++++++++++++++++ - .../wireguard/qemu/arch/aarch64.config | 5 + - .../wireguard/qemu/arch/aarch64_be.config | 6 + - .../selftests/wireguard/qemu/arch/arm.config | 9 + - .../wireguard/qemu/arch/armeb.config | 10 + - .../selftests/wireguard/qemu/arch/i686.config | 5 + - .../selftests/wireguard/qemu/arch/m68k.config | 9 + - .../selftests/wireguard/qemu/arch/mips.config | 11 + - .../wireguard/qemu/arch/mips64.config | 14 + - .../wireguard/qemu/arch/mips64el.config | 15 + - .../wireguard/qemu/arch/mipsel.config | 12 + - .../wireguard/qemu/arch/powerpc.config | 10 + - .../wireguard/qemu/arch/powerpc64le.config | 12 + - .../wireguard/qemu/arch/x86_64.config | 5 + - .../selftests/wireguard/qemu/debug.config | 67 +++ - tools/testing/selftests/wireguard/qemu/init.c | 284 +++++++++++++ - .../selftests/wireguard/qemu/kernel.config | 86 ++++ - 18 files changed, 947 insertions(+) - create mode 100644 tools/testing/selftests/wireguard/qemu/.gitignore - create mode 100644 tools/testing/selftests/wireguard/qemu/Makefile - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/arm.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/armeb.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/i686.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/m68k.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64el.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mipsel.config - create mode 100644 
tools/testing/selftests/wireguard/qemu/arch/powerpc.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config - create mode 100644 tools/testing/selftests/wireguard/qemu/arch/x86_64.config - create mode 100644 tools/testing/selftests/wireguard/qemu/debug.config - create mode 100644 tools/testing/selftests/wireguard/qemu/init.c - create mode 100644 tools/testing/selftests/wireguard/qemu/kernel.config - ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/.gitignore -@@ -0,0 +1,2 @@ -+build/ -+distfiles/ ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/Makefile -@@ -0,0 +1,385 @@ -+# SPDX-License-Identifier: GPL-2.0 -+# -+# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ -+PWD := $(shell pwd) -+ -+CHOST := $(shell gcc -dumpmachine) -+ifneq (,$(ARCH)) -+CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) -+ifeq (,$(CBUILD)) -+$(error The toolchain for $(ARCH) is not installed) -+endif -+else -+CBUILD := $(CHOST) -+ARCH := $(firstword $(subst -, ,$(CBUILD))) -+endif -+ -+# Set these from the environment to override -+KERNEL_PATH ?= $(PWD)/../../../../.. 
-+BUILD_PATH ?= $(PWD)/build/$(ARCH) -+DISTFILES_PATH ?= $(PWD)/distfiles -+NR_CPUS ?= 4 -+ -+MIRROR := https://download.wireguard.com/qemu-test/distfiles/ -+ -+default: qemu -+ -+# variable name, tarball project name, version, tarball extension, default URI base -+define tar_download = -+$(1)_VERSION := $(3) -+$(1)_NAME := $(2)-$$($(1)_VERSION) -+$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4) -+$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME) -+$(call file_download,$$($(1)_NAME)$(4),$(5),$(6)) -+endef -+ -+define file_download = -+$(DISTFILES_PATH)/$(1): -+ mkdir -p $(DISTFILES_PATH) -+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' -+ if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi -+endef -+ -+$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61)) -+$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) -+$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f)) -+$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) -+$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2)) -+$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5)) -+$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21)) -+$(eval 
$(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a)) -+$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071)) -+ -+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) -+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) -+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) -+ -+export CFLAGS ?= -O3 -pipe -+export LDFLAGS ?= -+export CPPFLAGS := -I$(BUILD_PATH)/include -+ -+ifeq ($(CHOST),$(CBUILD)) -+CROSS_COMPILE_FLAG := --host=$(CHOST) -+NOPIE_GCC := gcc -fno-PIE -+CFLAGS += -march=native -+STRIP := strip -+else -+$(info Cross compilation: building for $(CBUILD) using $(CHOST)) -+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) -+export CROSS_COMPILE=$(CBUILD)- -+NOPIE_GCC := $(CBUILD)-gcc -fno-PIE -+STRIP := $(CBUILD)-strip -+endif -+ifeq ($(ARCH),aarch64) -+QEMU_ARCH := aarch64 -+KERNEL_ARCH := arm64 -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm -+else -+QEMU_MACHINE := -cpu cortex-a53 -machine virt -+CFLAGS += -march=armv8-a -mtune=cortex-a53 -+endif -+else ifeq ($(ARCH),aarch64_be) -+QEMU_ARCH := aarch64 -+KERNEL_ARCH := arm64 -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm -+else -+QEMU_MACHINE := -cpu cortex-a53 -machine virt -+CFLAGS += -march=armv8-a -mtune=cortex-a53 -+endif -+else ifeq ($(ARCH),arm) -+QEMU_ARCH := arm -+KERNEL_ARCH := arm -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine 
virt,gic_version=host,accel=kvm -+else -+QEMU_MACHINE := -cpu cortex-a15 -machine virt -+CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux -+endif -+else ifeq ($(ARCH),armeb) -+QEMU_ARCH := arm -+KERNEL_ARCH := arm -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm -+else -+QEMU_MACHINE := -cpu cortex-a15 -machine virt -+CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. -+LDFLAGS += -Wl,--be8 -+endif -+else ifeq ($(ARCH),x86_64) -+QEMU_ARCH := x86_64 -+KERNEL_ARCH := x86_64 -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine q35,accel=kvm -+else -+QEMU_MACHINE := -cpu Skylake-Server -machine q35 -+CFLAGS += -march=skylake-avx512 -+endif -+else ifeq ($(ARCH),i686) -+QEMU_ARCH := i386 -+KERNEL_ARCH := x86 -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage -+ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST)) -+QEMU_MACHINE := -cpu host -machine q35,accel=kvm -+else -+QEMU_MACHINE := -cpu coreduo -machine q35 -+CFLAGS += -march=prescott -+endif -+else ifeq ($(ARCH),mips64) -+QEMU_ARCH := mips64 -+KERNEL_ARCH := mips -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine malta,accel=kvm -+CFLAGS += -EB -+else -+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 -+CFLAGS += -march=mips64r2 -EB -+endif -+else ifeq ($(ARCH),mips64el) -+QEMU_ARCH := mips64el -+KERNEL_ARCH := mips -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine malta,accel=kvm -+CFLAGS += -EL -+else -+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 -+CFLAGS += -march=mips64r2 -EL -+endif -+else ifeq ($(ARCH),mips) -+QEMU_ARCH := mips -+KERNEL_ARCH := mips -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux 
-+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine malta,accel=kvm -+CFLAGS += -EB -+else -+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 -+CFLAGS += -march=mips32r2 -EB -+endif -+else ifeq ($(ARCH),mipsel) -+QEMU_ARCH := mipsel -+KERNEL_ARCH := mips -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host -machine malta,accel=kvm -+CFLAGS += -EL -+else -+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 -+CFLAGS += -march=mips32r2 -EL -+endif -+else ifeq ($(ARCH),powerpc64le) -+QEMU_ARCH := ppc64 -+KERNEL_ARCH := powerpc -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries -+else -+QEMU_MACHINE := -machine pseries -+endif -+CFLAGS += -mcpu=powerpc64le -mlong-double-64 -+else ifeq ($(ARCH),powerpc) -+QEMU_ARCH := ppc -+KERNEL_ARCH := powerpc -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 -+else -+QEMU_MACHINE := -machine ppce500 -+endif -+CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt -+else ifeq ($(ARCH),m68k) -+QEMU_ARCH := m68k -+KERNEL_ARCH := m68k -+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux -+ifeq ($(CHOST),$(CBUILD)) -+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -+else -+QEMU_MACHINE := -machine q800 -+endif -+else -+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) -+endif -+ -+REAL_CC := $(CBUILD)-gcc -+MUSL_CC := $(BUILD_PATH)/musl-gcc -+export CC := $(MUSL_CC) -+USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed -+ -+build: $(KERNEL_BZIMAGE) -+qemu: $(KERNEL_BZIMAGE) -+ rm -f $(BUILD_PATH)/result -+ timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ -+ -nodefaults \ -+ -nographic \ -+ -smp $(NR_CPUS) \ -+ $(QEMU_MACHINE) \ -+ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y 
$(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ -+ -serial stdio \ -+ -serial file:$(BUILD_PATH)/result \ -+ -no-reboot \ -+ -monitor none \ -+ -kernel $< -+ grep -Fq success $(BUILD_PATH)/result -+ -+$(BUILD_PATH)/init-cpio-spec.txt: -+ mkdir -p $(BUILD_PATH) -+ echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ -+ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ -+ echo "dir /dev 755 0 0" >> $@ -+ echo "nod /dev/console 644 0 0 c 5 1" >> $@ -+ echo "dir /bin 755 0 0" >> $@ -+ echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ -+ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@ -+ echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ -+ echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ -+ echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ -+ echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ -+ echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ -+ echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@ -+ echo "slink /bin/iptables xtables-multi 777 0 0" >> $@ -+ echo "slink /bin/ping6 ping 777 0 0" >> $@ -+ echo "dir /lib 755 0 0" >> $@ -+ echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ -+ echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ -+ -+$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config -+ mkdir -p $(KERNEL_BUILD_PATH) -+ cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config -+ printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config -+ cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config -+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig -+ cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config -+ $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) 
&& ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) -+ -+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) -+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" -+ -+$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config -+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install -+ touch $@ -+ -+$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) -+ mkdir -p $(BUILD_PATH) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) -+ $(MAKE) -C $(MUSL_PATH) -+ $(STRIP) -s $@ -+ -+$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so -+ $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers -+ touch $@ -+ -+$(MUSL_CC): $(MUSL_PATH)/lib/libc.so -+ sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs -+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc -+ chmod +x $(BUILD_PATH)/musl-gcc -+ -+$(IPERF_PATH)/.installed: $(IPERF_TAR) -+ mkdir -p $(BUILD_PATH) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h -+ sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile* -+ touch $@ -+ -+$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) -+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) 
-D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared -+ $(MAKE) -C $(IPERF_PATH) -+ $(STRIP) -s $@ -+ -+$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ touch $@ -+ -+$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) -+ cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared -+ $(MAKE) -C $(LIBMNL_PATH) -+ sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc -+ -+$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ touch $@ -+ -+$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg -+ $(STRIP) -s $@ -+ -+$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) -+ mkdir -p $(BUILD_PATH) -+ $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< -+ $(STRIP) -s $@ -+ -+$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) -+ mkdir -p $(BUILD_PATH) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ touch $@ -+ -+$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) -+ $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping -+ $(STRIP) -s $@ -+ -+$(BASH_PATH)/.installed: $(BASH_TAR) -+ mkdir -p $(BUILD_PATH) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ touch $@ -+ -+$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) -+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble -+ $(MAKE) -C $(BASH_PATH) -+ $(STRIP) -s $@ -+ -+$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) -+ mkdir -p $(BUILD_PATH) -+ flock -s $<.lock 
tar -C $(BUILD_PATH) -xf $< -+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk -+ printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile -+ touch $@ -+ -+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip -+ $(STRIP) -s $(IPROUTE2_PATH)/ip/ip -+ -+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss -+ $(STRIP) -s $(IPROUTE2_PATH)/misc/ss -+ -+$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) -+ mkdir -p $(BUILD_PATH) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure -+ touch $@ -+ -+$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -+ cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include -+ $(MAKE) -C $(IPTABLES_PATH) -+ $(STRIP) -s $@ -+ -+$(NMAP_PATH)/.installed: $(NMAP_TAR) -+ mkdir -p $(BUILD_PATH) -+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -+ touch $@ -+ -+$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) -+ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) 
--enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux -+ $(MAKE) -C $(NMAP_PATH) build-ncat -+ $(STRIP) -s $@ -+ -+clean: -+ rm -rf $(BUILD_PATH) -+ -+distclean: clean -+ rm -rf $(DISTFILES_PATH) -+ -+menuconfig: $(KERNEL_BUILD_PATH)/.config -+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig -+ -+.PHONY: qemu build clean distclean menuconfig -+.DELETE_ON_ERROR: ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config -@@ -0,0 +1,5 @@ -+CONFIG_SERIAL_AMBA_PL011=y -+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" -+CONFIG_FRAME_WARN=1280 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config -@@ -0,0 +1,6 @@ -+CONFIG_CPU_BIG_ENDIAN=y -+CONFIG_SERIAL_AMBA_PL011=y -+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" -+CONFIG_FRAME_WARN=1280 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config -@@ -0,0 +1,9 @@ -+CONFIG_MMU=y -+CONFIG_ARCH_MULTI_V7=y -+CONFIG_ARCH_VIRT=y -+CONFIG_THUMB2_KERNEL=n -+CONFIG_SERIAL_AMBA_PL011=y -+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" -+CONFIG_FRAME_WARN=1024 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config -@@ -0,0 +1,10 @@ -+CONFIG_MMU=y -+CONFIG_ARCH_MULTI_V7=y -+CONFIG_ARCH_VIRT=y -+CONFIG_THUMB2_KERNEL=n -+CONFIG_SERIAL_AMBA_PL011=y -+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" -+CONFIG_CPU_BIG_ENDIAN=y -+CONFIG_FRAME_WARN=1024 ---- /dev/null -+++ 
b/tools/testing/selftests/wireguard/qemu/arch/i686.config -@@ -0,0 +1,5 @@ -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1024 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config -@@ -0,0 +1,9 @@ -+CONFIG_MMU=y -+CONFIG_M68040=y -+CONFIG_MAC=y -+CONFIG_SERIAL_PMACZILOG=y -+CONFIG_SERIAL_PMACZILOG_TTYS=y -+CONFIG_SERIAL_PMACZILOG_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1024 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config -@@ -0,0 +1,11 @@ -+CONFIG_CPU_MIPS32_R2=y -+CONFIG_MIPS_MALTA=y -+CONFIG_MIPS_CPS=y -+CONFIG_MIPS_FP_SUPPORT=y -+CONFIG_POWER_RESET=y -+CONFIG_POWER_RESET_SYSCON=y -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1024 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config -@@ -0,0 +1,14 @@ -+CONFIG_64BIT=y -+CONFIG_CPU_MIPS64_R2=y -+CONFIG_MIPS32_N32=y -+CONFIG_CPU_HAS_MSA=y -+CONFIG_MIPS_MALTA=y -+CONFIG_MIPS_CPS=y -+CONFIG_MIPS_FP_SUPPORT=y -+CONFIG_POWER_RESET=y -+CONFIG_POWER_RESET_SYSCON=y -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1280 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config -@@ -0,0 +1,15 @@ -+CONFIG_64BIT=y -+CONFIG_CPU_MIPS64_R2=y -+CONFIG_MIPS32_N32=y -+CONFIG_CPU_HAS_MSA=y -+CONFIG_MIPS_MALTA=y -+CONFIG_CPU_LITTLE_ENDIAN=y -+CONFIG_MIPS_CPS=y -+CONFIG_MIPS_FP_SUPPORT=y -+CONFIG_POWER_RESET=y -+CONFIG_POWER_RESET_SYSCON=y -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1280 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config -@@ 
-0,0 +1,12 @@ -+CONFIG_CPU_MIPS32_R2=y -+CONFIG_MIPS_MALTA=y -+CONFIG_CPU_LITTLE_ENDIAN=y -+CONFIG_MIPS_CPS=y -+CONFIG_MIPS_FP_SUPPORT=y -+CONFIG_POWER_RESET=y -+CONFIG_POWER_RESET_SYSCON=y -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1024 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config -@@ -0,0 +1,10 @@ -+CONFIG_PPC_QEMU_E500=y -+CONFIG_FSL_SOC_BOOKE=y -+CONFIG_PPC_85xx=y -+CONFIG_PHYS_64BIT=y -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_MATH_EMULATION=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1024 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config -@@ -0,0 +1,12 @@ -+CONFIG_PPC64=y -+CONFIG_PPC_PSERIES=y -+CONFIG_ALTIVEC=y -+CONFIG_VSX=y -+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y -+CONFIG_PPC_RADIX_MMU=y -+CONFIG_HVC_CONSOLE=y -+CONFIG_CPU_LITTLE_ENDIAN=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" -+CONFIG_SECTION_MISMATCH_WARN_ONLY=y -+CONFIG_FRAME_WARN=1280 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config -@@ -0,0 +1,5 @@ -+CONFIG_SERIAL_8250=y -+CONFIG_SERIAL_8250_CONSOLE=y -+CONFIG_CMDLINE_BOOL=y -+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" -+CONFIG_FRAME_WARN=1280 ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/debug.config -@@ -0,0 +1,67 @@ -+CONFIG_LOCALVERSION="-debug" -+CONFIG_ENABLE_WARN_DEPRECATED=y -+CONFIG_ENABLE_MUST_CHECK=y -+CONFIG_FRAME_POINTER=y -+CONFIG_STACK_VALIDATION=y -+CONFIG_DEBUG_KERNEL=y -+CONFIG_DEBUG_INFO=y -+CONFIG_DEBUG_INFO_DWARF4=y -+CONFIG_PAGE_EXTENSION=y -+CONFIG_PAGE_POISONING=y -+CONFIG_DEBUG_OBJECTS=y -+CONFIG_DEBUG_OBJECTS_FREE=y -+CONFIG_DEBUG_OBJECTS_TIMERS=y -+CONFIG_DEBUG_OBJECTS_WORK=y -+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y -+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 
-+CONFIG_SLUB_DEBUG_ON=y -+CONFIG_DEBUG_VM=y -+CONFIG_DEBUG_MEMORY_INIT=y -+CONFIG_HAVE_DEBUG_STACKOVERFLOW=y -+CONFIG_DEBUG_STACKOVERFLOW=y -+CONFIG_HAVE_ARCH_KMEMCHECK=y -+CONFIG_HAVE_ARCH_KASAN=y -+CONFIG_KASAN=y -+CONFIG_KASAN_INLINE=y -+CONFIG_UBSAN=y -+CONFIG_UBSAN_SANITIZE_ALL=y -+CONFIG_UBSAN_NO_ALIGNMENT=y -+CONFIG_UBSAN_NULL=y -+CONFIG_DEBUG_KMEMLEAK=y -+CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 -+CONFIG_DEBUG_STACK_USAGE=y -+CONFIG_DEBUG_SHIRQ=y -+CONFIG_WQ_WATCHDOG=y -+CONFIG_SCHED_DEBUG=y -+CONFIG_SCHED_INFO=y -+CONFIG_SCHEDSTATS=y -+CONFIG_SCHED_STACK_END_CHECK=y -+CONFIG_DEBUG_TIMEKEEPING=y -+CONFIG_TIMER_STATS=y -+CONFIG_DEBUG_PREEMPT=y -+CONFIG_DEBUG_RT_MUTEXES=y -+CONFIG_DEBUG_SPINLOCK=y -+CONFIG_DEBUG_MUTEXES=y -+CONFIG_DEBUG_LOCK_ALLOC=y -+CONFIG_PROVE_LOCKING=y -+CONFIG_LOCKDEP=y -+CONFIG_DEBUG_ATOMIC_SLEEP=y -+CONFIG_TRACE_IRQFLAGS=y -+CONFIG_DEBUG_BUGVERBOSE=y -+CONFIG_DEBUG_LIST=y -+CONFIG_DEBUG_PI_LIST=y -+CONFIG_PROVE_RCU=y -+CONFIG_SPARSE_RCU_POINTER=y -+CONFIG_RCU_CPU_STALL_TIMEOUT=21 -+CONFIG_RCU_TRACE=y -+CONFIG_RCU_EQS_DEBUG=y -+CONFIG_USER_STACKTRACE_SUPPORT=y -+CONFIG_DEBUG_SG=y -+CONFIG_DEBUG_NOTIFIERS=y -+CONFIG_DOUBLEFAULT=y -+CONFIG_X86_DEBUG_FPU=y -+CONFIG_DEBUG_SECTION_MISMATCH=y -+CONFIG_DEBUG_PAGEALLOC=y -+CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y -+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/init.c -@@ -0,0 +1,284 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
-+ */ -+ -+#define _GNU_SOURCE -+#include <unistd.h> -+#include <errno.h> -+#include <string.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <stdbool.h> -+#include <fcntl.h> -+#include <sys/wait.h> -+#include <sys/mount.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <sys/types.h> -+#include <sys/io.h> -+#include <sys/ioctl.h> -+#include <sys/reboot.h> -+#include <sys/utsname.h> -+#include <sys/sendfile.h> -+#include <linux/random.h> -+#include <linux/version.h> -+ -+__attribute__((noreturn)) static void poweroff(void) -+{ -+ fflush(stdout); -+ fflush(stderr); -+ reboot(RB_AUTOBOOT); -+ sleep(30); -+ fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n"); -+ exit(1); -+} -+ -+static void panic(const char *what) -+{ -+ fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno)); -+ poweroff(); -+} -+ -+#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m") -+ -+static void print_banner(void) -+{ -+ struct utsname utsname; -+ int len; -+ -+ if (uname(&utsname) < 0) -+ panic("uname"); -+ -+ len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine); -+ printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, ""); -+} -+ -+static void seed_rng(void) -+{ -+ int fd; -+ struct { -+ int entropy_count; -+ int buffer_size; -+ unsigned char buffer[256]; -+ } entropy = { -+ .entropy_count = sizeof(entropy.buffer) * 8, -+ .buffer_size = sizeof(entropy.buffer), -+ .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!" 
-+ }; -+ -+ if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9))) -+ panic("mknod(/dev/urandom)"); -+ fd = open("/dev/urandom", O_WRONLY); -+ if (fd < 0) -+ panic("open(urandom)"); -+ for (int i = 0; i < 256; ++i) { -+ if (ioctl(fd, RNDADDENTROPY, &entropy) < 0) -+ panic("ioctl(urandom)"); -+ } -+ close(fd); -+} -+ -+static void mount_filesystems(void) -+{ -+ pretty_message("[+] Mounting filesystems..."); -+ mkdir("/dev", 0755); -+ mkdir("/proc", 0755); -+ mkdir("/sys", 0755); -+ mkdir("/tmp", 0755); -+ mkdir("/run", 0755); -+ mkdir("/var", 0755); -+ if (mount("none", "/dev", "devtmpfs", 0, NULL)) -+ panic("devtmpfs mount"); -+ if (mount("none", "/proc", "proc", 0, NULL)) -+ panic("procfs mount"); -+ if (mount("none", "/sys", "sysfs", 0, NULL)) -+ panic("sysfs mount"); -+ if (mount("none", "/tmp", "tmpfs", 0, NULL)) -+ panic("tmpfs mount"); -+ if (mount("none", "/run", "tmpfs", 0, NULL)) -+ panic("tmpfs mount"); -+ if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL)) -+ ; /* Not a problem if it fails.*/ -+ if (symlink("/run", "/var/run")) -+ panic("run symlink"); -+ if (symlink("/proc/self/fd", "/dev/fd")) -+ panic("fd symlink"); -+} -+ -+static void enable_logging(void) -+{ -+ int fd; -+ pretty_message("[+] Enabling logging..."); -+ fd = open("/proc/sys/kernel/printk", O_WRONLY); -+ if (fd >= 0) { -+ if (write(fd, "9\n", 2) != 2) -+ panic("write(printk)"); -+ close(fd); -+ } -+ fd = open("/proc/sys/debug/exception-trace", O_WRONLY); -+ if (fd >= 0) { -+ if (write(fd, "1\n", 2) != 2) -+ panic("write(exception-trace)"); -+ close(fd); -+ } -+ fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); -+ if (fd >= 0) { -+ if (write(fd, "1\n", 2) != 2) -+ panic("write(panic_on_warn)"); -+ close(fd); -+ } -+} -+ -+static void kmod_selftests(void) -+{ -+ FILE *file; -+ char line[2048], *start, *pass; -+ bool success = true; -+ pretty_message("[+] Module self-tests:"); -+ file = fopen("/proc/kmsg", "r"); -+ if (!file) -+ panic("fopen(kmsg)"); -+ if 
(fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0) -+ panic("fcntl(kmsg, nonblock)"); -+ while (fgets(line, sizeof(line), file)) { -+ start = strstr(line, "wireguard: "); -+ if (!start) -+ continue; -+ start += 11; -+ *strchrnul(start, '\n') = '\0'; -+ if (strstr(start, "www.wireguard.com")) -+ break; -+ pass = strstr(start, ": pass"); -+ if (!pass || pass[6] != '\0') { -+ success = false; -+ printf(" \x1b[31m* %s\x1b[0m\n", start); -+ } else -+ printf(" \x1b[32m* %s\x1b[0m\n", start); -+ } -+ fclose(file); -+ if (!success) { -+ puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); -+ poweroff(); -+ } -+} -+ -+static void launch_tests(void) -+{ -+ char cmdline[4096], *success_dev; -+ int status, fd; -+ pid_t pid; -+ -+ pretty_message("[+] Launching tests..."); -+ pid = fork(); -+ if (pid == -1) -+ panic("fork"); -+ else if (pid == 0) { -+ execl("/init.sh", "init", NULL); -+ panic("exec"); -+ } -+ if (waitpid(pid, &status, 0) < 0) -+ panic("waitpid"); -+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { -+ pretty_message("[+] Tests successful! 
:-)"); -+ fd = open("/proc/cmdline", O_RDONLY); -+ if (fd < 0) -+ panic("open(/proc/cmdline)"); -+ if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0) -+ panic("read(/proc/cmdline)"); -+ cmdline[sizeof(cmdline) - 1] = '\0'; -+ for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) { -+ if (strncmp(success_dev, "wg.success=", 11)) -+ continue; -+ memcpy(success_dev + 11 - 5, "/dev/", 5); -+ success_dev += 11 - 5; -+ break; -+ } -+ if (!success_dev || !strlen(success_dev)) -+ panic("Unable to find success device"); -+ -+ fd = open(success_dev, O_WRONLY); -+ if (fd < 0) -+ panic("open(success_dev)"); -+ if (write(fd, "success\n", 8) != 8) -+ panic("write(success_dev)"); -+ close(fd); -+ } else { -+ const char *why = "unknown cause"; -+ int what = -1; -+ -+ if (WIFEXITED(status)) { -+ why = "exit code"; -+ what = WEXITSTATUS(status); -+ } else if (WIFSIGNALED(status)) { -+ why = "signal"; -+ what = WTERMSIG(status); -+ } -+ printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what); -+ } -+} -+ -+static void ensure_console(void) -+{ -+ for (unsigned int i = 0; i < 1000; ++i) { -+ int fd = open("/dev/console", O_RDWR); -+ if (fd < 0) { -+ usleep(50000); -+ continue; -+ } -+ dup2(fd, 0); -+ dup2(fd, 1); -+ dup2(fd, 2); -+ close(fd); -+ if (write(1, "\0\0\0\0\n", 5) == 5) -+ return; -+ } -+ panic("Unable to open console device"); -+} -+ -+static void clear_leaks(void) -+{ -+ int fd; -+ -+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); -+ if (fd < 0) -+ return; -+ pretty_message("[+] Starting memory leak detection..."); -+ write(fd, "clear\n", 5); -+ close(fd); -+} -+ -+static void check_leaks(void) -+{ -+ int fd; -+ -+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); -+ if (fd < 0) -+ return; -+ pretty_message("[+] Scanning for memory leaks..."); -+ sleep(2); /* Wait for any grace periods. 
*/ -+ write(fd, "scan\n", 5); -+ close(fd); -+ -+ fd = open("/sys/kernel/debug/kmemleak", O_RDONLY); -+ if (fd < 0) -+ return; -+ if (sendfile(1, fd, NULL, 0x7ffff000) > 0) -+ panic("Memory leaks encountered"); -+ close(fd); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ seed_rng(); -+ ensure_console(); -+ print_banner(); -+ mount_filesystems(); -+ kmod_selftests(); -+ enable_logging(); -+ clear_leaks(); -+ launch_tests(); -+ check_leaks(); -+ poweroff(); -+ return 1; -+} ---- /dev/null -+++ b/tools/testing/selftests/wireguard/qemu/kernel.config -@@ -0,0 +1,86 @@ -+CONFIG_LOCALVERSION="" -+CONFIG_NET=y -+CONFIG_NETDEVICES=y -+CONFIG_NET_CORE=y -+CONFIG_NET_IPIP=y -+CONFIG_DUMMY=y -+CONFIG_VETH=y -+CONFIG_MULTIUSER=y -+CONFIG_NAMESPACES=y -+CONFIG_NET_NS=y -+CONFIG_UNIX=y -+CONFIG_INET=y -+CONFIG_IPV6=y -+CONFIG_NETFILTER=y -+CONFIG_NETFILTER_ADVANCED=y -+CONFIG_NF_CONNTRACK=y -+CONFIG_NF_NAT=y -+CONFIG_NETFILTER_XTABLES=y -+CONFIG_NETFILTER_XT_NAT=y -+CONFIG_NETFILTER_XT_MATCH_LENGTH=y -+CONFIG_NF_CONNTRACK_IPV4=y -+CONFIG_NF_NAT_IPV4=y -+CONFIG_IP_NF_IPTABLES=y -+CONFIG_IP_NF_FILTER=y -+CONFIG_IP_NF_NAT=y -+CONFIG_IP_ADVANCED_ROUTER=y -+CONFIG_IP_MULTIPLE_TABLES=y -+CONFIG_IPV6_MULTIPLE_TABLES=y -+CONFIG_TTY=y -+CONFIG_BINFMT_ELF=y -+CONFIG_BINFMT_SCRIPT=y -+CONFIG_VDSO=y -+CONFIG_VIRTUALIZATION=y -+CONFIG_HYPERVISOR_GUEST=y -+CONFIG_PARAVIRT=y -+CONFIG_KVM_GUEST=y -+CONFIG_PARAVIRT_SPINLOCKS=y -+CONFIG_PRINTK=y -+CONFIG_KALLSYMS=y -+CONFIG_BUG=y -+CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -+CONFIG_EMBEDDED=n -+CONFIG_BASE_FULL=y -+CONFIG_FUTEX=y -+CONFIG_SHMEM=y -+CONFIG_SLUB=y -+CONFIG_SPARSEMEM_VMEMMAP=y -+CONFIG_SMP=y -+CONFIG_SCHED_SMT=y -+CONFIG_SCHED_MC=y -+CONFIG_NUMA=y -+CONFIG_PREEMPT=y -+CONFIG_NO_HZ=y -+CONFIG_NO_HZ_IDLE=y -+CONFIG_NO_HZ_FULL=n -+CONFIG_HZ_PERIODIC=n -+CONFIG_HIGH_RES_TIMERS=y -+CONFIG_ARCH_RANDOM=y -+CONFIG_FILE_LOCKING=y -+CONFIG_POSIX_TIMERS=y -+CONFIG_DEVTMPFS=y -+CONFIG_PROC_FS=y -+CONFIG_PROC_SYSCTL=y -+CONFIG_SYSFS=y 
-+CONFIG_TMPFS=y -+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 -+CONFIG_PRINTK_TIME=y -+CONFIG_BLK_DEV_INITRD=y -+CONFIG_LEGACY_VSYSCALL_NONE=y -+CONFIG_KERNEL_GZIP=y -+CONFIG_PANIC_ON_OOPS=y -+CONFIG_BUG_ON_DATA_CORRUPTION=y -+CONFIG_LOCKUP_DETECTOR=y -+CONFIG_SOFTLOCKUP_DETECTOR=y -+CONFIG_HARDLOCKUP_DETECTOR=y -+CONFIG_WQ_WATCHDOG=y -+CONFIG_DETECT_HUNG_TASK=y -+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y -+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y -+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y -+CONFIG_PANIC_TIMEOUT=-1 -+CONFIG_STACKTRACE=y -+CONFIG_EARLY_PRINTK=y -+CONFIG_GDB_SCRIPTS=y -+CONFIG_WIREGUARD=y -+CONFIG_WIREGUARD_DEBUG=y diff --git a/target/linux/generic/backport-5.4/080-wireguard-0074-wireguard-Kconfig-select-parent-dependency-for-crypt.patch b/target/linux/generic/backport-5.4/080-wireguard-0074-wireguard-Kconfig-select-parent-dependency-for-crypt.patch deleted file mode 100644 index c2f8f77f53..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0074-wireguard-Kconfig-select-parent-dependency-for-crypt.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Sun, 15 Dec 2019 22:08:01 +0100 -Subject: [PATCH] wireguard: Kconfig: select parent dependency for crypto - -commit d7c68a38bb4f9b7c1a2e4a772872c752ee5c44a6 upstream. - -This fixes the crypto selection submenu depenencies. Otherwise, we'd -wind up issuing warnings in which certain dependencies we also select -couldn't be satisfied. This condition was triggered by the addition of -the test suite autobuilder in the previous commit. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/Kconfig | 2 ++ - 1 file changed, 2 insertions(+) - ---- a/drivers/net/Kconfig -+++ b/drivers/net/Kconfig -@@ -85,6 +85,8 @@ config WIREGUARD - select CRYPTO_POLY1305_X86_64 if X86 && 64BIT - select CRYPTO_BLAKE2S_X86 if X86 && 64BIT - select CRYPTO_CURVE25519_X86 if X86 && 64BIT -+ select ARM_CRYPTO if ARM -+ select ARM64_CRYPTO if ARM64 - select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON - select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON - select CRYPTO_POLY1305_ARM if ARM diff --git a/target/linux/generic/backport-5.4/080-wireguard-0075-wireguard-global-fix-spelling-mistakes-in-comments.patch b/target/linux/generic/backport-5.4/080-wireguard-0075-wireguard-global-fix-spelling-mistakes-in-comments.patch deleted file mode 100644 index 9b34e663a9..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0075-wireguard-global-fix-spelling-mistakes-in-comments.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Josh Soref <jsoref@gmail.com> -Date: Sun, 15 Dec 2019 22:08:02 +0100 -Subject: [PATCH] wireguard: global: fix spelling mistakes in comments - -commit a2ec8b5706944d228181c8b91d815f41d6dd8e7b upstream. - -This fixes two spelling errors in source code comments. - -Signed-off-by: Josh Soref <jsoref@gmail.com> -[Jason: rewrote commit message] -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/receive.c | 2 +- - include/uapi/linux/wireguard.h | 8 ++++---- - 2 files changed, 5 insertions(+), 5 deletions(-) - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -380,7 +380,7 @@ static void wg_packet_consume_data_done( - /* We've already verified the Poly1305 auth tag, which means this packet - * was not modified in transit. 
We can therefore tell the networking - * stack that all checksums of every layer of encapsulation have already -- * been checked "by the hardware" and therefore is unneccessary to check -+ * been checked "by the hardware" and therefore is unnecessary to check - * again in software. - */ - skb->ip_summed = CHECKSUM_UNNECESSARY; ---- a/include/uapi/linux/wireguard.h -+++ b/include/uapi/linux/wireguard.h -@@ -18,13 +18,13 @@ - * one but not both of: - * - * WGDEVICE_A_IFINDEX: NLA_U32 -- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 -+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 - * - * The kernel will then return several messages (NLM_F_MULTI) containing the - * following tree of nested items: - * - * WGDEVICE_A_IFINDEX: NLA_U32 -- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 -+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 - * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN - * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN - * WGDEVICE_A_LISTEN_PORT: NLA_U16 -@@ -77,7 +77,7 @@ - * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: - * - * WGDEVICE_A_IFINDEX: NLA_U32 -- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1 -+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 - * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current - * peers should be removed prior to adding the list below. - * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove -@@ -121,7 +121,7 @@ - * filling in information not contained in the prior. Note that if - * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably - * should not be specified in fragments that come after, so that the list -- * of peers is only cleared the first time but appened after. Likewise for -+ * of peers is only cleared the first time but appended after. 
Likewise for - * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message - * of a peer, it likely should not be specified in subsequent fragments. - * diff --git a/target/linux/generic/backport-5.4/080-wireguard-0076-wireguard-main-remove-unused-include-linux-version.h.patch b/target/linux/generic/backport-5.4/080-wireguard-0076-wireguard-main-remove-unused-include-linux-version.h.patch deleted file mode 100644 index 3cc0b56c3e..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0076-wireguard-main-remove-unused-include-linux-version.h.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: YueHaibing <yuehaibing@huawei.com> -Date: Sun, 15 Dec 2019 22:08:03 +0100 -Subject: [PATCH] wireguard: main: remove unused include <linux/version.h> - -commit 43967b6ff91e53bcce5ae08c16a0588a475b53a1 upstream. - -Remove <linux/version.h> from the includes for main.c, which is unused. - -Signed-off-by: YueHaibing <yuehaibing@huawei.com> -[Jason: reworded commit message] -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/main.c | 1 - - 1 file changed, 1 deletion(-) - ---- a/drivers/net/wireguard/main.c -+++ b/drivers/net/wireguard/main.c -@@ -12,7 +12,6 @@ - - #include <uapi/linux/wireguard.h> - --#include <linux/version.h> - #include <linux/init.h> - #include <linux/module.h> - #include <linux/genetlink.h> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0077-wireguard-allowedips-use-kfree_rcu-instead-of-call_r.patch b/target/linux/generic/backport-5.4/080-wireguard-0077-wireguard-allowedips-use-kfree_rcu-instead-of-call_r.patch deleted file mode 100644 index edd90484dd..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0077-wireguard-allowedips-use-kfree_rcu-instead-of-call_r.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wei Yongjun <weiyongjun1@huawei.com> -Date: Sun, 15 Dec 2019 22:08:04 +0100 -Subject: [PATCH] wireguard: allowedips: use kfree_rcu() instead of call_rcu() - -commit d89ee7d5c73af15c1c6f12b016cdf469742b5726 upstream. - -The callback function of call_rcu() just calls a kfree(), so we -can use kfree_rcu() instead of call_rcu() + callback function. - -Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/allowedips.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - ---- a/drivers/net/wireguard/allowedips.c -+++ b/drivers/net/wireguard/allowedips.c -@@ -31,11 +31,6 @@ static void copy_and_assign_cidr(struct - #define CHOOSE_NODE(parent, key) \ - parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] - --static void node_free_rcu(struct rcu_head *rcu) --{ -- kfree(container_of(rcu, struct allowedips_node, rcu)); --} -- - static void push_rcu(struct allowedips_node **stack, - struct allowedips_node __rcu *p, unsigned int *len) - { -@@ -112,7 +107,7 @@ static void walk_remove_by_peer(struct a - if (!node->bit[0] || !node->bit[1]) { - rcu_assign_pointer(*nptr, DEREF( - &node->bit[!REF(node->bit[0])])); -- call_rcu(&node->rcu, node_free_rcu); -+ kfree_rcu(node, rcu); - node = DEREF(nptr); - } - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0078-wireguard-selftests-remove-ancient-kernel-compatibil.patch b/target/linux/generic/backport-5.4/080-wireguard-0078-wireguard-selftests-remove-ancient-kernel-compatibil.patch deleted file mode 100644 index 6ff0dd9d10..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0078-wireguard-selftests-remove-ancient-kernel-compatibil.patch +++ /dev/null @@ -1,373 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 2 Jan 2020 17:47:49 +0100 -Subject: [PATCH] wireguard: selftests: remove ancient kernel compatibility - code - -commit 9a69a4c8802adf642bc4a13d471b5a86b44ed434 upstream. - -Quite a bit of the test suite was designed to work with ancient kernels. -Thankfully we no longer have to deal with this. This commit updates -things that we can finally update and removes things that we can finally -remove, to avoid the build-up of the last several years as a result of -having to support ancient kernels. 
We can finally rely on suppress_ -prefixlength being available. On the build side of things, the no-PIE -hack is no longer required, and we can bump some of the tools, repair -our m68k and i686-kvm support, and get better coverage of the static -branches used in the crypto lib and in udp_tunnel. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/netns.sh | 11 +-- - .../testing/selftests/wireguard/qemu/Makefile | 82 ++++++++++--------- - .../selftests/wireguard/qemu/arch/m68k.config | 2 +- - tools/testing/selftests/wireguard/qemu/init.c | 1 + - .../selftests/wireguard/qemu/kernel.config | 2 + - 5 files changed, 50 insertions(+), 48 deletions(-) - ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -37,7 +37,7 @@ n2() { pretty 2 "$*"; maybe_exec ip netn - ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } - ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } - ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } --sleep() { read -t "$1" -N 0 || true; } -+sleep() { read -t "$1" -N 1 || true; } - waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; } - waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } - waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } -@@ -294,12 +294,9 @@ ip1 -6 rule add table main suppress_pref - ip1 -4 route add default dev wg0 table 51820 - ip1 -4 rule add not fwmark 51820 table 51820 - ip1 -4 rule add table main suppress_prefixlength 0 --# suppress_prefixlength only got added in 3.12, and we want to support 3.10+. 
--if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then -- # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. -- n1 ping -W 1 -c 100 -f 192.168.99.7 -- n1 ping -W 1 -c 100 -f abab::1111 --fi -+# Flood the pings instead of sending just one, to trigger routing table reference counting bugs. -+n1 ping -W 1 -c 100 -f 192.168.99.7 -+n1 ping -W 1 -c 100 -f abab::1111 - - n0 iptables -t nat -F - ip0 link del vethrc ---- a/tools/testing/selftests/wireguard/qemu/Makefile -+++ b/tools/testing/selftests/wireguard/qemu/Makefile -@@ -5,6 +5,7 @@ - PWD := $(shell pwd) - - CHOST := $(shell gcc -dumpmachine) -+HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) - ifneq (,$(ARCH)) - CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) - ifeq (,$(CBUILD)) -@@ -37,19 +38,19 @@ endef - define file_download = - $(DISTFILES_PATH)/$(1): - mkdir -p $(DISTFILES_PATH) -- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' -+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' - if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi - endef - --$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61)) -+$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) - $(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) --$(eval $(call 
tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f)) -+$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) - $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) --$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2)) --$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5)) --$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21)) --$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a)) --$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071)) -+$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) -+$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) -+$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) -+$(eval $(call 
tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) -+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) - - KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) - rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) -@@ -59,23 +60,21 @@ export CFLAGS ?= -O3 -pipe - export LDFLAGS ?= - export CPPFLAGS := -I$(BUILD_PATH)/include - --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - CROSS_COMPILE_FLAG := --host=$(CHOST) --NOPIE_GCC := gcc -fno-PIE - CFLAGS += -march=native - STRIP := strip - else - $(info Cross compilation: building for $(CBUILD) using $(CHOST)) - CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) - export CROSS_COMPILE=$(CBUILD)- --NOPIE_GCC := $(CBUILD)-gcc -fno-PIE - STRIP := $(CBUILD)-strip - endif - ifeq ($(ARCH),aarch64) - QEMU_ARCH := aarch64 - KERNEL_ARCH := arm64 - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm - else - QEMU_MACHINE := -cpu cortex-a53 -machine virt -@@ -85,7 +84,7 @@ else ifeq ($(ARCH),aarch64_be) - QEMU_ARCH := aarch64 - KERNEL_ARCH := arm64 - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm - else - QEMU_MACHINE := -cpu cortex-a53 -machine virt -@@ -95,7 +94,7 @@ else ifeq ($(ARCH),arm) - QEMU_ARCH := arm - KERNEL_ARCH := arm - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine 
virt,gic_version=host,accel=kvm - else - QEMU_MACHINE := -cpu cortex-a15 -machine virt -@@ -105,7 +104,7 @@ else ifeq ($(ARCH),armeb) - QEMU_ARCH := arm - KERNEL_ARCH := arm - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm - else - QEMU_MACHINE := -cpu cortex-a15 -machine virt -@@ -116,7 +115,7 @@ else ifeq ($(ARCH),x86_64) - QEMU_ARCH := x86_64 - KERNEL_ARCH := x86_64 - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine q35,accel=kvm - else - QEMU_MACHINE := -cpu Skylake-Server -machine q35 -@@ -126,7 +125,7 @@ else ifeq ($(ARCH),i686) - QEMU_ARCH := i386 - KERNEL_ARCH := x86 - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage --ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST)) -+ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) - QEMU_MACHINE := -cpu host -machine q35,accel=kvm - else - QEMU_MACHINE := -cpu coreduo -machine q35 -@@ -136,7 +135,7 @@ else ifeq ($(ARCH),mips64) - QEMU_ARCH := mips64 - KERNEL_ARCH := mips - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine malta,accel=kvm - CFLAGS += -EB - else -@@ -147,7 +146,7 @@ else ifeq ($(ARCH),mips64el) - QEMU_ARCH := mips64el - KERNEL_ARCH := mips - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine malta,accel=kvm - CFLAGS += -EL - else -@@ -158,7 +157,7 @@ else ifeq ($(ARCH),mips) - QEMU_ARCH := mips - KERNEL_ARCH := mips - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine malta,accel=kvm - CFLAGS += -EB - else -@@ -169,7 +168,7 @@ else ifeq ($(ARCH),mipsel) - QEMU_ARCH := mipsel - 
KERNEL_ARCH := mips - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host -machine malta,accel=kvm - CFLAGS += -EL - else -@@ -180,7 +179,7 @@ else ifeq ($(ARCH),powerpc64le) - QEMU_ARCH := ppc64 - KERNEL_ARCH := powerpc - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host,accel=kvm -machine pseries - else - QEMU_MACHINE := -machine pseries -@@ -190,7 +189,7 @@ else ifeq ($(ARCH),powerpc) - QEMU_ARCH := ppc - KERNEL_ARCH := powerpc - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage --ifeq ($(CHOST),$(CBUILD)) -+ifeq ($(HOST_ARCH),$(ARCH)) - QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 - else - QEMU_MACHINE := -machine ppce500 -@@ -200,10 +199,11 @@ else ifeq ($(ARCH),m68k) - QEMU_ARCH := m68k - KERNEL_ARCH := m68k - KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux --ifeq ($(CHOST),$(CBUILD)) --QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) -+ifeq ($(HOST_ARCH),$(ARCH)) -+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) - else --QEMU_MACHINE := -machine q800 -+QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) - endif - else - $(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) -@@ -238,14 +238,14 @@ $(BUILD_PATH)/init-cpio-spec.txt: - echo "nod /dev/console 644 0 0 c 5 1" >> $@ - echo "dir /bin 755 0 0" >> $@ - echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ -- echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@ -+ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@ - echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ - echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ - echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" 
>> $@ - echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ - echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ -- echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@ -- echo "slink /bin/iptables xtables-multi 777 0 0" >> $@ -+ echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@ -+ echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ - echo "slink /bin/ping6 ping 777 0 0" >> $@ - echo "dir /lib 755 0 0" >> $@ - echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ -@@ -260,8 +260,8 @@ $(KERNEL_BUILD_PATH)/.config: kernel.con - cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config - $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) - --$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) -- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" -+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) -+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) 
CROSS_COMPILE=$(CROSS_COMPILE) - - $(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config - $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install -@@ -280,7 +280,7 @@ $(BUILD_PATH)/include/.installed: $(MUSL - - $(MUSL_CC): $(MUSL_PATH)/lib/libc.so - sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs -- printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc -+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc - chmod +x $(BUILD_PATH)/musl-gcc - - $(IPERF_PATH)/.installed: $(IPERF_TAR) -@@ -291,7 +291,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR) - touch $@ - - $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) -- cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared -+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no - $(MAKE) -C $(IPERF_PATH) - $(STRIP) -s $@ - -@@ -308,8 +308,8 @@ $(WIREGUARD_TOOLS_PATH)/.installed: $(WI - flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - touch $@ - --$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg -+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg - 
$(STRIP) -s $@ - - $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) -@@ -323,7 +323,8 @@ $(IPUTILS_PATH)/.installed: $(IPUTILS_TA - touch $@ - - $(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) -- $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping -+ sed -i /atexit/d $(IPUTILS_PATH)/ping.c -+ cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS) - $(STRIP) -s $@ - - $(BASH_PATH)/.installed: $(BASH_TAR) -@@ -357,7 +358,7 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_ - sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure - touch $@ - --$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include - $(MAKE) -C $(IPTABLES_PATH) - $(STRIP) -s $@ -@@ -368,8 +369,9 @@ $(NMAP_PATH)/.installed: $(NMAP_TAR) - touch $@ - - $(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) -- cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux -- $(MAKE) -C $(NMAP_PATH) build-ncat -+ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included 
--with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh -+ $(MAKE) -C $(NMAP_PATH)/libpcap -+ $(MAKE) -C $(NMAP_PATH)/ncat - $(STRIP) -s $@ - - clean: -@@ -379,7 +381,7 @@ distclean: clean - rm -rf $(DISTFILES_PATH) - - menuconfig: $(KERNEL_BUILD_PATH)/.config -- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig -+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig - - .PHONY: qemu build clean distclean menuconfig - .DELETE_ON_ERROR: ---- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config -+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config -@@ -1,9 +1,9 @@ - CONFIG_MMU=y -+CONFIG_M68KCLASSIC=y - CONFIG_M68040=y - CONFIG_MAC=y - CONFIG_SERIAL_PMACZILOG=y - CONFIG_SERIAL_PMACZILOG_TTYS=y - CONFIG_SERIAL_PMACZILOG_CONSOLE=y --CONFIG_CMDLINE_BOOL=y - CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" - CONFIG_FRAME_WARN=1024 ---- a/tools/testing/selftests/wireguard/qemu/init.c -+++ b/tools/testing/selftests/wireguard/qemu/init.c -@@ -21,6 +21,7 @@ - #include <sys/reboot.h> - #include <sys/utsname.h> - #include <sys/sendfile.h> -+#include <sys/sysmacros.h> - #include <linux/random.h> - #include <linux/version.h> - ---- a/tools/testing/selftests/wireguard/qemu/kernel.config -+++ b/tools/testing/selftests/wireguard/qemu/kernel.config -@@ -39,6 +39,7 @@ CONFIG_PRINTK=y - CONFIG_KALLSYMS=y - CONFIG_BUG=y - CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y -+CONFIG_JUMP_LABEL=y - CONFIG_EMBEDDED=n - CONFIG_BASE_FULL=y - CONFIG_FUTEX=y -@@ -55,6 +56,7 @@ CONFIG_NO_HZ_IDLE=y - CONFIG_NO_HZ_FULL=n - CONFIG_HZ_PERIODIC=n - CONFIG_HIGH_RES_TIMERS=y -+CONFIG_COMPAT_32BIT_TIME=y - CONFIG_ARCH_RANDOM=y - CONFIG_FILE_LOCKING=y - CONFIG_POSIX_TIMERS=y diff --git 
a/target/linux/generic/backport-5.4/080-wireguard-0079-wireguard-queueing-do-not-account-for-pfmemalloc-whe.patch b/target/linux/generic/backport-5.4/080-wireguard-0079-wireguard-queueing-do-not-account-for-pfmemalloc-whe.patch deleted file mode 100644 index fb03b1b1a6..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0079-wireguard-queueing-do-not-account-for-pfmemalloc-whe.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 2 Jan 2020 17:47:50 +0100 -Subject: [PATCH] wireguard: queueing: do not account for pfmemalloc when - clearing skb header - -commit 04d2ea92a18417619182cbb79063f154892b0150 upstream. - -Before 8b7008620b84 ("net: Don't copy pfmemalloc flag in __copy_skb_ -header()"), the pfmemalloc flag used to be between headers_start and -headers_end, which is a region we clear when preparing the packet for -encryption/decryption. This is a parameter we certainly want to -preserve, which is why 8b7008620b84 moved it out of there. The code here -was written in a world before 8b7008620b84, though, where we had to -manually account for it. This commit brings things up to speed. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/queueing.h | 3 --- - 1 file changed, 3 deletions(-) - ---- a/drivers/net/wireguard/queueing.h -+++ b/drivers/net/wireguard/queueing.h -@@ -83,13 +83,10 @@ static inline __be16 wg_skb_examine_untr - - static inline void wg_reset_packet(struct sk_buff *skb) - { -- const int pfmemalloc = skb->pfmemalloc; -- - skb_scrub_packet(skb, true); - memset(&skb->headers_start, 0, - offsetof(struct sk_buff, headers_end) - - offsetof(struct sk_buff, headers_start)); -- skb->pfmemalloc = pfmemalloc; - skb->queue_mapping = 0; - skb->nohdr = 0; - skb->peeked = 0; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0080-wireguard-socket-mark-skbs-as-not-on-list-when-recei.patch b/target/linux/generic/backport-5.4/080-wireguard-0080-wireguard-socket-mark-skbs-as-not-on-list-when-recei.patch deleted file mode 100644 index 779491c8db..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0080-wireguard-socket-mark-skbs-as-not-on-list-when-recei.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 2 Jan 2020 17:47:51 +0100 -Subject: [PATCH] wireguard: socket: mark skbs as not on list when receiving - via gro - -commit 736775d06bac60d7a353e405398b48b2bd8b1e54 upstream. - -Certain drivers will pass gro skbs to udp, at which point the udp driver -simply iterates through them and passes them off to encap_rcv, which is -where we pick up. At the moment, we're not attempting to coalesce these -into bundles, but we also don't want to wind up having cascaded lists of -skbs treated separately. The right behavior here, then, is to just mark -each incoming one as not on a list. This can be seen in practice, for -example, with Qualcomm's rmnet_perf driver. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Tested-by: Yaroslav Furman <yaro330@gmail.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/socket.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/net/wireguard/socket.c -+++ b/drivers/net/wireguard/socket.c -@@ -333,6 +333,7 @@ static int wg_receive(struct sock *sk, s - wg = sk->sk_user_data; - if (unlikely(!wg)) - goto err; -+ skb_mark_not_on_list(skb); - wg_packet_receive(wg, skb); - return 0; - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0081-wireguard-allowedips-fix-use-after-free-in-root_remo.patch b/target/linux/generic/backport-5.4/080-wireguard-0081-wireguard-allowedips-fix-use-after-free-in-root_remo.patch deleted file mode 100644 index e77ab5834a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0081-wireguard-allowedips-fix-use-after-free-in-root_remo.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet <edumazet@google.com> -Date: Tue, 4 Feb 2020 22:17:25 +0100 -Subject: [PATCH] wireguard: allowedips: fix use-after-free in - root_remove_peer_lists - -commit 9981159fc3b677b357f84e069a11de5a5ec8a2a8 upstream. - -In the unlikely case a new node could not be allocated, we need to -remove @newnode from @peer->allowedips_list before freeing it. 
- -syzbot reported: - -BUG: KASAN: use-after-free in __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54 -Read of size 8 at addr ffff88809881a538 by task syz-executor.4/30133 - -CPU: 0 PID: 30133 Comm: syz-executor.4 Not tainted 5.5.0-syzkaller #0 -Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 -Call Trace: - __dump_stack lib/dump_stack.c:77 [inline] - dump_stack+0x197/0x210 lib/dump_stack.c:118 - print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 - __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506 - kasan_report+0x12/0x20 mm/kasan/common.c:639 - __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 - __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54 - __list_del_entry include/linux/list.h:132 [inline] - list_del include/linux/list.h:146 [inline] - root_remove_peer_lists+0x24f/0x4b0 drivers/net/wireguard/allowedips.c:65 - wg_allowedips_free+0x232/0x390 drivers/net/wireguard/allowedips.c:300 - wg_peer_remove_all+0xd5/0x620 drivers/net/wireguard/peer.c:187 - wg_set_device+0xd01/0x1350 drivers/net/wireguard/netlink.c:542 - genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] - genl_family_rcv_msg net/netlink/genetlink.c:717 [inline] - genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734 - netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 - genl_rcv+0x29/0x40 net/netlink/genetlink.c:745 - netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] - netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 - netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 - sock_sendmsg_nosec net/socket.c:652 [inline] - sock_sendmsg+0xd7/0x130 net/socket.c:672 - ____sys_sendmsg+0x753/0x880 net/socket.c:2343 - ___sys_sendmsg+0x100/0x170 net/socket.c:2397 - __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 - __do_sys_sendmsg net/socket.c:2439 [inline] - __se_sys_sendmsg net/socket.c:2437 [inline] - __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 - do_syscall_64+0xfa/0x790 
arch/x86/entry/common.c:294 - entry_SYSCALL_64_after_hwframe+0x49/0xbe -RIP: 0033:0x45b399 -Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 -RSP: 002b:00007f99a9bcdc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e -RAX: ffffffffffffffda RBX: 00007f99a9bce6d4 RCX: 000000000045b399 -RDX: 0000000000000000 RSI: 0000000020001340 RDI: 0000000000000003 -RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000 -R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004 -R13: 00000000000009ba R14: 00000000004cb2b8 R15: 0000000000000009 - -Allocated by task 30103: - save_stack+0x23/0x90 mm/kasan/common.c:72 - set_track mm/kasan/common.c:80 [inline] - __kasan_kmalloc mm/kasan/common.c:513 [inline] - __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 - kasan_kmalloc+0x9/0x10 mm/kasan/common.c:527 - kmem_cache_alloc_trace+0x158/0x790 mm/slab.c:3551 - kmalloc include/linux/slab.h:556 [inline] - kzalloc include/linux/slab.h:670 [inline] - add+0x70a/0x1970 drivers/net/wireguard/allowedips.c:236 - wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320 - set_allowedip drivers/net/wireguard/netlink.c:343 [inline] - set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468 - wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591 - genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] - genl_family_rcv_msg net/netlink/genetlink.c:717 [inline] - genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734 - netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 - genl_rcv+0x29/0x40 net/netlink/genetlink.c:745 - netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] - netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 - netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 - sock_sendmsg_nosec net/socket.c:652 [inline] - sock_sendmsg+0xd7/0x130 net/socket.c:672 - 
____sys_sendmsg+0x753/0x880 net/socket.c:2343 - ___sys_sendmsg+0x100/0x170 net/socket.c:2397 - __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 - __do_sys_sendmsg net/socket.c:2439 [inline] - __se_sys_sendmsg net/socket.c:2437 [inline] - __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 - do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 - entry_SYSCALL_64_after_hwframe+0x49/0xbe - -Freed by task 30103: - save_stack+0x23/0x90 mm/kasan/common.c:72 - set_track mm/kasan/common.c:80 [inline] - kasan_set_free_info mm/kasan/common.c:335 [inline] - __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 - kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 - __cache_free mm/slab.c:3426 [inline] - kfree+0x10a/0x2c0 mm/slab.c:3757 - add+0x12d2/0x1970 drivers/net/wireguard/allowedips.c:266 - wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320 - set_allowedip drivers/net/wireguard/netlink.c:343 [inline] - set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468 - wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591 - genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline] - genl_family_rcv_msg net/netlink/genetlink.c:717 [inline] - genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734 - netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 - genl_rcv+0x29/0x40 net/netlink/genetlink.c:745 - netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] - netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 - netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 - sock_sendmsg_nosec net/socket.c:652 [inline] - sock_sendmsg+0xd7/0x130 net/socket.c:672 - ____sys_sendmsg+0x753/0x880 net/socket.c:2343 - ___sys_sendmsg+0x100/0x170 net/socket.c:2397 - __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 - __do_sys_sendmsg net/socket.c:2439 [inline] - __se_sys_sendmsg net/socket.c:2437 [inline] - __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437 - do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 - entry_SYSCALL_64_after_hwframe+0x49/0xbe - -The buggy address 
belongs to the object at ffff88809881a500 - which belongs to the cache kmalloc-64 of size 64 -The buggy address is located 56 bytes inside of - 64-byte region [ffff88809881a500, ffff88809881a540) -The buggy address belongs to the page: -page:ffffea0002620680 refcount:1 mapcount:0 mapping:ffff8880aa400380 index:0x0 -raw: 00fffe0000000200 ffffea000250b748 ffffea000254bac8 ffff8880aa400380 -raw: 0000000000000000 ffff88809881a000 0000000100000020 0000000000000000 -page dumped because: kasan: bad access detected - -Memory state around the buggy address: - ffff88809881a400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc - ffff88809881a480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc ->ffff88809881a500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc - ^ - ffff88809881a580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc - ffff88809881a600: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Eric Dumazet <edumazet@google.com> -Reported-by: syzbot <syzkaller@googlegroups.com> -Cc: Jason A. Donenfeld <Jason@zx2c4.com> -Cc: wireguard@lists.zx2c4.com -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/allowedips.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/net/wireguard/allowedips.c -+++ b/drivers/net/wireguard/allowedips.c -@@ -263,6 +263,7 @@ static int add(struct allowedips_node __ - } else { - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (unlikely(!node)) { -+ list_del(&newnode->peer_list); - kfree(newnode); - return -ENOMEM; - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0082-wireguard-noise-reject-peers-with-low-order-public-k.patch b/target/linux/generic/backport-5.4/080-wireguard-0082-wireguard-noise-reject-peers-with-low-order-public-k.patch deleted file mode 100644 index 55bb276118..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0082-wireguard-noise-reject-peers-with-low-order-public-k.patch +++ /dev/null @@ -1,233 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 4 Feb 2020 22:17:26 +0100 -Subject: [PATCH] wireguard: noise: reject peers with low order public keys - -commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream. - -Our static-static calculation returns a failure if the public key is of -low order. We check for this when peers are added, and don't allow them -to be added if they're low order, except in the case where we haven't -yet been given a private key. In that case, we would defer the removal -of the peer until we're given a private key, since at that point we're -doing new static-static calculations which incur failures we can act on. -This meant, however, that we wound up removing peers rather late in the -configuration flow. - -Syzkaller points out that peer_remove calls flush_workqueue, which in -turn might then wait for sending a handshake initiation to complete. -Since handshake initiation needs the static identity lock, holding the -static identity lock while calling peer_remove can result in a rare -deadlock. 
We have precisely this case in this situation of late-stage -peer removal based on an invalid public key. We can't drop the lock when -removing, because then incoming handshakes might interact with a bogus -static-static calculation. - -While the band-aid patch for this would involve breaking up the peer -removal into two steps like wg_peer_remove_all does, in order to solve -the locking issue, there's actually a much more elegant way of fixing -this: - -If the static-static calculation succeeds with one private key, it -*must* succeed with all others, because all 32-byte strings map to valid -private keys, thanks to clamping. That means we can get rid of this -silly dance and locking headaches of removing peers late in the -configuration flow, and instead just reject them early on, regardless of -whether the device has yet been assigned a private key. For the case -where the device doesn't yet have a private key, we safely use zeros -just for the purposes of checking for low order points by way of -checking the output of the calculation. 
- -The following PoC will trigger the deadlock: - -ip link add wg0 type wireguard -ip addr add 10.0.0.1/24 dev wg0 -ip link set wg0 up -ping -f 10.0.0.2 & -while true; do - wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234 - wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=) -done - -[ 0.949105] ====================================================== -[ 0.949550] WARNING: possible circular locking dependency detected -[ 0.950143] 5.5.0-debug+ #18 Not tainted -[ 0.950431] ------------------------------------------------------ -[ 0.950959] wg/89 is trying to acquire lock: -[ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0 -[ 0.951865] -[ 0.951865] but task is already holding lock: -[ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0 -[ 0.953011] -[ 0.953011] which lock already depends on the new lock. 
-[ 0.953011] -[ 0.953651] -[ 0.953651] the existing dependency chain (in reverse order) is: -[ 0.954292] -[ 0.954292] -> #2 (&wg->static_identity.lock){++++}: -[ 0.954804] lock_acquire+0x127/0x350 -[ 0.955133] down_read+0x83/0x410 -[ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700 -[ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280 -[ 0.956401] wg_packet_handshake_send_worker+0x10/0x20 -[ 0.956841] process_one_work+0x806/0x1500 -[ 0.957167] worker_thread+0x8c/0xcb0 -[ 0.957549] kthread+0x2ee/0x3b0 -[ 0.957792] ret_from_fork+0x24/0x30 -[ 0.958234] -[ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}: -[ 0.958808] lock_acquire+0x127/0x350 -[ 0.959075] process_one_work+0x7ab/0x1500 -[ 0.959369] worker_thread+0x8c/0xcb0 -[ 0.959639] kthread+0x2ee/0x3b0 -[ 0.959896] ret_from_fork+0x24/0x30 -[ 0.960346] -[ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}: -[ 0.960945] check_prev_add+0x167/0x1e20 -[ 0.961351] __lock_acquire+0x2012/0x3170 -[ 0.961725] lock_acquire+0x127/0x350 -[ 0.961990] flush_workqueue+0x106/0x12f0 -[ 0.962280] peer_remove_after_dead+0x160/0x220 -[ 0.962600] wg_set_device+0xa24/0xcc0 -[ 0.962994] genl_rcv_msg+0x52f/0xe90 -[ 0.963298] netlink_rcv_skb+0x111/0x320 -[ 0.963618] genl_rcv+0x1f/0x30 -[ 0.963853] netlink_unicast+0x3f6/0x610 -[ 0.964245] netlink_sendmsg+0x700/0xb80 -[ 0.964586] __sys_sendto+0x1dd/0x2c0 -[ 0.964854] __x64_sys_sendto+0xd8/0x1b0 -[ 0.965141] do_syscall_64+0x90/0xd9a -[ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe -[ 0.965769] -[ 0.965769] other info that might help us debug this: -[ 0.965769] -[ 0.966337] Chain exists of: -[ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock -[ 0.966337] -[ 0.967417] Possible unsafe locking scenario: -[ 0.967417] -[ 0.967836] CPU0 CPU1 -[ 0.968155] ---- ---- -[ 0.968497] lock(&wg->static_identity.lock); -[ 0.968779] lock((work_completion)(&peer->transmit_handshake_work)); -[ 
0.969345] lock(&wg->static_identity.lock); -[ 0.969809] lock((wq_completion)wg-kex-wg0); -[ 0.970146] -[ 0.970146] *** DEADLOCK *** -[ 0.970146] -[ 0.970531] 5 locks held by wg/89: -[ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30 -[ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90 -[ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0 -[ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0 -[ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0 -[ 0.973653] -[ 0.973653] stack backtrace: -[ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18 -[ 0.974476] Call Trace: -[ 0.974638] dump_stack+0x97/0xe0 -[ 0.974869] check_noncircular+0x312/0x3e0 -[ 0.975132] ? print_circular_bug+0x1f0/0x1f0 -[ 0.975410] ? __kernel_text_address+0x9/0x30 -[ 0.975727] ? unwind_get_return_address+0x51/0x90 -[ 0.976024] check_prev_add+0x167/0x1e20 -[ 0.976367] ? graph_lock+0x70/0x160 -[ 0.976682] __lock_acquire+0x2012/0x3170 -[ 0.976998] ? register_lock_class+0x1140/0x1140 -[ 0.977323] lock_acquire+0x127/0x350 -[ 0.977627] ? flush_workqueue+0xe3/0x12f0 -[ 0.977890] flush_workqueue+0x106/0x12f0 -[ 0.978147] ? flush_workqueue+0xe3/0x12f0 -[ 0.978410] ? find_held_lock+0x2c/0x110 -[ 0.978662] ? lock_downgrade+0x6e0/0x6e0 -[ 0.978919] ? queue_rcu_work+0x60/0x60 -[ 0.979166] ? netif_napi_del+0x151/0x3b0 -[ 0.979501] ? peer_remove_after_dead+0x160/0x220 -[ 0.979871] peer_remove_after_dead+0x160/0x220 -[ 0.980232] wg_set_device+0xa24/0xcc0 -[ 0.980516] ? deref_stack_reg+0x8e/0xc0 -[ 0.980801] ? set_peer+0xe10/0xe10 -[ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150 -[ 0.981430] ? __nla_validate_parse+0x163/0x270 -[ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310 -[ 0.982078] genl_rcv_msg+0x52f/0xe90 -[ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310 -[ 0.982690] ? 
register_lock_class+0x1140/0x1140 -[ 0.983049] netlink_rcv_skb+0x111/0x320 -[ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310 -[ 0.983645] ? netlink_ack+0x880/0x880 -[ 0.983888] genl_rcv+0x1f/0x30 -[ 0.984168] netlink_unicast+0x3f6/0x610 -[ 0.984443] ? netlink_detachskb+0x60/0x60 -[ 0.984729] ? find_held_lock+0x2c/0x110 -[ 0.984976] netlink_sendmsg+0x700/0xb80 -[ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60 -[ 0.985533] __sys_sendto+0x1dd/0x2c0 -[ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0 -[ 0.986039] ? sockfd_lookup_light+0x17/0x160 -[ 0.986397] ? __sys_recvmsg+0x8c/0xf0 -[ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0 -[ 0.987018] __x64_sys_sendto+0xd8/0x1b0 -[ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0 -[ 0.987666] do_syscall_64+0x90/0xd9a -[ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe -[ 0.988223] RIP: 0033:0x7fe77c12003e -[ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4 -[ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c -[ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e -[ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004 -[ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c -[ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c -[ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001 - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Reported-by: syzbot <syzkaller@googlegroups.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/netlink.c | 6 ++---- - drivers/net/wireguard/noise.c | 10 +++++++--- - 2 files changed, 9 insertions(+), 7 deletions(-) - ---- a/drivers/net/wireguard/netlink.c -+++ b/drivers/net/wireguard/netlink.c -@@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff - private_key); - list_for_each_entry_safe(peer, temp, &wg->peer_list, - peer_list) { -- if (wg_noise_precompute_static_static(peer)) -- wg_noise_expire_current_peer_keypairs(peer); -- else -- wg_peer_remove(peer); -+ BUG_ON(!wg_noise_precompute_static_static(peer)); -+ wg_noise_expire_current_peer_keypairs(peer); - } - wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); - up_write(&wg->static_identity.lock); ---- a/drivers/net/wireguard/noise.c -+++ b/drivers/net/wireguard/noise.c -@@ -46,17 +46,21 @@ void __init wg_noise_init(void) - /* Must hold peer->handshake.static_identity->lock */ - bool wg_noise_precompute_static_static(struct wg_peer *peer) - { -- bool ret = true; -+ bool ret; - - down_write(&peer->handshake.lock); -- if (peer->handshake.static_identity->has_identity) -+ if (peer->handshake.static_identity->has_identity) { - ret = curve25519( - peer->handshake.precomputed_static_static, - peer->handshake.static_identity->static_private, - peer->handshake.remote_static); -- else -+ } else { -+ u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 }; -+ -+ ret = curve25519(empty, empty, peer->handshake.remote_static); - memset(peer->handshake.precomputed_static_static, 0, - NOISE_PUBLIC_KEY_LEN); -+ } - up_write(&peer->handshake.lock); - return ret; - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0083-wireguard-selftests-ensure-non-addition-of-peers-wit.patch b/target/linux/generic/backport-5.4/080-wireguard-0083-wireguard-selftests-ensure-non-addition-of-peers-wit.patch deleted file mode 100644 index 86877a6590..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0083-wireguard-selftests-ensure-non-addition-of-peers-wit.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 4 Feb 2020 22:17:27 +0100 -Subject: [PATCH] wireguard: selftests: ensure non-addition of peers with - failed precomputation - -commit f9398acba6a4ae9cb98bfe4d56414d376eff8d57 upstream. - -Ensure that peers with low order points are ignored, both in the case -where we already have a device private key and in the case where we do -not. This adds points that naturally give a zero output. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/netns.sh | 6 ++++++ - 1 file changed, 6 insertions(+) - ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -516,6 +516,12 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0 - n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 - n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 - n0 wg set wg0 peer "$pub2" allowed-ips ::/0 -+n0 wg set wg0 peer "$pub2" remove -+low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) -+n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } -+[[ -z $(n0 wg show wg0 peers) ]] -+n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } -+[[ -z $(n0 wg show wg0 peers) ]] - ip0 link del wg0 - - declare -A objects diff --git 
a/target/linux/generic/backport-5.4/080-wireguard-0084-wireguard-selftests-tie-socket-waiting-to-target-pid.patch b/target/linux/generic/backport-5.4/080-wireguard-0084-wireguard-selftests-tie-socket-waiting-to-target-pid.patch deleted file mode 100644 index 4530f0f49a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0084-wireguard-selftests-tie-socket-waiting-to-target-pid.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 4 Feb 2020 22:17:29 +0100 -Subject: [PATCH] wireguard: selftests: tie socket waiting to target pid - -commit 88f404a9b1d75388225b1c67b6dd327cb2182777 upstream. - -Without this, we wind up proceeding too early sometimes when the -previous process has just used the same listening port. So, we tie the -listening socket query to the specific pid we're interested in. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/netns.sh | 17 ++++++++--------- - 1 file changed, 8 insertions(+), 9 deletions(-) - ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -38,9 +38,8 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 - ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } - ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } - sleep() { read -t "$1" -N 1 || true; } --waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; } --waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } --waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; } -+waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } -+waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } - waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } - - cleanup() { -@@ -119,22 +118,22 @@ tests() { - - # TCP over IPv4 - n2 iperf3 -s -1 -B 192.168.241.2 & -- waitiperf $netns2 -+ waitiperf $netns2 $! - n1 iperf3 -Z -t 3 -c 192.168.241.2 - - # TCP over IPv6 - n1 iperf3 -s -1 -B fd00::1 & -- waitiperf $netns1 -+ waitiperf $netns1 $! - n2 iperf3 -Z -t 3 -c fd00::1 - - # UDP over IPv4 - n1 iperf3 -s -1 -B 192.168.241.1 & -- waitiperf $netns1 -+ waitiperf $netns1 $! - n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 - - # UDP over IPv6 - n2 iperf3 -s -1 -B fd00::2 & -- waitiperf $netns2 -+ waitiperf $netns2 $! 
- n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 - } - -@@ -207,7 +206,7 @@ n1 ping -W 1 -c 1 192.168.241.2 - n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 - exec 4< <(n1 ncat -l -u -p 1111) - ncat_pid=$! --waitncatudp $netns1 -+waitncatudp $netns1 $ncat_pid - n2 ncat -u 192.168.241.1 1111 <<<"X" - read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] - kill $ncat_pid -@@ -216,7 +215,7 @@ n1 wg set wg0 peer "$more_specific_key" - n2 wg set wg0 listen-port 9997 - exec 4< <(n1 ncat -l -u -p 1111) - ncat_pid=$! --waitncatudp $netns1 -+waitncatudp $netns1 $ncat_pid - n2 ncat -u 192.168.241.1 1111 <<<"X" - ! read -r -N 1 -t 1 out <&4 || false - kill $ncat_pid diff --git a/target/linux/generic/backport-5.4/080-wireguard-0085-wireguard-device-use-icmp_ndo_send-helper.patch b/target/linux/generic/backport-5.4/080-wireguard-0085-wireguard-device-use-icmp_ndo_send-helper.patch deleted file mode 100644 index 321db189e1..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0085-wireguard-device-use-icmp_ndo_send-helper.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 11 Feb 2020 20:47:08 +0100 -Subject: [PATCH] wireguard: device: use icmp_ndo_send helper - -commit a12d7f3cbdc72c7625881c8dc2660fc2c979fdf2 upstream. - -Because wireguard is calling icmp from network device context, it should -use the ndo helper so that the rate limiting applies correctly. This -commit adds a small test to the wireguard test suite to ensure that the -new functions continue doing the right thing in the context of -wireguard. It does this by setting up a condition that will definitely -evoke an icmp error message from the driver, but along a nat'd path. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 4 ++-- - tools/testing/selftests/wireguard/netns.sh | 11 +++++++++++ - 2 files changed, 13 insertions(+), 2 deletions(-) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -203,9 +203,9 @@ err_peer: - err: - ++dev->stats.tx_errors; - if (skb->protocol == htons(ETH_P_IP)) -- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); -+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); - else if (skb->protocol == htons(ETH_P_IPV6)) -- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); -+ icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); - kfree_skb(skb); - return ret; - } ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -24,6 +24,7 @@ - set -e - - exec 3>&1 -+export LANG=C - export WG_HIDE_KEYS=never - netns0="wg-test-$$-0" - netns1="wg-test-$$-1" -@@ -297,7 +298,17 @@ ip1 -4 rule add table main suppress_pref - n1 ping -W 1 -c 100 -f 192.168.99.7 - n1 ping -W 1 -c 100 -f abab::1111 - -+# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route. -+n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2 -+n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit. -+n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' -+ip0 -4 route add 192.168.241.1 via 10.0.0.100 -+n2 wg set wg0 peer "$pub1" remove -+[[ $(! 
n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]] -+ - n0 iptables -t nat -F -+n0 iptables -t filter -F -+n2 iptables -t nat -F - ip0 link del vethrc - ip0 link del vethrs - ip1 link del wg0 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0086-wireguard-selftests-reduce-complexity-and-fix-make-r.patch b/target/linux/generic/backport-5.4/080-wireguard-0086-wireguard-selftests-reduce-complexity-and-fix-make-r.patch deleted file mode 100644 index ac292a8682..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0086-wireguard-selftests-reduce-complexity-and-fix-make-r.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 14 Feb 2020 23:57:20 +0100 -Subject: [PATCH] wireguard: selftests: reduce complexity and fix make races - -commit 04ddf1208f03e1dbc39a4619c40eba640051b950 upstream. - -This gives us fewer dependencies and shortens build time, fixes up some -hash checking race conditions, and also fixes missing directory creation -that caused issues on massively parallel builds. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - .../testing/selftests/wireguard/qemu/Makefile | 38 +++++++------------ - 1 file changed, 14 insertions(+), 24 deletions(-) - ---- a/tools/testing/selftests/wireguard/qemu/Makefile -+++ b/tools/testing/selftests/wireguard/qemu/Makefile -@@ -38,19 +38,17 @@ endef - define file_download = - $(DISTFILES_PATH)/$(1): - mkdir -p $(DISTFILES_PATH) -- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' -- if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi -+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' - endef - - $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) --$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) - $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) - $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) - $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) - $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) - $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) - $(eval 
$(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) --$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) -+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) - - KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) - rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) -@@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH - $(MAKE) -C $(IPERF_PATH) - $(STRIP) -s $@ - --$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) -- flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -- touch $@ -- --$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) -- cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared -- $(MAKE) -C $(LIBMNL_PATH) -- sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc -- - $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) -+ mkdir -p $(BUILD_PATH) - flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - touch $@ - --$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg -+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS) -+ $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg - $(STRIP) -s $@ - - $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) -@@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.insta - $(IPROUTE2_PATH)/.installed: 
$(IPROUTE2_TAR) - mkdir -p $(BUILD_PATH) - flock -s $<.lock tar -C $(BUILD_PATH) -xf $< -- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk -+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk - printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile - touch $@ - --$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip -- $(STRIP) -s $(IPROUTE2_PATH)/ip/ip -- --$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss -- $(STRIP) -s $(IPROUTE2_PATH)/misc/ss -+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) -+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip -+ $(STRIP) -s $@ -+ -+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) -+ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss -+ $(STRIP) -s $@ - - $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) - mkdir -p $(BUILD_PATH) -@@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_ - sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure - touch $@ - --$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed 
$(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) -- cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include -+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS) -+ cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include - $(MAKE) -C $(IPTABLES_PATH) - $(STRIP) -s $@ - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0087-wireguard-receive-reset-last_under_load-to-zero.patch b/target/linux/generic/backport-5.4/080-wireguard-0087-wireguard-receive-reset-last_under_load-to-zero.patch deleted file mode 100644 index 193d28a83f..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0087-wireguard-receive-reset-last_under_load-to-zero.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 14 Feb 2020 23:57:21 +0100 -Subject: [PATCH] wireguard: receive: reset last_under_load to zero - -commit 2a8a4df36462aa85b0db87b7c5ea145ba67e34a8 upstream. - -This is a small optimization that prevents more expensive comparisons -from happening when they are no longer necessary, by clearing the -last_under_load variable whenever we wind up in a state where we were -under load but we no longer are. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Suggested-by: Matt Dunwoodie <ncon@noconroy.net> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/receive.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -118,10 +118,13 @@ static void wg_receive_handshake_packet( - - under_load = skb_queue_len(&wg->incoming_handshakes) >= - MAX_QUEUED_INCOMING_HANDSHAKES / 8; -- if (under_load) -+ if (under_load) { - last_under_load = ktime_get_coarse_boottime_ns(); -- else if (last_under_load) -+ } else if (last_under_load) { - under_load = !wg_birthdate_has_expired(last_under_load, 1); -+ if (!under_load) -+ last_under_load = 0; -+ } - mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, - under_load); - if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || diff --git a/target/linux/generic/backport-5.4/080-wireguard-0088-wireguard-send-account-for-mtu-0-devices.patch b/target/linux/generic/backport-5.4/080-wireguard-0088-wireguard-send-account-for-mtu-0-devices.patch deleted file mode 100644 index d84efe20f0..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0088-wireguard-send-account-for-mtu-0-devices.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 14 Feb 2020 23:57:22 +0100 -Subject: [PATCH] wireguard: send: account for mtu=0 devices - -commit 175f1ca9a9ed8689d2028da1a7c624bb4fb4ff7e upstream. - -It turns out there's an easy way to get packets queued up while still -having an MTU of zero, and that's via persistent keep alive. This commit -makes sure that in whatever condition, we don't wind up dividing by -zero. Note that an MTU of zero for a wireguard interface is something -quasi-valid, so I don't think the correct fix is to limit it via -min_mtu. 
This can be reproduced easily with: - -ip link add wg0 type wireguard -ip link add wg1 type wireguard -ip link set wg0 up mtu 0 -ip link set wg1 up -wg set wg0 private-key <(wg genkey) -wg set wg1 listen-port 1 private-key <(wg genkey) peer $(wg show wg0 public-key) -wg set wg0 peer $(wg show wg1 public-key) persistent-keepalive 1 endpoint 127.0.0.1:1 - -However, while min_mtu=0 seems fine, it makes sense to restrict the -max_mtu. This commit also restricts the maximum MTU to the greatest -number for which rounding up to the padding multiple won't overflow a -signed integer. Packets this large were always rejected anyway -eventually, due to checks deeper in, but it seems more sound not to even -let the administrator configure something that won't work anyway. - -We use this opportunity to clean up this function a bit so that it's -clear which paths we're expecting. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Cc: Eric Dumazet <eric.dumazet@gmail.com> -Reviewed-by: Eric Dumazet <edumazet@google.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 7 ++++--- - drivers/net/wireguard/send.c | 16 +++++++++++----- - 2 files changed, 15 insertions(+), 8 deletions(-) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -258,6 +258,8 @@ static void wg_setup(struct net_device * - enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | - NETIF_F_SG | NETIF_F_GSO | - NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; -+ const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + -+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); - - dev->netdev_ops = &netdev_ops; - dev->hard_header_len = 0; -@@ -271,9 +273,8 @@ static void wg_setup(struct net_device * - dev->features |= WG_NETDEV_FEATURES; - dev->hw_features |= WG_NETDEV_FEATURES; - dev->hw_enc_features |= WG_NETDEV_FEATURES; -- dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - -- sizeof(struct udphdr) - -- max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); -+ dev->mtu = ETH_DATA_LEN - overhead; -+ dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; - - SET_NETDEV_DEVTYPE(dev, &device_type); - ---- a/drivers/net/wireguard/send.c -+++ b/drivers/net/wireguard/send.c -@@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_pee - - static unsigned int calculate_skb_padding(struct sk_buff *skb) - { -+ unsigned int padded_size, last_unit = skb->len; -+ -+ if (unlikely(!PACKET_CB(skb)->mtu)) -+ return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; -+ - /* We do this modulo business with the MTU, just in case the networking - * layer gives us a packet that's bigger than the MTU. In that case, we - * wouldn't want the final subtraction to overflow in the case of the -- * padded_size being clamped. -+ * padded_size being clamped. Fortunately, that's very rarely the case, -+ * so we optimize for that not happening. 
- */ -- unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; -- unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); -+ if (unlikely(last_unit > PACKET_CB(skb)->mtu)) -+ last_unit %= PACKET_CB(skb)->mtu; - -- if (padded_size > PACKET_CB(skb)->mtu) -- padded_size = PACKET_CB(skb)->mtu; -+ padded_size = min(PACKET_CB(skb)->mtu, -+ ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); - return padded_size - last_unit; - } - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0089-wireguard-socket-remove-extra-call-to-synchronize_ne.patch b/target/linux/generic/backport-5.4/080-wireguard-0089-wireguard-socket-remove-extra-call-to-synchronize_ne.patch deleted file mode 100644 index 458e9d51e5..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0089-wireguard-socket-remove-extra-call-to-synchronize_ne.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 14 Feb 2020 23:57:23 +0100 -Subject: [PATCH] wireguard: socket: remove extra call to synchronize_net - -commit 1fbc33b0a7feb6ca72bf7dc8a05d81485ee8ee2e upstream. - -synchronize_net() is a wrapper around synchronize_rcu(), so there's no -point in having synchronize_net and synchronize_rcu back to back, -despite the documentation comment suggesting maybe it's somewhat useful, -"Wait for packets currently being received to be done." This commit -removes the extra call. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Suggested-by: Eric Dumazet <eric.dumazet@gmail.com> -Reviewed-by: Eric Dumazet <edumazet@google.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/socket.c | 1 - - 1 file changed, 1 deletion(-) - ---- a/drivers/net/wireguard/socket.c -+++ b/drivers/net/wireguard/socket.c -@@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device * - wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); - mutex_unlock(&wg->socket_update_lock); - synchronize_rcu(); -- synchronize_net(); - sock_free(old4); - sock_free(old6); - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0090-wireguard-selftests-remove-duplicated-include-sys-ty.patch b/target/linux/generic/backport-5.4/080-wireguard-0090-wireguard-selftests-remove-duplicated-include-sys-ty.patch deleted file mode 100644 index 93545e6760..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0090-wireguard-selftests-remove-duplicated-include-sys-ty.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: YueHaibing <yuehaibing@huawei.com> -Date: Wed, 18 Mar 2020 18:30:43 -0600 -Subject: [PATCH] wireguard: selftests: remove duplicated include <sys/types.h> - -commit 166391159c5deb84795d2ff46e95f276177fa5fb upstream. - -This commit removes a duplicated include. - -Signed-off-by: YueHaibing <yuehaibing@huawei.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/qemu/init.c | 1 - - 1 file changed, 1 deletion(-) - ---- a/tools/testing/selftests/wireguard/qemu/init.c -+++ b/tools/testing/selftests/wireguard/qemu/init.c -@@ -13,7 +13,6 @@ - #include <fcntl.h> - #include <sys/wait.h> - #include <sys/mount.h> --#include <sys/types.h> - #include <sys/stat.h> - #include <sys/types.h> - #include <sys/io.h> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0091-wireguard-queueing-account-for-skb-protocol-0.patch b/target/linux/generic/backport-5.4/080-wireguard-0091-wireguard-queueing-account-for-skb-protocol-0.patch deleted file mode 100644 index a9ca655e74..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0091-wireguard-queueing-account-for-skb-protocol-0.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 18 Mar 2020 18:30:45 -0600 -Subject: [PATCH] wireguard: queueing: account for skb->protocol==0 - -commit a5588604af448664e796daf3c1d5a4523c60667b upstream. - -We carry out checks to the effect of: - - if (skb->protocol != wg_examine_packet_protocol(skb)) - goto err; - -By having wg_skb_examine_untrusted_ip_hdr return 0 on failure, this -means that the check above still passes in the case where skb->protocol -is zero, which is possible to hit with AF_PACKET: - - struct sockaddr_pkt saddr = { .spkt_device = "wg0" }; - unsigned char buffer[5] = { 0 }; - sendto(socket(AF_PACKET, SOCK_PACKET, /* skb->protocol = */ 0), - buffer, sizeof(buffer), 0, (const struct sockaddr *)&saddr, sizeof(saddr)); - -Additional checks mean that this isn't actually a problem in the code -base, but I could imagine it becoming a problem later if the function is -used more liberally. 
- -I would prefer to fix this by having wg_examine_packet_protocol return a -32-bit ~0 value on failure, which will never match any value of -skb->protocol, which would simply change the generated code from a mov -to a movzx. However, sparse complains, and adding __force casts doesn't -seem like a good idea, so instead we just add a simple helper function -to check for the zero return value. Since wg_examine_packet_protocol -itself gets inlined, this winds up not adding an additional branch to -the generated code, since the 0 return value already happens in a -mergable branch. - -Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 2 +- - drivers/net/wireguard/queueing.h | 8 +++++++- - drivers/net/wireguard/receive.c | 4 ++-- - 3 files changed, 10 insertions(+), 4 deletions(-) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buf - u32 mtu; - int ret; - -- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { -+ if (unlikely(!wg_check_packet_protocol(skb))) { - ret = -EPROTONOSUPPORT; - net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); - goto err; ---- a/drivers/net/wireguard/queueing.h -+++ b/drivers/net/wireguard/queueing.h -@@ -66,7 +66,7 @@ struct packet_cb { - #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) - - /* Returns either the correct skb->protocol value, or 0 if invalid. 
*/ --static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) -+static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb) - { - if (skb_network_header(skb) >= skb->head && - (skb_network_header(skb) + sizeof(struct iphdr)) <= -@@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untr - return 0; - } - -+static inline bool wg_check_packet_protocol(struct sk_buff *skb) -+{ -+ __be16 real_protocol = wg_examine_packet_protocol(skb); -+ return real_protocol && skb->protocol == real_protocol; -+} -+ - static inline void wg_reset_packet(struct sk_buff *skb) - { - skb_scrub_packet(skb, true); ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_ - size_t data_offset, data_len, header_len; - struct udphdr *udp; - -- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || -+ if (unlikely(!wg_check_packet_protocol(skb) || - skb_transport_header(skb) < skb->head || - (skb_transport_header(skb) + sizeof(struct udphdr)) > - skb_tail_pointer(skb))) -@@ -388,7 +388,7 @@ static void wg_packet_consume_data_done( - */ - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = ~0; /* All levels */ -- skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); -+ skb->protocol = wg_examine_packet_protocol(skb); - if (skb->protocol == htons(ETH_P_IP)) { - len = ntohs(ip_hdr(skb)->tot_len); - if (unlikely(len < sizeof(struct iphdr))) diff --git a/target/linux/generic/backport-5.4/080-wireguard-0092-wireguard-receive-remove-dead-code-from-default-pack.patch b/target/linux/generic/backport-5.4/080-wireguard-0092-wireguard-receive-remove-dead-code-from-default-pack.patch deleted file mode 100644 index bcd4fbfbc1..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0092-wireguard-receive-remove-dead-code-from-default-pack.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Wed, 18 Mar 2020 18:30:46 -0600 -Subject: [PATCH] wireguard: receive: remove dead code from default packet type - case - -commit 2b8765c52db24c0fbcc81bac9b5e8390f2c7d3c8 upstream. - -The situation in which we wind up hitting the default case here -indicates a major bug in earlier parsing code. It is not a usual thing -that should ever happen, which means a "friendly" message for it doesn't -make sense. Rather, replace this with a WARN_ON, just like we do earlier -in the file for a similar situation, so that somebody sends us a bug -report and we can fix it. - -Reported-by: Fabian Freyer <fabianfreyer@radicallyopensecurity.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/receive.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -587,8 +587,7 @@ void wg_packet_receive(struct wg_device - wg_packet_consume_data(wg, skb); - break; - default: -- net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", -- wg->dev->name, skb); -+ WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n"); - goto err; - } - return; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0093-wireguard-noise-error-out-precomputed-DH-during-hand.patch b/target/linux/generic/backport-5.4/080-wireguard-0093-wireguard-noise-error-out-precomputed-DH-during-hand.patch deleted file mode 100644 index dac3046e47..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0093-wireguard-noise-error-out-precomputed-DH-during-hand.patch +++ /dev/null @@ -1,224 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Wed, 18 Mar 2020 18:30:47 -0600 -Subject: [PATCH] wireguard: noise: error out precomputed DH during handshake - rather than config - -commit 11a7686aa99c7fe4b3f80f6dcccd54129817984d upstream. - -We precompute the static-static ECDH during configuration time, in order -to save an expensive computation later when receiving network packets. -However, not all ECDH computations yield a contributory result. Prior, -we were just not letting those peers be added to the interface. However, -this creates a strange inconsistency, since it was still possible to add -other weird points, like a valid public key plus a low-order point, and, -like points that result in zeros, a handshake would not complete. In -order to make the behavior more uniform and less surprising, simply -allow all peers to be added. Then, we'll error out later when doing the -crypto if there's an issue. This also adds more separation between the -crypto layer and the configuration layer. - -Discussed-with: Mathias Hall-Andersen <mathias@hall-andersen.dk> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/netlink.c | 8 +--- - drivers/net/wireguard/noise.c | 55 ++++++++++++---------- - drivers/net/wireguard/noise.h | 12 ++--- - drivers/net/wireguard/peer.c | 7 +-- - tools/testing/selftests/wireguard/netns.sh | 15 ++++-- - 5 files changed, 49 insertions(+), 48 deletions(-) - ---- a/drivers/net/wireguard/netlink.c -+++ b/drivers/net/wireguard/netlink.c -@@ -417,11 +417,7 @@ static int set_peer(struct wg_device *wg - - peer = wg_peer_create(wg, public_key, preshared_key); - if (IS_ERR(peer)) { -- /* Similar to the above, if the key is invalid, we skip -- * it without fanfare, so that services don't need to -- * worry about doing key validation themselves. -- */ -- ret = PTR_ERR(peer) == -EKEYREJECTED ? 
0 : PTR_ERR(peer); -+ ret = PTR_ERR(peer); - peer = NULL; - goto out; - } -@@ -575,7 +571,7 @@ static int wg_set_device(struct sk_buff - private_key); - list_for_each_entry_safe(peer, temp, &wg->peer_list, - peer_list) { -- BUG_ON(!wg_noise_precompute_static_static(peer)); -+ wg_noise_precompute_static_static(peer); - wg_noise_expire_current_peer_keypairs(peer); - } - wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); ---- a/drivers/net/wireguard/noise.c -+++ b/drivers/net/wireguard/noise.c -@@ -44,32 +44,23 @@ void __init wg_noise_init(void) - } - - /* Must hold peer->handshake.static_identity->lock */ --bool wg_noise_precompute_static_static(struct wg_peer *peer) -+void wg_noise_precompute_static_static(struct wg_peer *peer) - { -- bool ret; -- - down_write(&peer->handshake.lock); -- if (peer->handshake.static_identity->has_identity) { -- ret = curve25519( -- peer->handshake.precomputed_static_static, -+ if (!peer->handshake.static_identity->has_identity || -+ !curve25519(peer->handshake.precomputed_static_static, - peer->handshake.static_identity->static_private, -- peer->handshake.remote_static); -- } else { -- u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 }; -- -- ret = curve25519(empty, empty, peer->handshake.remote_static); -+ peer->handshake.remote_static)) - memset(peer->handshake.precomputed_static_static, 0, - NOISE_PUBLIC_KEY_LEN); -- } - up_write(&peer->handshake.lock); -- return ret; - } - --bool wg_noise_handshake_init(struct noise_handshake *handshake, -- struct noise_static_identity *static_identity, -- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], -- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], -- struct wg_peer *peer) -+void wg_noise_handshake_init(struct noise_handshake *handshake, -+ struct noise_static_identity *static_identity, -+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], -+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], -+ struct wg_peer *peer) - { - memset(handshake, 0, sizeof(*handshake)); - 
init_rwsem(&handshake->lock); -@@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct nois - NOISE_SYMMETRIC_KEY_LEN); - handshake->static_identity = static_identity; - handshake->state = HANDSHAKE_ZEROED; -- return wg_noise_precompute_static_static(peer); -+ wg_noise_precompute_static_static(peer); - } - - static void handshake_zero(struct noise_handshake *handshake) -@@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chain - return true; - } - -+static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN], -+ u8 key[NOISE_SYMMETRIC_KEY_LEN], -+ const u8 precomputed[NOISE_PUBLIC_KEY_LEN]) -+{ -+ static u8 zero_point[NOISE_PUBLIC_KEY_LEN]; -+ if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN))) -+ return false; -+ kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN, -+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, -+ chaining_key); -+ return true; -+} -+ - static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) - { - struct blake2s_state blake; -@@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(str - NOISE_PUBLIC_KEY_LEN, key, handshake->hash); - - /* ss */ -- kdf(handshake->chaining_key, key, NULL, -- handshake->precomputed_static_static, NOISE_HASH_LEN, -- NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, -- handshake->chaining_key); -+ if (!mix_precomputed_dh(handshake->chaining_key, key, -+ handshake->precomputed_static_static)) -+ goto out; - - /* {t} */ - tai64n_now(timestamp); -@@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(st - handshake = &peer->handshake; - - /* ss */ -- kdf(chaining_key, key, NULL, handshake->precomputed_static_static, -- NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, -- chaining_key); -+ if (!mix_precomputed_dh(chaining_key, key, -+ handshake->precomputed_static_static)) -+ goto out; - - /* {t} */ - if (!message_decrypt(t, src->encrypted_timestamp, ---- a/drivers/net/wireguard/noise.h -+++ b/drivers/net/wireguard/noise.h -@@ 
-94,11 +94,11 @@ struct noise_handshake { - struct wg_device; - - void wg_noise_init(void); --bool wg_noise_handshake_init(struct noise_handshake *handshake, -- struct noise_static_identity *static_identity, -- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], -- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], -- struct wg_peer *peer); -+void wg_noise_handshake_init(struct noise_handshake *handshake, -+ struct noise_static_identity *static_identity, -+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], -+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], -+ struct wg_peer *peer); - void wg_noise_handshake_clear(struct noise_handshake *handshake); - static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) - { -@@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypai - void wg_noise_set_static_identity_private_key( - struct noise_static_identity *static_identity, - const u8 private_key[NOISE_PUBLIC_KEY_LEN]); --bool wg_noise_precompute_static_static(struct wg_peer *peer); -+void wg_noise_precompute_static_static(struct wg_peer *peer); - - bool - wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, ---- a/drivers/net/wireguard/peer.c -+++ b/drivers/net/wireguard/peer.c -@@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg - return ERR_PTR(ret); - peer->device = wg; - -- if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, -- public_key, preshared_key, peer)) { -- ret = -EKEYREJECTED; -- goto err_1; -- } -+ wg_noise_handshake_init(&peer->handshake, &wg->static_identity, -+ public_key, preshared_key, peer); - if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) - goto err_1; - if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0 - n0 wg set wg0 peer "$pub2" allowed-ips 
::/0,1700::/111,5000::/4,e000::/37,9000::/75 - n0 wg set wg0 peer "$pub2" allowed-ips ::/0 - n0 wg set wg0 peer "$pub2" remove --low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) --n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } --[[ -z $(n0 wg show wg0 peers) ]] --n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } --[[ -z $(n0 wg show wg0 peers) ]] -+for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do -+ n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111 -+done -+[[ -n $(n0 wg show wg0 peers) ]] -+exec 4< <(n0 ncat -l -u -p 1111) -+ncat_pid=$! -+waitncatudp $netns0 $ncat_pid -+ip0 link set wg0 up -+! 
read -r -n 1 -t 2 <&4 || false -+kill $ncat_pid - ip0 link del wg0 - - declare -A objects diff --git a/target/linux/generic/backport-5.4/080-wireguard-0094-wireguard-send-remove-errant-newline-from-packet_enc.patch b/target/linux/generic/backport-5.4/080-wireguard-0094-wireguard-send-remove-errant-newline-from-packet_enc.patch deleted file mode 100644 index c92b6a784a..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0094-wireguard-send-remove-errant-newline-from-packet_enc.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Sultan Alsawaf <sultan@kerneltoast.com> -Date: Wed, 29 Apr 2020 14:59:20 -0600 -Subject: [PATCH] wireguard: send: remove errant newline from - packet_encrypt_worker - -commit d6833e42786e050e7522d6a91a9361e54085897d upstream. - -This commit removes a useless newline at the end of a scope, which -doesn't add anything in the way of organization or readability. - -Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/send.c | 1 - - 1 file changed, 1 deletion(-) - ---- a/drivers/net/wireguard/send.c -+++ b/drivers/net/wireguard/send.c -@@ -304,7 +304,6 @@ void wg_packet_encrypt_worker(struct wor - } - wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, - state); -- - } - } - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0095-wireguard-queueing-cleanup-ptr_ring-in-error-path-of.patch b/target/linux/generic/backport-5.4/080-wireguard-0095-wireguard-queueing-cleanup-ptr_ring-in-error-path-of.patch deleted file mode 100644 index a72c509894..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0095-wireguard-queueing-cleanup-ptr_ring-in-error-path-of.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 29 Apr 2020 14:59:21 -0600 -Subject: [PATCH] wireguard: queueing: cleanup ptr_ring in error path of - packet_queue_init - -commit 130c58606171326c81841a49cc913cd354113dd9 upstream. - -Prior, if the alloc_percpu of packet_percpu_multicore_worker_alloc -failed, the previously allocated ptr_ring wouldn't be freed. This commit -adds the missing call to ptr_ring_cleanup in the error case. - -Reported-by: Sultan Alsawaf <sultan@kerneltoast.com> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/queueing.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/drivers/net/wireguard/queueing.c -+++ b/drivers/net/wireguard/queueing.c -@@ -35,8 +35,10 @@ int wg_packet_queue_init(struct crypt_qu - if (multicore) { - queue->worker = wg_packet_percpu_multicore_worker_alloc( - function, queue); -- if (!queue->worker) -+ if (!queue->worker) { -+ ptr_ring_cleanup(&queue->ring, NULL); - return -ENOMEM; -+ } - } else { - INIT_WORK(&queue->work, function); - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0096-wireguard-receive-use-tunnel-helpers-for-decapsulati.patch b/target/linux/generic/backport-5.4/080-wireguard-0096-wireguard-receive-use-tunnel-helpers-for-decapsulati.patch deleted file mode 100644 index a72358c302..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0096-wireguard-receive-use-tunnel-helpers-for-decapsulati.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com> -Date: Wed, 29 Apr 2020 14:59:22 -0600 -Subject: [PATCH] wireguard: receive: use tunnel helpers for decapsulating ECN - markings -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit eebabcb26ea1e3295704477c6cd4e772c96a9559 upstream. - -WireGuard currently only propagates ECN markings on tunnel decap according -to the old RFC3168 specification. However, the spec has since been updated -in RFC6040 to recommend slightly different decapsulation semantics. This -was implemented in the kernel as a set of common helpers for ECN -decapsulation, so let's just switch over WireGuard to using those, so it -can benefit from this enhancement and any future tweaks. We do not drop -packets with invalid ECN marking combinations, because WireGuard is -frequently used to work around broken ISPs, which could be doing that. 
- -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Reported-by: Olivier Tilmans <olivier.tilmans@nokia-bell-labs.com> -Cc: Dave Taht <dave.taht@gmail.com> -Cc: Rodney W. Grimes <ietf@gndrsh.dnsmgr.net> -Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/receive.c | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -393,13 +393,11 @@ static void wg_packet_consume_data_done( - len = ntohs(ip_hdr(skb)->tot_len); - if (unlikely(len < sizeof(struct iphdr))) - goto dishonest_packet_size; -- if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) -- IP_ECN_set_ce(ip_hdr(skb)); -+ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos); - } else if (skb->protocol == htons(ETH_P_IPV6)) { - len = ntohs(ipv6_hdr(skb)->payload_len) + - sizeof(struct ipv6hdr); -- if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) -- IP6_ECN_set_ce(skb, ipv6_hdr(skb)); -+ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb))); - } else { - goto dishonest_packet_type; - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0097-wireguard-selftests-use-normal-kernel-stack-size-on-.patch b/target/linux/generic/backport-5.4/080-wireguard-0097-wireguard-selftests-use-normal-kernel-stack-size-on-.patch deleted file mode 100644 index f4543d2568..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0097-wireguard-selftests-use-normal-kernel-stack-size-on-.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 6 May 2020 15:33:02 -0600 -Subject: [PATCH] wireguard: selftests: use normal kernel stack size on ppc64 - -commit a0fd7cc87a018df1a17f9d3f0bd994c1f22c6b34 upstream. 
- -While at some point it might have made sense to be running these tests -on ppc64 with 4k stacks, the kernel hasn't actually used 4k stacks on -64-bit powerpc in a long time, and more interesting things that we test -don't really work when we deviate from the default (16k). So, we stop -pushing our luck in this commit, and return to the default instead of -the minimum. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config | 1 + - 1 file changed, 1 insertion(+) - ---- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config -+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config -@@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y - CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" - CONFIG_SECTION_MISMATCH_WARN_ONLY=y - CONFIG_FRAME_WARN=1280 -+CONFIG_THREAD_SHIFT=14 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0098-wireguard-socket-remove-errant-restriction-on-loopin.patch b/target/linux/generic/backport-5.4/080-wireguard-0098-wireguard-socket-remove-errant-restriction-on-loopin.patch deleted file mode 100644 index 6dafa4781b..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0098-wireguard-socket-remove-errant-restriction-on-loopin.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 6 May 2020 15:33:03 -0600 -Subject: [PATCH] wireguard: socket: remove errant restriction on looping to - self - -commit b673e24aad36981f327a6570412ffa7754de8911 upstream. - -It's already possible to create two different interfaces and loop -packets between them. This has always been possible with tunnels in the -kernel, and isn't specific to wireguard. Therefore, the networking stack -already needs to deal with that. 
At the very least, the packet winds up -exceeding the MTU and is discarded at that point. So, since this is -already something that happens, there's no need to forbid the not very -exceptional case of routing a packet back to the same interface; this -loop is no different than others, and we shouldn't special case it, but -rather rely on generic handling of loops in general. This also makes it -easier to do interesting things with wireguard such as onion routing. - -At the same time, we add a selftest for this, ensuring that both onion -routing works and infinite routing loops do not crash the kernel. We -also add a test case for wireguard interfaces nesting packets and -sending traffic between each other, as well as the loop in this case -too. We make sure to send some throughput-heavy traffic for this use -case, to stress out any possible recursion issues with the locks around -workqueues. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/socket.c | 12 ----- - tools/testing/selftests/wireguard/netns.sh | 54 ++++++++++++++++++++-- - 2 files changed, 51 insertions(+), 15 deletions(-) - ---- a/drivers/net/wireguard/socket.c -+++ b/drivers/net/wireguard/socket.c -@@ -76,12 +76,6 @@ static int send4(struct wg_device *wg, s - net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", - wg->dev->name, &endpoint->addr, ret); - goto err; -- } else if (unlikely(rt->dst.dev == skb->dev)) { -- ip_rt_put(rt); -- ret = -ELOOP; -- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", -- wg->dev->name, &endpoint->addr); -- goto err; - } - if (cache) - dst_cache_set_ip4(cache, &rt->dst, fl.saddr); -@@ -149,12 +143,6 @@ static int send6(struct wg_device *wg, s - net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", - wg->dev->name, &endpoint->addr, ret); - goto err; -- } else if (unlikely(dst->dev == skb->dev)) { -- dst_release(dst); -- ret = -ELOOP; -- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", -- wg->dev->name, &endpoint->addr); -- goto err; - } - if (cache) - dst_cache_set_ip6(cache, dst, &fl.saddr); ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -48,8 +48,11 @@ cleanup() { - exec 2>/dev/null - printf "$orig_message_cost" > /proc/sys/net/core/message_cost - ip0 link del dev wg0 -+ ip0 link del dev wg1 - ip1 link del dev wg0 -+ ip1 link del dev wg1 - ip2 link del dev wg0 -+ ip2 link del dev wg1 - local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" - [[ -n $to_kill ]] && kill $to_kill - pp ip netns del $netns1 -@@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2 - key1="$(pp wg genkey)" - key2="$(pp wg genkey)" - key3="$(pp wg genkey)" -+key4="$(pp wg genkey)" - pub1="$(pp wg pubkey <<<"$key1")" - pub2="$(pp wg pubkey <<<"$key2")" - pub3="$(pp wg pubkey <<<"$key3")" -+pub4="$(pp wg pubkey <<<"$key4")" - psk="$(pp wg genpsk)" - [[ 
-n $key1 && -n $key2 && -n $psk ]] - - configure_peers() { - ip1 addr add 192.168.241.1/24 dev wg0 -- ip1 addr add fd00::1/24 dev wg0 -+ ip1 addr add fd00::1/112 dev wg0 - - ip2 addr add 192.168.241.2/24 dev wg0 -- ip2 addr add fd00::2/24 dev wg0 -+ ip2 addr add fd00::2/112 dev wg0 - - n1 wg set wg0 \ - private-key <(echo "$key1") \ -@@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2 - n1 wg set wg0 private-key <(echo "$key3") - n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove - n1 ping -W 1 -c 1 192.168.241.2 -+n2 wg set wg0 peer "$pub3" remove - --ip1 link del wg0 -+# Test that we can route wg through wg -+ip1 addr flush dev wg0 -+ip2 addr flush dev wg0 -+ip1 addr add fd00::5:1/112 dev wg0 -+ip2 addr add fd00::5:2/112 dev wg0 -+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2 -+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998 -+ip1 link add wg1 type wireguard -+ip2 link add wg1 type wireguard -+ip1 addr add 192.168.241.1/24 dev wg1 -+ip1 addr add fd00::1/112 dev wg1 -+ip2 addr add 192.168.241.2/24 dev wg1 -+ip2 addr add fd00::2/112 dev wg1 -+ip1 link set mtu 1340 up dev wg1 -+ip2 link set mtu 1340 up dev wg1 -+n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5 -+n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5 -+tests -+# Try to set up a routing loop between the two namespaces -+ip1 link set netns $netns0 dev wg1 -+ip0 addr add 192.168.241.1/24 dev wg1 -+ip0 link set up dev wg1 -+n0 ping -W 1 -c 1 192.168.241.2 -+n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 - ip2 link del wg0 -+ip2 link del wg1 -+! 
n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel -+ -+ip0 link del wg1 -+ip1 link del wg0 - - # Test using NAT. We now change the topology to this: - # ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ -@@ -282,6 +316,20 @@ pp sleep 3 - n2 ping -W 1 -c 1 192.168.241.1 - n1 wg set wg0 peer "$pub2" persistent-keepalive 0 - -+# Test that onion routing works, even when it loops -+n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 -+ip1 addr add 192.168.242.1/24 dev wg0 -+ip2 link add wg1 type wireguard -+ip2 addr add 192.168.242.2/24 dev wg1 -+n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32 -+ip2 link set wg1 up -+n1 ping -W 1 -c 1 192.168.242.2 -+ip2 link del wg1 -+n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5 -+! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel -+n1 wg set wg0 peer "$pub3" remove -+ip1 addr del 192.168.242.1/24 dev wg0 -+ - # Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. - ip1 -6 addr add fc00::9/96 dev vethc - ip1 -6 route add default via fc00::1 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0099-wireguard-send-receive-cond_resched-when-processing-.patch b/target/linux/generic/backport-5.4/080-wireguard-0099-wireguard-send-receive-cond_resched-when-processing-.patch deleted file mode 100644 index 499b36bc5f..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0099-wireguard-send-receive-cond_resched-when-processing-.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Wed, 6 May 2020 15:33:04 -0600 -Subject: [PATCH] wireguard: send/receive: cond_resched() when processing - worker ringbuffers - -commit 4005f5c3c9d006157ba716594e0d70c88a235c5e upstream. - -Users with pathological hardware reported CPU stalls on CONFIG_ -PREEMPT_VOLUNTARY=y, because the ringbuffers would stay full, meaning -these workers would never terminate. That turned out not to be okay on -systems without forced preemption, which Sultan observed. This commit -adds a cond_resched() to the bottom of each loop iteration, so that -these workers don't hog the core. Note that we don't need this on the -napi poll worker, since that terminates after its budget is expended. - -Suggested-by: Sultan Alsawaf <sultan@kerneltoast.com> -Reported-by: Wang Jian <larkwang@gmail.com> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/receive.c | 2 ++ - drivers/net/wireguard/send.c | 4 ++++ - 2 files changed, 6 insertions(+) - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -516,6 +516,8 @@ void wg_packet_decrypt_worker(struct wor - &PACKET_CB(skb)->keypair->receiving)) ? 
- PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; - wg_queue_enqueue_per_peer_napi(skb, state); -+ if (need_resched()) -+ cond_resched(); - } - } - ---- a/drivers/net/wireguard/send.c -+++ b/drivers/net/wireguard/send.c -@@ -281,6 +281,8 @@ void wg_packet_tx_worker(struct work_str - - wg_noise_keypair_put(keypair, false); - wg_peer_put(peer); -+ if (need_resched()) -+ cond_resched(); - } - } - -@@ -304,6 +306,8 @@ void wg_packet_encrypt_worker(struct wor - } - wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, - state); -+ if (need_resched()) -+ cond_resched(); - } - } - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0100-wireguard-selftests-initalize-ipv6-members-to-NULL-t.patch b/target/linux/generic/backport-5.4/080-wireguard-0100-wireguard-selftests-initalize-ipv6-members-to-NULL-t.patch deleted file mode 100644 index c1124be5ca..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0100-wireguard-selftests-initalize-ipv6-members-to-NULL-t.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 6 May 2020 15:33:05 -0600 -Subject: [PATCH] wireguard: selftests: initalize ipv6 members to NULL to - squelch clang warning - -commit 4fed818ef54b08d4b29200e416cce65546ad5312 upstream. 
- -Without setting these to NULL, clang complains in certain -configurations that have CONFIG_IPV6=n: - -In file included from drivers/net/wireguard/ratelimiter.c:223: -drivers/net/wireguard/selftest/ratelimiter.c:173:34: error: variable 'skb6' is uninitialized when used here [-Werror,-Wuninitialized] - ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); - ^~~~ -drivers/net/wireguard/selftest/ratelimiter.c:123:29: note: initialize the variable 'skb6' to silence this warning - struct sk_buff *skb4, *skb6; - ^ - = NULL -drivers/net/wireguard/selftest/ratelimiter.c:173:40: error: variable 'hdr6' is uninitialized when used here [-Werror,-Wuninitialized] - ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); - ^~~~ -drivers/net/wireguard/selftest/ratelimiter.c:125:22: note: initialize the variable 'hdr6' to silence this warning - struct ipv6hdr *hdr6; - ^ - -We silence this warning by setting the variables to NULL as the warning -suggests. - -Reported-by: Arnd Bergmann <arnd@arndb.de> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/selftest/ratelimiter.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/net/wireguard/selftest/ratelimiter.c -+++ b/drivers/net/wireguard/selftest/ratelimiter.c -@@ -120,9 +120,9 @@ bool __init wg_ratelimiter_selftest(void - enum { TRIALS_BEFORE_GIVING_UP = 5000 }; - bool success = false; - int test = 0, trials; -- struct sk_buff *skb4, *skb6; -+ struct sk_buff *skb4, *skb6 = NULL; - struct iphdr *hdr4; -- struct ipv6hdr *hdr6; -+ struct ipv6hdr *hdr6 = NULL; - - if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) - return true; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0101-wireguard-send-receive-use-explicit-unlikely-branch-.patch b/target/linux/generic/backport-5.4/080-wireguard-0101-wireguard-send-receive-use-explicit-unlikely-branch-.patch deleted file mode 100644 index 900e2f2350..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0101-wireguard-send-receive-use-explicit-unlikely-branch-.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 6 May 2020 15:33:06 -0600 -Subject: [PATCH] wireguard: send/receive: use explicit unlikely branch instead - of implicit coalescing - -commit 243f2148937adc72bcaaa590d482d599c936efde upstream. - -It's very unlikely that send will become true. It's nearly always false -between 0 and 120 seconds of a session, and in most cases becomes true -only between 120 and 121 seconds before becoming false again. So, -unlikely(send) is clearly the right option here. - -What happened before was that we had this complex boolean expression -with multiple likely and unlikely clauses nested. Since this is -evaluated left-to-right anyway, the whole thing got converted to -unlikely. So, we can clean this up to better represent what's going on. - -The generated code is the same. 
- -Suggested-by: Sultan Alsawaf <sultan@kerneltoast.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/receive.c | 13 ++++++------- - drivers/net/wireguard/send.c | 15 ++++++--------- - 2 files changed, 12 insertions(+), 16 deletions(-) - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -226,21 +226,20 @@ void wg_packet_handshake_receive_worker( - static void keep_key_fresh(struct wg_peer *peer) - { - struct noise_keypair *keypair; -- bool send = false; -+ bool send; - - if (peer->sent_lastminute_handshake) - return; - - rcu_read_lock_bh(); - keypair = rcu_dereference_bh(peer->keypairs.current_keypair); -- if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && -- keypair->i_am_the_initiator && -- unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, -- REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT))) -- send = true; -+ send = keypair && READ_ONCE(keypair->sending.is_valid) && -+ keypair->i_am_the_initiator && -+ wg_birthdate_has_expired(keypair->sending.birthdate, -+ REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT); - rcu_read_unlock_bh(); - -- if (send) { -+ if (unlikely(send)) { - peer->sent_lastminute_handshake = true; - wg_packet_send_queued_handshake_initiation(peer, false); - } ---- a/drivers/net/wireguard/send.c -+++ b/drivers/net/wireguard/send.c -@@ -124,20 +124,17 @@ void wg_packet_send_handshake_cookie(str - static void keep_key_fresh(struct wg_peer *peer) - { - struct noise_keypair *keypair; -- bool send = false; -+ bool send; - - rcu_read_lock_bh(); - keypair = rcu_dereference_bh(peer->keypairs.current_keypair); -- if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && -- (unlikely(atomic64_read(&keypair->sending.counter.counter) > -- REKEY_AFTER_MESSAGES) || -- (keypair->i_am_the_initiator && -- 
unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, -- REKEY_AFTER_TIME))))) -- send = true; -+ send = keypair && READ_ONCE(keypair->sending.is_valid) && -+ (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES || -+ (keypair->i_am_the_initiator && -+ wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME))); - rcu_read_unlock_bh(); - -- if (send) -+ if (unlikely(send)) - wg_packet_send_queued_handshake_initiation(peer, false); - } - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0102-wireguard-selftests-use-newer-iproute2-for-gcc-10.patch b/target/linux/generic/backport-5.4/080-wireguard-0102-wireguard-selftests-use-newer-iproute2-for-gcc-10.patch deleted file mode 100644 index d4efe37a49..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0102-wireguard-selftests-use-newer-iproute2-for-gcc-10.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 19 May 2020 22:49:27 -0600 -Subject: [PATCH] wireguard: selftests: use newer iproute2 for gcc-10 - -commit ee3c1aa3f34b7842c1557cfe5d8c3f7b8c692de8 upstream. - -gcc-10 switched to defaulting to -fno-common, which broke iproute2-5.4. -This was fixed in iproute-5.6, so switch to that. Because we're after a -stable testing surface, we generally don't like to bump these -unnecessarily, but in this case, being able to actually build is a basic -necessity. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/qemu/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/tools/testing/selftests/wireguard/qemu/Makefile -+++ b/tools/testing/selftests/wireguard/qemu/Makefile -@@ -44,7 +44,7 @@ endef - $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) - $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) - $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) --$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) -+$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) - $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) - $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) - $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) diff --git a/target/linux/generic/backport-5.4/080-wireguard-0103-wireguard-noise-read-preshared-key-while-taking-lock.patch b/target/linux/generic/backport-5.4/080-wireguard-0103-wireguard-noise-read-preshared-key-while-taking-lock.patch deleted file mode 100644 index 2dac4b7064..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0103-wireguard-noise-read-preshared-key-while-taking-lock.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 19 May 2020 22:49:28 -0600 -Subject: [PATCH] wireguard: noise: read preshared key while taking lock - -commit bc67d371256f5c47d824e2eec51e46c8d62d022e upstream. - -Prior we read the preshared key after dropping the handshake lock, which -isn't an actual crypto issue if it races, but it's still not quite -correct. So copy that part of the state into a temporary like we do with -the rest of the handshake state variables. Then we can release the lock, -operate on the temporary, and zero it out at the end of the function. In -performance tests, the impact of this was entirely unnoticable, probably -because those bytes are coming from the same cacheline as other things -that are being copied out in the same manner. - -Reported-by: Matt Dunwoodie <ncon@noconroy.net> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/noise.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - ---- a/drivers/net/wireguard/noise.c -+++ b/drivers/net/wireguard/noise.c -@@ -715,6 +715,7 @@ wg_noise_handshake_consume_response(stru - u8 e[NOISE_PUBLIC_KEY_LEN]; - u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; - u8 static_private[NOISE_PUBLIC_KEY_LEN]; -+ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; - - down_read(&wg->static_identity.lock); - -@@ -733,6 +734,8 @@ wg_noise_handshake_consume_response(stru - memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN); - memcpy(ephemeral_private, handshake->ephemeral_private, - NOISE_PUBLIC_KEY_LEN); -+ memcpy(preshared_key, handshake->preshared_key, -+ NOISE_SYMMETRIC_KEY_LEN); - up_read(&handshake->lock); - - if (state != HANDSHAKE_CREATED_INITIATION) -@@ -750,7 +753,7 @@ wg_noise_handshake_consume_response(stru - goto fail; - - /* psk */ -- mix_psk(chaining_key, hash, key, handshake->preshared_key); -+ mix_psk(chaining_key, hash, key, preshared_key); - - /* {} */ - if (!message_decrypt(NULL, src->encrypted_nothing, -@@ -783,6 +786,7 @@ out: - memzero_explicit(chaining_key, NOISE_HASH_LEN); - memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); - memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); -+ memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); - up_read(&wg->static_identity.lock); - return ret_peer; - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0104-wireguard-queueing-preserve-flow-hash-across-packet-.patch b/target/linux/generic/backport-5.4/080-wireguard-0104-wireguard-queueing-preserve-flow-hash-across-packet-.patch deleted file mode 100644 index 31deadbfc1..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0104-wireguard-queueing-preserve-flow-hash-across-packet-.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Tue, 19 May 2020 22:49:29 -0600 -Subject: [PATCH] wireguard: queueing: preserve flow hash across packet - scrubbing -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit c78a0b4a78839d572d8a80f6a62221c0d7843135 upstream. - -It's important that we clear most header fields during encapsulation and -decapsulation, because the packet is substantially changed, and we don't -want any info leak or logic bug due to an accidental correlation. But, -for encapsulation, it's wrong to clear skb->hash, since it's used by -fq_codel and flow dissection in general. Without it, classification does -not proceed as usual. This change might make it easier to estimate the -number of innerflows by examining clustering of out of order packets, -but this shouldn't open up anything that can't already be inferred -otherwise (e.g. syn packet size inference), and fq_codel can be disabled -anyway. - -Furthermore, it might be the case that the hash isn't used or queried at -all until after wireguard transmits the encrypted UDP packet, which -means skb->hash might still be zero at this point, and thus no hash -taken over the inner packet data. In order to address this situation, we -force a calculation of skb->hash before encrypting packet data. - -Of course this means that fq_codel might transmit packets slightly more -out of order than usual. Toke did some testing on beefy machines with -high quantities of parallel flows and found that increasing the -reply-attack counter to 8192 takes care of the most pathological cases -pretty well. - -Reported-by: Dave Taht <dave.taht@gmail.com> -Reviewed-and-tested-by: Toke Høiland-Jørgensen <toke@toke.dk> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/messages.h | 2 +- - drivers/net/wireguard/queueing.h | 10 +++++++++- - drivers/net/wireguard/receive.c | 2 +- - drivers/net/wireguard/send.c | 7 ++++++- - 4 files changed, 17 insertions(+), 4 deletions(-) - ---- a/drivers/net/wireguard/messages.h -+++ b/drivers/net/wireguard/messages.h -@@ -32,7 +32,7 @@ enum cookie_values { - }; - - enum counter_values { -- COUNTER_BITS_TOTAL = 2048, -+ COUNTER_BITS_TOTAL = 8192, - COUNTER_REDUNDANT_BITS = BITS_PER_LONG, - COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS - }; ---- a/drivers/net/wireguard/queueing.h -+++ b/drivers/net/wireguard/queueing.h -@@ -87,12 +87,20 @@ static inline bool wg_check_packet_proto - return real_protocol && skb->protocol == real_protocol; - } - --static inline void wg_reset_packet(struct sk_buff *skb) -+static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating) - { -+ u8 l4_hash = skb->l4_hash; -+ u8 sw_hash = skb->sw_hash; -+ u32 hash = skb->hash; - skb_scrub_packet(skb, true); - memset(&skb->headers_start, 0, - offsetof(struct sk_buff, headers_end) - - offsetof(struct sk_buff, headers_start)); -+ if (encapsulating) { -+ skb->l4_hash = l4_hash; -+ skb->sw_hash = sw_hash; -+ skb->hash = hash; -+ } - skb->queue_mapping = 0; - skb->nohdr = 0; - skb->peeked = 0; ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -484,7 +484,7 @@ int wg_packet_rx_poll(struct napi_struct - if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) - goto next; - -- wg_reset_packet(skb); -+ wg_reset_packet(skb, false); - wg_packet_consume_data_done(peer, skb, &endpoint); - free = false; - ---- a/drivers/net/wireguard/send.c -+++ b/drivers/net/wireguard/send.c -@@ -167,6 +167,11 @@ static bool encrypt_packet(struct sk_buf - struct sk_buff *trailer; - int num_frags; - -+ /* Force hash calculation before encryption so that flow analysis is -+ * consistent over the inner packet. 
-+ */ -+ skb_get_hash(skb); -+ - /* Calculate lengths. */ - padding_len = calculate_skb_padding(skb); - trailer_len = padding_len + noise_encrypted_len(0); -@@ -295,7 +300,7 @@ void wg_packet_encrypt_worker(struct wor - skb_list_walk_safe(first, skb, next) { - if (likely(encrypt_packet(skb, - PACKET_CB(first)->keypair))) { -- wg_reset_packet(skb); -+ wg_reset_packet(skb, true); - } else { - state = PACKET_STATE_DEAD; - break; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0105-wireguard-noise-separate-receive-counter-from-send-c.patch b/target/linux/generic/backport-5.4/080-wireguard-0105-wireguard-noise-separate-receive-counter-from-send-c.patch deleted file mode 100644 index 87d38d36fe..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0105-wireguard-noise-separate-receive-counter-from-send-c.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 19 May 2020 22:49:30 -0600 -Subject: [PATCH] wireguard: noise: separate receive counter from send counter - -commit a9e90d9931f3a474f04bab782ccd9d77904941e9 upstream. - -In "wireguard: queueing: preserve flow hash across packet scrubbing", we -were required to slightly increase the size of the receive replay -counter to something still fairly small, but an increase nonetheless. -It turns out that we can recoup some of the additional memory overhead -by splitting up the prior union type into two distinct types. Before, we -used the same "noise_counter" union for both sending and receiving, with -sending just using a simple atomic64_t, while receiving used the full -replay counter checker. This meant that most of the memory being -allocated for the sending counter was being wasted. 
Since the old -"noise_counter" type increased in size in the prior commit, now is a -good time to split up that union type into a distinct "noise_replay_ -counter" for receiving and a boring atomic64_t for sending, each using -neither more nor less memory than required. - -Also, since sometimes the replay counter is accessed without -necessitating additional accesses to the bitmap, we can reduce cache -misses by hoisting the always-necessary lock above the bitmap in the -struct layout. We also change a "noise_replay_counter" stack allocation -to kmalloc in a -DDEBUG selftest so that KASAN doesn't trigger a stack -frame warning. - -All and all, removing a bit of abstraction in this commit makes the code -simpler and smaller, in addition to the motivating memory usage -recuperation. For example, passing around raw "noise_symmetric_key" -structs is something that really only makes sense within noise.c, in the -one place where the sending and receiving keys can safely be thought of -as the same type of object; subsequent to that, it's important that we -uniformly access these through keypair->{sending,receiving}, where their -distinct roles are always made explicit. So this patch allows us to draw -that distinction clearly as well. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/noise.c | 16 +++------ - drivers/net/wireguard/noise.h | 14 ++++---- - drivers/net/wireguard/receive.c | 42 ++++++++++++------------ - drivers/net/wireguard/selftest/counter.c | 17 +++++++--- - drivers/net/wireguard/send.c | 12 +++---- - 5 files changed, 48 insertions(+), 53 deletions(-) - ---- a/drivers/net/wireguard/noise.c -+++ b/drivers/net/wireguard/noise.c -@@ -104,6 +104,7 @@ static struct noise_keypair *keypair_cre - - if (unlikely(!keypair)) - return NULL; -+ spin_lock_init(&keypair->receiving_counter.lock); - keypair->internal_id = atomic64_inc_return(&keypair_counter); - keypair->entry.type = INDEX_HASHTABLE_KEYPAIR; - keypair->entry.peer = peer; -@@ -358,25 +359,16 @@ out: - memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); - } - --static void symmetric_key_init(struct noise_symmetric_key *key) --{ -- spin_lock_init(&key->counter.receive.lock); -- atomic64_set(&key->counter.counter, 0); -- memset(key->counter.receive.backtrack, 0, -- sizeof(key->counter.receive.backtrack)); -- key->birthdate = ktime_get_coarse_boottime_ns(); -- key->is_valid = true; --} -- - static void derive_keys(struct noise_symmetric_key *first_dst, - struct noise_symmetric_key *second_dst, - const u8 chaining_key[NOISE_HASH_LEN]) - { -+ u64 birthdate = ktime_get_coarse_boottime_ns(); - kdf(first_dst->key, second_dst->key, NULL, NULL, - NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, - chaining_key); -- symmetric_key_init(first_dst); -- symmetric_key_init(second_dst); -+ first_dst->birthdate = second_dst->birthdate = birthdate; -+ first_dst->is_valid = second_dst->is_valid = true; - } - - static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], ---- a/drivers/net/wireguard/noise.h -+++ b/drivers/net/wireguard/noise.h -@@ -15,18 +15,14 @@ - #include <linux/mutex.h> - #include <linux/kref.h> - --union noise_counter { -- struct { -- u64 counter; -- unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; -- 
spinlock_t lock; -- } receive; -- atomic64_t counter; -+struct noise_replay_counter { -+ u64 counter; -+ spinlock_t lock; -+ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; - }; - - struct noise_symmetric_key { - u8 key[NOISE_SYMMETRIC_KEY_LEN]; -- union noise_counter counter; - u64 birthdate; - bool is_valid; - }; -@@ -34,7 +30,9 @@ struct noise_symmetric_key { - struct noise_keypair { - struct index_hashtable_entry entry; - struct noise_symmetric_key sending; -+ atomic64_t sending_counter; - struct noise_symmetric_key receiving; -+ struct noise_replay_counter receiving_counter; - __le32 remote_index; - bool i_am_the_initiator; - struct kref refcount; ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -245,20 +245,20 @@ static void keep_key_fresh(struct wg_pee - } - } - --static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) -+static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) - { - struct scatterlist sg[MAX_SKB_FRAGS + 8]; - struct sk_buff *trailer; - unsigned int offset; - int num_frags; - -- if (unlikely(!key)) -+ if (unlikely(!keypair)) - return false; - -- if (unlikely(!READ_ONCE(key->is_valid) || -- wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || -- key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { -- WRITE_ONCE(key->is_valid, false); -+ if (unlikely(!READ_ONCE(keypair->receiving.is_valid) || -+ wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) || -+ keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) { -+ WRITE_ONCE(keypair->receiving.is_valid, false); - return false; - } - -@@ -283,7 +283,7 @@ static bool decrypt_packet(struct sk_buf - - if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, - PACKET_CB(skb)->nonce, -- key->key)) -+ keypair->receiving.key)) - return false; - - /* Another ugly situation of pushing and pulling the header so as to -@@ -298,41 +298,41 @@ static bool 
decrypt_packet(struct sk_buf - } - - /* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ --static bool counter_validate(union noise_counter *counter, u64 their_counter) -+static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter) - { - unsigned long index, index_current, top, i; - bool ret = false; - -- spin_lock_bh(&counter->receive.lock); -+ spin_lock_bh(&counter->lock); - -- if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || -+ if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 || - their_counter >= REJECT_AFTER_MESSAGES)) - goto out; - - ++their_counter; - - if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < -- counter->receive.counter)) -+ counter->counter)) - goto out; - - index = their_counter >> ilog2(BITS_PER_LONG); - -- if (likely(their_counter > counter->receive.counter)) { -- index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); -+ if (likely(their_counter > counter->counter)) { -+ index_current = counter->counter >> ilog2(BITS_PER_LONG); - top = min_t(unsigned long, index - index_current, - COUNTER_BITS_TOTAL / BITS_PER_LONG); - for (i = 1; i <= top; ++i) -- counter->receive.backtrack[(i + index_current) & -+ counter->backtrack[(i + index_current) & - ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; -- counter->receive.counter = their_counter; -+ counter->counter = their_counter; - } - - index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; - ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), -- &counter->receive.backtrack[index]); -+ &counter->backtrack[index]); - - out: -- spin_unlock_bh(&counter->receive.lock); -+ spin_unlock_bh(&counter->lock); - return ret; - } - -@@ -472,12 +472,12 @@ int wg_packet_rx_poll(struct napi_struct - if (unlikely(state != PACKET_STATE_CRYPTED)) - goto next; - -- if (unlikely(!counter_validate(&keypair->receiving.counter, -+ if (unlikely(!counter_validate(&keypair->receiving_counter, - PACKET_CB(skb)->nonce))) { - 
net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", - peer->device->dev->name, - PACKET_CB(skb)->nonce, -- keypair->receiving.counter.receive.counter); -+ keypair->receiving_counter.counter); - goto next; - } - -@@ -511,8 +511,8 @@ void wg_packet_decrypt_worker(struct wor - struct sk_buff *skb; - - while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { -- enum packet_state state = likely(decrypt_packet(skb, -- &PACKET_CB(skb)->keypair->receiving)) ? -+ enum packet_state state = -+ likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? - PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; - wg_queue_enqueue_per_peer_napi(skb, state); - if (need_resched()) ---- a/drivers/net/wireguard/selftest/counter.c -+++ b/drivers/net/wireguard/selftest/counter.c -@@ -6,18 +6,24 @@ - #ifdef DEBUG - bool __init wg_packet_counter_selftest(void) - { -+ struct noise_replay_counter *counter; - unsigned int test_num = 0, i; -- union noise_counter counter; - bool success = true; - --#define T_INIT do { \ -- memset(&counter, 0, sizeof(union noise_counter)); \ -- spin_lock_init(&counter.receive.lock); \ -+ counter = kmalloc(sizeof(*counter), GFP_KERNEL); -+ if (unlikely(!counter)) { -+ pr_err("nonce counter self-test malloc: FAIL\n"); -+ return false; -+ } -+ -+#define T_INIT do { \ -+ memset(counter, 0, sizeof(*counter)); \ -+ spin_lock_init(&counter->lock); \ - } while (0) - #define T_LIM (COUNTER_WINDOW_SIZE + 1) - #define T(n, v) do { \ - ++test_num; \ -- if (counter_validate(&counter, n) != (v)) { \ -+ if (counter_validate(counter, n) != (v)) { \ - pr_err("nonce counter self-test %u: FAIL\n", \ - test_num); \ - success = false; \ -@@ -99,6 +105,7 @@ bool __init wg_packet_counter_selftest(v - - if (success) - pr_info("nonce counter self-tests: pass\n"); -+ kfree(counter); - return success; - } - #endif ---- a/drivers/net/wireguard/send.c -+++ b/drivers/net/wireguard/send.c -@@ -129,7 +129,7 @@ static void keep_key_fresh(struct wg_pee - rcu_read_lock_bh(); - keypair = 
rcu_dereference_bh(peer->keypairs.current_keypair); - send = keypair && READ_ONCE(keypair->sending.is_valid) && -- (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES || -+ (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES || - (keypair->i_am_the_initiator && - wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME))); - rcu_read_unlock_bh(); -@@ -349,7 +349,6 @@ void wg_packet_purge_staged_packets(stru - - void wg_packet_send_staged_packets(struct wg_peer *peer) - { -- struct noise_symmetric_key *key; - struct noise_keypair *keypair; - struct sk_buff_head packets; - struct sk_buff *skb; -@@ -369,10 +368,9 @@ void wg_packet_send_staged_packets(struc - rcu_read_unlock_bh(); - if (unlikely(!keypair)) - goto out_nokey; -- key = &keypair->sending; -- if (unlikely(!READ_ONCE(key->is_valid))) -+ if (unlikely(!READ_ONCE(keypair->sending.is_valid))) - goto out_nokey; -- if (unlikely(wg_birthdate_has_expired(key->birthdate, -+ if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, - REJECT_AFTER_TIME))) - goto out_invalid; - -@@ -387,7 +385,7 @@ void wg_packet_send_staged_packets(struc - */ - PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); - PACKET_CB(skb)->nonce = -- atomic64_inc_return(&key->counter.counter) - 1; -+ atomic64_inc_return(&keypair->sending_counter) - 1; - if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) - goto out_invalid; - } -@@ -399,7 +397,7 @@ void wg_packet_send_staged_packets(struc - return; - - out_invalid: -- WRITE_ONCE(key->is_valid, false); -+ WRITE_ONCE(keypair->sending.is_valid, false); - out_nokey: - wg_noise_keypair_put(keypair, false); - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0106-wireguard-noise-do-not-assign-initiation-time-in-if-.patch b/target/linux/generic/backport-5.4/080-wireguard-0106-wireguard-noise-do-not-assign-initiation-time-in-if-.patch deleted file mode 100644 index a53c764708..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0106-wireguard-noise-do-not-assign-initiation-time-in-if-.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Frank Werner-Krippendorf <mail@hb9fxq.ch> -Date: Tue, 23 Jun 2020 03:59:44 -0600 -Subject: [PATCH] wireguard: noise: do not assign initiation time in if - condition - -commit 558b353c9c2a717509f291c066c6bd8f5f5e21be upstream. - -Fixes an error condition reported by checkpatch.pl which caused by -assigning a variable in an if condition in wg_noise_handshake_consume_ -initiation(). - -Signed-off-by: Frank Werner-Krippendorf <mail@hb9fxq.ch> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/noise.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/net/wireguard/noise.c -+++ b/drivers/net/wireguard/noise.c -@@ -617,8 +617,8 @@ wg_noise_handshake_consume_initiation(st - memcpy(handshake->hash, hash, NOISE_HASH_LEN); - memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); - handshake->remote_index = src->sender_index; -- if ((s64)(handshake->last_initiation_consumption - -- (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) -+ initiation_consumption = ktime_get_coarse_boottime_ns(); -+ if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0) - handshake->last_initiation_consumption = initiation_consumption; - handshake->state = HANDSHAKE_CONSUMED_INITIATION; - up_write(&handshake->lock); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0107-wireguard-device-avoid-circular-netns-references.patch b/target/linux/generic/backport-5.4/080-wireguard-0107-wireguard-device-avoid-circular-netns-references.patch deleted file mode 100644 index 013023a3e2..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0107-wireguard-device-avoid-circular-netns-references.patch +++ /dev/null @@ -1,296 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Tue, 23 Jun 2020 03:59:45 -0600 -Subject: [PATCH] wireguard: device: avoid circular netns references - -commit 900575aa33a3eaaef802b31de187a85c4a4b4bd0 upstream. - -Before, we took a reference to the creating netns if the new netns was -different. This caused issues with circular references, with two -wireguard interfaces swapping namespaces. The solution is to rather not -take any extra references at all, but instead simply invalidate the -creating netns pointer when that netns is deleted. - -In order to prevent this from happening again, this commit improves the -rough object leak tracking by allowing it to account for created and -destroyed interfaces, aside from just peers and keys. That then makes it -possible to check for the object leak when having two interfaces take a -reference to each others' namespaces. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 58 ++++++++++------------ - drivers/net/wireguard/device.h | 3 +- - drivers/net/wireguard/netlink.c | 14 ++++-- - drivers/net/wireguard/socket.c | 25 +++++++--- - tools/testing/selftests/wireguard/netns.sh | 13 ++++- - 5 files changed, 67 insertions(+), 46 deletions(-) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -45,17 +45,18 @@ static int wg_open(struct net_device *de - if (dev_v6) - dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; - -+ mutex_lock(&wg->device_update_lock); - ret = wg_socket_init(wg, wg->incoming_port); - if (ret < 0) -- return ret; -- mutex_lock(&wg->device_update_lock); -+ goto out; - list_for_each_entry(peer, &wg->peer_list, peer_list) { - wg_packet_send_staged_packets(peer); - if (peer->persistent_keepalive_interval) - wg_packet_send_keepalive(peer); - } -+out: - mutex_unlock(&wg->device_update_lock); -- return 0; -+ return ret; - } - - #ifdef CONFIG_PM_SLEEP -@@ -225,6 +226,7 @@ static void wg_destruct(struct net_devic - list_del(&wg->device_list); - rtnl_unlock(); - mutex_lock(&wg->device_update_lock); -+ rcu_assign_pointer(wg->creating_net, NULL); - wg->incoming_port = 0; - wg_socket_reinit(wg, NULL, NULL); - /* The final references are cleared in the below calls to destroy_workqueue. 
*/ -@@ -240,13 +242,11 @@ static void wg_destruct(struct net_devic - skb_queue_purge(&wg->incoming_handshakes); - free_percpu(dev->tstats); - free_percpu(wg->incoming_handshakes_worker); -- if (wg->have_creating_net_ref) -- put_net(wg->creating_net); - kvfree(wg->index_hashtable); - kvfree(wg->peer_hashtable); - mutex_unlock(&wg->device_update_lock); - -- pr_debug("%s: Interface deleted\n", dev->name); -+ pr_debug("%s: Interface destroyed\n", dev->name); - free_netdev(dev); - } - -@@ -292,7 +292,7 @@ static int wg_newlink(struct net *src_ne - struct wg_device *wg = netdev_priv(dev); - int ret = -ENOMEM; - -- wg->creating_net = src_net; -+ rcu_assign_pointer(wg->creating_net, src_net); - init_rwsem(&wg->static_identity.lock); - mutex_init(&wg->socket_update_lock); - mutex_init(&wg->device_update_lock); -@@ -393,30 +393,26 @@ static struct rtnl_link_ops link_ops __r - .newlink = wg_newlink, - }; - --static int wg_netdevice_notification(struct notifier_block *nb, -- unsigned long action, void *data) -+static void wg_netns_pre_exit(struct net *net) - { -- struct net_device *dev = ((struct netdev_notifier_info *)data)->dev; -- struct wg_device *wg = netdev_priv(dev); -- -- ASSERT_RTNL(); -- -- if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) -- return 0; -+ struct wg_device *wg; - -- if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) { -- put_net(wg->creating_net); -- wg->have_creating_net_ref = false; -- } else if (dev_net(dev) != wg->creating_net && -- !wg->have_creating_net_ref) { -- wg->have_creating_net_ref = true; -- get_net(wg->creating_net); -+ rtnl_lock(); -+ list_for_each_entry(wg, &device_list, device_list) { -+ if (rcu_access_pointer(wg->creating_net) == net) { -+ pr_debug("%s: Creating namespace exiting\n", wg->dev->name); -+ netif_carrier_off(wg->dev); -+ mutex_lock(&wg->device_update_lock); -+ rcu_assign_pointer(wg->creating_net, NULL); -+ wg_socket_reinit(wg, NULL, NULL); -+ mutex_unlock(&wg->device_update_lock); -+ } - } 
-- return 0; -+ rtnl_unlock(); - } - --static struct notifier_block netdevice_notifier = { -- .notifier_call = wg_netdevice_notification -+static struct pernet_operations pernet_ops = { -+ .pre_exit = wg_netns_pre_exit - }; - - int __init wg_device_init(void) -@@ -429,18 +425,18 @@ int __init wg_device_init(void) - return ret; - #endif - -- ret = register_netdevice_notifier(&netdevice_notifier); -+ ret = register_pernet_device(&pernet_ops); - if (ret) - goto error_pm; - - ret = rtnl_link_register(&link_ops); - if (ret) -- goto error_netdevice; -+ goto error_pernet; - - return 0; - --error_netdevice: -- unregister_netdevice_notifier(&netdevice_notifier); -+error_pernet: -+ unregister_pernet_device(&pernet_ops); - error_pm: - #ifdef CONFIG_PM_SLEEP - unregister_pm_notifier(&pm_notifier); -@@ -451,7 +447,7 @@ error_pm: - void wg_device_uninit(void) - { - rtnl_link_unregister(&link_ops); -- unregister_netdevice_notifier(&netdevice_notifier); -+ unregister_pernet_device(&pernet_ops); - #ifdef CONFIG_PM_SLEEP - unregister_pm_notifier(&pm_notifier); - #endif ---- a/drivers/net/wireguard/device.h -+++ b/drivers/net/wireguard/device.h -@@ -40,7 +40,7 @@ struct wg_device { - struct net_device *dev; - struct crypt_queue encrypt_queue, decrypt_queue; - struct sock __rcu *sock4, *sock6; -- struct net *creating_net; -+ struct net __rcu *creating_net; - struct noise_static_identity static_identity; - struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; - struct workqueue_struct *packet_crypt_wq; -@@ -56,7 +56,6 @@ struct wg_device { - unsigned int num_peers, device_update_gen; - u32 fwmark; - u16 incoming_port; -- bool have_creating_net_ref; - }; - - int wg_device_init(void); ---- a/drivers/net/wireguard/netlink.c -+++ b/drivers/net/wireguard/netlink.c -@@ -517,11 +517,15 @@ static int wg_set_device(struct sk_buff - if (flags & ~__WGDEVICE_F_ALL) - goto out; - -- ret = -EPERM; -- if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || -- info->attrs[WGDEVICE_A_FWMARK]) && -- 
!ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) -- goto out; -+ if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) { -+ struct net *net; -+ rcu_read_lock(); -+ net = rcu_dereference(wg->creating_net); -+ ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0; -+ rcu_read_unlock(); -+ if (ret) -+ goto out; -+ } - - ++wg->device_update_gen; - ---- a/drivers/net/wireguard/socket.c -+++ b/drivers/net/wireguard/socket.c -@@ -347,6 +347,7 @@ static void set_sock_opts(struct socket - - int wg_socket_init(struct wg_device *wg, u16 port) - { -+ struct net *net; - int ret; - struct udp_tunnel_sock_cfg cfg = { - .sk_user_data = wg, -@@ -371,37 +372,47 @@ int wg_socket_init(struct wg_device *wg, - }; - #endif - -+ rcu_read_lock(); -+ net = rcu_dereference(wg->creating_net); -+ net = net ? maybe_get_net(net) : NULL; -+ rcu_read_unlock(); -+ if (unlikely(!net)) -+ return -ENONET; -+ - #if IS_ENABLED(CONFIG_IPV6) - retry: - #endif - -- ret = udp_sock_create(wg->creating_net, &port4, &new4); -+ ret = udp_sock_create(net, &port4, &new4); - if (ret < 0) { - pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); -- return ret; -+ goto out; - } - set_sock_opts(new4); -- setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); -+ setup_udp_tunnel_sock(net, new4, &cfg); - - #if IS_ENABLED(CONFIG_IPV6) - if (ipv6_mod_enabled()) { - port6.local_udp_port = inet_sk(new4->sk)->inet_sport; -- ret = udp_sock_create(wg->creating_net, &port6, &new6); -+ ret = udp_sock_create(net, &port6, &new6); - if (ret < 0) { - udp_tunnel_sock_release(new4); - if (ret == -EADDRINUSE && !port && retries++ < 100) - goto retry; - pr_err("%s: Could not create IPv6 socket\n", - wg->dev->name); -- return ret; -+ goto out; - } - set_sock_opts(new6); -- setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); -+ setup_udp_tunnel_sock(net, new6, &cfg); - } - #endif - - wg_socket_reinit(wg, new4->sk, new6 ? 
new6->sk : NULL); -- return 0; -+ ret = 0; -+out: -+ put_net(net); -+ return ret; - } - - void wg_socket_reinit(struct wg_device *wg, struct sock *new4, ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -587,9 +587,20 @@ ip0 link set wg0 up - kill $ncat_pid - ip0 link del wg0 - -+# Ensure there aren't circular reference loops -+ip1 link add wg1 type wireguard -+ip2 link add wg2 type wireguard -+ip1 link set wg1 netns $netns2 -+ip2 link set wg2 netns $netns1 -+pp ip netns delete $netns1 -+pp ip netns delete $netns2 -+pp ip netns add $netns1 -+pp ip netns add $netns2 -+ -+sleep 2 # Wait for cleanup and grace periods - declare -A objects - while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do -- [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue -+ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue - objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" - done < /dev/kmsg - alldeleted=1 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0108-wireguard-receive-account-for-napi_gro_receive-never.patch b/target/linux/generic/backport-5.4/080-wireguard-0108-wireguard-receive-account-for-napi_gro_receive-never.patch deleted file mode 100644 index eceb0b9255..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0108-wireguard-receive-account-for-napi_gro_receive-never.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 24 Jun 2020 16:06:03 -0600 -Subject: [PATCH] wireguard: receive: account for napi_gro_receive never - returning GRO_DROP - -commit df08126e3833e9dca19e2407db5f5860a7c194fb upstream. - -The napi_gro_receive function no longer returns GRO_DROP ever, making -handling GRO_DROP dead code. This commit removes that dead code. 
-Further, it's not even clear that device drivers have any business in -taking action after passing off received packets; that's arguably out of -their hands. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/receive.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -414,14 +414,8 @@ static void wg_packet_consume_data_done( - if (unlikely(routed_peer != peer)) - goto dishonest_packet_peer; - -- if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { -- ++dev->stats.rx_dropped; -- net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", -- dev->name, peer->internal_id, -- &peer->endpoint.addr); -- } else { -- update_rx_stats(peer, message_data_len(len_before_trim)); -- } -+ napi_gro_receive(&peer->napi, skb); -+ update_rx_stats(peer, message_data_len(len_before_trim)); - return; - - dishonest_packet_peer: diff --git a/target/linux/generic/backport-5.4/080-wireguard-0109-net-ip_tunnel-add-header_ops-for-layer-3-devices.patch b/target/linux/generic/backport-5.4/080-wireguard-0109-net-ip_tunnel-add-header_ops-for-layer-3-devices.patch deleted file mode 100644 index cfd6b1457c..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0109-net-ip_tunnel-add-header_ops-for-layer-3-devices.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 29 Jun 2020 19:06:18 -0600 -Subject: [PATCH] net: ip_tunnel: add header_ops for layer 3 devices - -commit 2606aff916854b61234bf85001be9777bab2d5f8 upstream. 
- -Some devices that take straight up layer 3 packets benefit from having a -shared header_ops so that AF_PACKET sockets can inject packets that are -recognized. This shared infrastructure will be used by other drivers -that currently can't inject packets using AF_PACKET. It also exposes the -parser function, as it is useful in standalone form too. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Acked-by: Willem de Bruijn <willemb@google.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - include/net/ip_tunnels.h | 3 +++ - net/ipv4/ip_tunnel_core.c | 18 ++++++++++++++++++ - 2 files changed, 21 insertions(+) - ---- a/include/net/ip_tunnels.h -+++ b/include/net/ip_tunnels.h -@@ -289,6 +289,9 @@ int ip_tunnel_newlink(struct net_device - struct ip_tunnel_parm *p, __u32 fwmark); - void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); - -+extern const struct header_ops ip_tunnel_header_ops; -+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); -+ - struct ip_tunnel_encap_ops { - size_t (*encap_hlen)(struct ip_tunnel_encap *e); - int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e, ---- a/net/ipv4/ip_tunnel_core.c -+++ b/net/ipv4/ip_tunnel_core.c -@@ -446,3 +446,21 @@ void ip_tunnel_unneed_metadata(void) - static_branch_dec(&ip_tunnel_metadata_cnt); - } - EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata); -+ -+/* Returns either the correct skb->protocol value, or 0 if invalid. 
*/ -+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb) -+{ -+ if (skb_network_header(skb) >= skb->head && -+ (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && -+ ip_hdr(skb)->version == 4) -+ return htons(ETH_P_IP); -+ if (skb_network_header(skb) >= skb->head && -+ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && -+ ipv6_hdr(skb)->version == 6) -+ return htons(ETH_P_IPV6); -+ return 0; -+} -+EXPORT_SYMBOL(ip_tunnel_parse_protocol); -+ -+const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; -+EXPORT_SYMBOL(ip_tunnel_header_ops); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0110-wireguard-implement-header_ops-parse_protocol-for-AF.patch b/target/linux/generic/backport-5.4/080-wireguard-0110-wireguard-implement-header_ops-parse_protocol-for-AF.patch deleted file mode 100644 index 415ecffeef..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0110-wireguard-implement-header_ops-parse_protocol-for-AF.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 29 Jun 2020 19:06:20 -0600 -Subject: [PATCH] wireguard: implement header_ops->parse_protocol for AF_PACKET - -commit 01a4967c71c004f8ecad4ab57021348636502fa9 upstream. - -WireGuard uses skb->protocol to determine packet type, and bails out if -it's not set or set to something it's not expecting. For AF_PACKET -injection, we need to support its call chain of: - - packet_sendmsg -> packet_snd -> packet_parse_headers -> - dev_parse_header_protocol -> parse_protocol - -Without a valid parse_protocol, this returns zero, and wireguard then -rejects the skb. So, this wires up the ip_tunnel handler for layer 3 -packets for that case. - -Reported-by: Hans Wippel <ndev@hwipl.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -262,6 +262,7 @@ static void wg_setup(struct net_device * - max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); - - dev->netdev_ops = &netdev_ops; -+ dev->header_ops = &ip_tunnel_header_ops; - dev->hard_header_len = 0; - dev->addr_len = 0; - dev->needed_headroom = DATA_PACKET_HEAD_ROOM; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0111-wireguard-queueing-make-use-of-ip_tunnel_parse_proto.patch b/target/linux/generic/backport-5.4/080-wireguard-0111-wireguard-queueing-make-use-of-ip_tunnel_parse_proto.patch deleted file mode 100644 index a777732ce7..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0111-wireguard-queueing-make-use-of-ip_tunnel_parse_proto.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 29 Jun 2020 19:06:21 -0600 -Subject: [PATCH] wireguard: queueing: make use of ip_tunnel_parse_protocol - -commit 1a574074ae7d1d745c16f7710655f38a53174c27 upstream. - -Now that wg_examine_packet_protocol has been added for general -consumption as ip_tunnel_parse_protocol, it's possible to remove -wg_examine_packet_protocol and simply use the new -ip_tunnel_parse_protocol function directly. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/queueing.h | 19 ++----------------- - drivers/net/wireguard/receive.c | 2 +- - 2 files changed, 3 insertions(+), 18 deletions(-) - ---- a/drivers/net/wireguard/queueing.h -+++ b/drivers/net/wireguard/queueing.h -@@ -11,6 +11,7 @@ - #include <linux/skbuff.h> - #include <linux/ip.h> - #include <linux/ipv6.h> -+#include <net/ip_tunnels.h> - - struct wg_device; - struct wg_peer; -@@ -65,25 +66,9 @@ struct packet_cb { - #define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) - #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) - --/* Returns either the correct skb->protocol value, or 0 if invalid. */ --static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb) --{ -- if (skb_network_header(skb) >= skb->head && -- (skb_network_header(skb) + sizeof(struct iphdr)) <= -- skb_tail_pointer(skb) && -- ip_hdr(skb)->version == 4) -- return htons(ETH_P_IP); -- if (skb_network_header(skb) >= skb->head && -- (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= -- skb_tail_pointer(skb) && -- ipv6_hdr(skb)->version == 6) -- return htons(ETH_P_IPV6); -- return 0; --} -- - static inline bool wg_check_packet_protocol(struct sk_buff *skb) - { -- __be16 real_protocol = wg_examine_packet_protocol(skb); -+ __be16 real_protocol = ip_tunnel_parse_protocol(skb); - return real_protocol && skb->protocol == real_protocol; - } - ---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -387,7 +387,7 @@ static void wg_packet_consume_data_done( - */ - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->csum_level = ~0; /* All levels */ -- skb->protocol = wg_examine_packet_protocol(skb); -+ skb->protocol = ip_tunnel_parse_protocol(skb); - if (skb->protocol == htons(ETH_P_IP)) { - len = ntohs(ip_hdr(skb)->tot_len); - if (unlikely(len < sizeof(struct iphdr))) diff --git a/target/linux/generic/backport-5.4/080-wireguard-0112-netlink-consistently-use-NLA_POLICY_EXACT_LEN.patch 
b/target/linux/generic/backport-5.4/080-wireguard-0112-netlink-consistently-use-NLA_POLICY_EXACT_LEN.patch deleted file mode 100644 index 4b2712bb2d..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0112-netlink-consistently-use-NLA_POLICY_EXACT_LEN.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Johannes Berg <johannes.berg@intel.com> -Date: Tue, 18 Aug 2020 10:17:31 +0200 -Subject: [PATCH] netlink: consistently use NLA_POLICY_EXACT_LEN() - -commit 8140860c817f3e9f78bcd1e420b9777ddcbaa629 upstream. - -Change places that open-code NLA_POLICY_EXACT_LEN() to -use the macro instead, giving us flexibility in how we -handle the details of the macro. - -Signed-off-by: Johannes Berg <johannes.berg@intel.com> -Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net> -Signed-off-by: David S. Miller <davem@davemloft.net> -[Jason: only picked the drivers/net/wireguard/* part] -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/netlink.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - ---- a/drivers/net/wireguard/netlink.c -+++ b/drivers/net/wireguard/netlink.c -@@ -22,8 +22,8 @@ static struct genl_family genl_family; - static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { - [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, - [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, -- [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, -- [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, -+ [WGDEVICE_A_PRIVATE_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), -+ [WGDEVICE_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), - [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, - [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, - [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, -@@ -31,12 +31,12 @@ static const struct nla_policy device_po - }; - - static const struct 
nla_policy peer_policy[WGPEER_A_MAX + 1] = { -- [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN }, -- [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN }, -+ [WGPEER_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), -+ [WGPEER_A_PRESHARED_KEY] = NLA_POLICY_EXACT_LEN(NOISE_SYMMETRIC_KEY_LEN), - [WGPEER_A_FLAGS] = { .type = NLA_U32 }, - [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) }, - [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, -- [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) }, -+ [WGPEER_A_LAST_HANDSHAKE_TIME] = NLA_POLICY_EXACT_LEN(sizeof(struct __kernel_timespec)), - [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, - [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, - [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, diff --git a/target/linux/generic/backport-5.4/080-wireguard-0113-netlink-consistently-use-NLA_POLICY_MIN_LEN.patch b/target/linux/generic/backport-5.4/080-wireguard-0113-netlink-consistently-use-NLA_POLICY_MIN_LEN.patch deleted file mode 100644 index 4b414bc309..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0113-netlink-consistently-use-NLA_POLICY_MIN_LEN.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Johannes Berg <johannes.berg@intel.com> -Date: Tue, 18 Aug 2020 10:17:32 +0200 -Subject: [PATCH] netlink: consistently use NLA_POLICY_MIN_LEN() - -commit bc0435855041d7fff0b83dd992fc4be34aa11afb upstream. - -Change places that open-code NLA_POLICY_MIN_LEN() to -use the macro instead, giving us flexibility in how we -handle the details of the macro. - -Signed-off-by: Johannes Berg <johannes.berg@intel.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -[Jason: only picked the drivers/net/wireguard/* part] -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/netlink.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/net/wireguard/netlink.c -+++ b/drivers/net/wireguard/netlink.c -@@ -34,7 +34,7 @@ static const struct nla_policy peer_poli - [WGPEER_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), - [WGPEER_A_PRESHARED_KEY] = NLA_POLICY_EXACT_LEN(NOISE_SYMMETRIC_KEY_LEN), - [WGPEER_A_FLAGS] = { .type = NLA_U32 }, -- [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) }, -+ [WGPEER_A_ENDPOINT] = NLA_POLICY_MIN_LEN(sizeof(struct sockaddr)), - [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, - [WGPEER_A_LAST_HANDSHAKE_TIME] = NLA_POLICY_EXACT_LEN(sizeof(struct __kernel_timespec)), - [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, -@@ -45,7 +45,7 @@ static const struct nla_policy peer_poli - - static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { - [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, -- [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) }, -+ [WGALLOWEDIP_A_IPADDR] = NLA_POLICY_MIN_LEN(sizeof(struct in_addr)), - [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } - }; - diff --git a/target/linux/generic/backport-5.4/080-wireguard-0114-wireguard-noise-take-lock-when-removing-handshake-en.patch b/target/linux/generic/backport-5.4/080-wireguard-0114-wireguard-noise-take-lock-when-removing-handshake-en.patch deleted file mode 100644 index e80528c91b..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0114-wireguard-noise-take-lock-when-removing-handshake-en.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 9 Sep 2020 13:58:14 +0200 -Subject: [PATCH] wireguard: noise: take lock when removing handshake entry - from table - -commit 9179ba31367bcf481c3c79b5f028c94faad9f30a upstream. 
- -Eric reported that syzkaller found a race of this variety: - -CPU 1 CPU 2 --------------------------------------------|--------------------------------------- -wg_index_hashtable_replace(old, ...) | - if (hlist_unhashed(&old->index_hash)) | - | wg_index_hashtable_remove(old) - | hlist_del_init_rcu(&old->index_hash) - | old->index_hash.pprev = NULL - hlist_replace_rcu(&old->index_hash, ...) | - *old->index_hash.pprev | - -Syzbot wasn't actually able to reproduce this more than once or create a -reproducer, because the race window between checking "hlist_unhashed" and -calling "hlist_replace_rcu" is just so small. Adding an mdelay(5) or -similar there helps make this demonstrable using this simple script: - - #!/bin/bash - set -ex - trap 'kill $pid1; kill $pid2; ip link del wg0; ip link del wg1' EXIT - ip link add wg0 type wireguard - ip link add wg1 type wireguard - wg set wg0 private-key <(wg genkey) listen-port 9999 - wg set wg1 private-key <(wg genkey) peer $(wg show wg0 public-key) endpoint 127.0.0.1:9999 persistent-keepalive 1 - wg set wg0 peer $(wg show wg1 public-key) - ip link set wg0 up - yes link set wg1 up | ip -force -batch - & - pid1=$! - yes link set wg1 down | ip -force -batch - & - pid2=$! - wait - -The fundamental underlying problem is that we permit calls to wg_index_ -hashtable_remove(handshake.entry) without requiring the caller to take -the handshake mutex that is intended to protect members of handshake -during mutations. This is consistently the case with calls to wg_index_ -hashtable_insert(handshake.entry) and wg_index_hashtable_replace( -handshake.entry), but it's missing from a pertinent callsite of wg_ -index_hashtable_remove(handshake.entry). So, this patch makes sure that -mutex is taken. 
- -The original code was a little bit funky though, in the form of: - - remove(handshake.entry) - lock(), memzero(handshake.some_members), unlock() - remove(handshake.entry) - -The original intention of that double removal pattern outside the lock -appears to be some attempt to prevent insertions that might happen while -locks are dropped during expensive crypto operations, but actually, all -callers of wg_index_hashtable_insert(handshake.entry) take the write -lock and then explicitly check handshake.state, as they should, which -the aforementioned memzero clears, which means an insertion should -already be impossible. And regardless, the original intention was -necessarily racy, since it wasn't guaranteed that something else would -run after the unlock() instead of after the remove(). So, from a -soundness perspective, it seems positive to remove what looks like a -hack at best. - -The crash from both syzbot and from the script above is as follows: - - general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN - KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] - CPU: 0 PID: 7395 Comm: kworker/0:3 Not tainted 5.9.0-rc4-syzkaller #0 - Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 - Workqueue: wg-kex-wg1 wg_packet_handshake_receive_worker - RIP: 0010:hlist_replace_rcu include/linux/rculist.h:505 [inline] - RIP: 0010:wg_index_hashtable_replace+0x176/0x330 drivers/net/wireguard/peerlookup.c:174 - Code: 00 fc ff df 48 89 f9 48 c1 e9 03 80 3c 01 00 0f 85 44 01 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 10 48 89 c6 48 c1 ee 03 <80> 3c 0e 00 0f 85 06 01 00 00 48 85 d2 4c 89 28 74 47 e8 a3 4f b5 - RSP: 0018:ffffc90006a97bf8 EFLAGS: 00010246 - RAX: 0000000000000000 RBX: ffff888050ffc4f8 RCX: dffffc0000000000 - RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88808e04e010 - RBP: ffff88808e04e000 R08: 0000000000000001 R09: ffff8880543d0000 - R10: 
ffffed100a87a000 R11: 000000000000016e R12: ffff8880543d0000 - R13: ffff88808e04e008 R14: ffff888050ffc508 R15: ffff888050ffc500 - FS: 0000000000000000(0000) GS:ffff8880ae600000(0000) knlGS:0000000000000000 - CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 - CR2: 00000000f5505db0 CR3: 0000000097cf7000 CR4: 00000000001526f0 - DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 - Call Trace: - wg_noise_handshake_begin_session+0x752/0xc9a drivers/net/wireguard/noise.c:820 - wg_receive_handshake_packet drivers/net/wireguard/receive.c:183 [inline] - wg_packet_handshake_receive_worker+0x33b/0x730 drivers/net/wireguard/receive.c:220 - process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 - worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 - kthread+0x3b5/0x4a0 kernel/kthread.c:292 - ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 - -Reported-by: syzbot <syzkaller@googlegroups.com> -Reported-by: Eric Dumazet <edumazet@google.com> -Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/ -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/noise.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - ---- a/drivers/net/wireguard/noise.c -+++ b/drivers/net/wireguard/noise.c -@@ -87,15 +87,12 @@ static void handshake_zero(struct noise_ - - void wg_noise_handshake_clear(struct noise_handshake *handshake) - { -+ down_write(&handshake->lock); - wg_index_hashtable_remove( - handshake->entry.peer->device->index_hashtable, - &handshake->entry); -- down_write(&handshake->lock); - handshake_zero(handshake); - up_write(&handshake->lock); -- wg_index_hashtable_remove( -- handshake->entry.peer->device->index_hashtable, -- &handshake->entry); - } - - static struct noise_keypair *keypair_create(struct wg_peer *peer) diff --git a/target/linux/generic/backport-5.4/080-wireguard-0115-wireguard-peerlookup-take-lock-before-checking-hash-.patch b/target/linux/generic/backport-5.4/080-wireguard-0115-wireguard-peerlookup-take-lock-before-checking-hash-.patch deleted file mode 100644 index e7f46ddf9c..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0115-wireguard-peerlookup-take-lock-before-checking-hash-.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Wed, 9 Sep 2020 13:58:15 +0200 -Subject: [PATCH] wireguard: peerlookup: take lock before checking hash in - replace operation - -commit 6147f7b1e90ff09bd52afc8b9206a7fcd133daf7 upstream. - -Eric's suggested fix for the previous commit's mentioned race condition -was to simply take the table->lock in wg_index_hashtable_replace(). 
The -table->lock of the hash table is supposed to protect the bucket heads, -not the entries, but actually, since all the mutator functions are -already taking it, it makes sense to take it too for the test to -hlist_unhashed, as a defense in depth measure, so that it no longer -races with deletions, regardless of what other locks are protecting -individual entries. This is sensible from a performance perspective -because, as Eric pointed out, the case of being unhashed is already the -unlikely case, so this won't add common contention. And comparing -instructions, this basically doesn't make much of a difference other -than pushing and popping %r13, used by the new `bool ret`. More -generally, I like the idea of locking consistency across table mutator -functions, and this might let me rest slightly easier at night. - -Suggested-by: Eric Dumazet <edumazet@google.com> -Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/ -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/peerlookup.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - ---- a/drivers/net/wireguard/peerlookup.c -+++ b/drivers/net/wireguard/peerlookup.c -@@ -167,9 +167,13 @@ bool wg_index_hashtable_replace(struct i - struct index_hashtable_entry *old, - struct index_hashtable_entry *new) - { -- if (unlikely(hlist_unhashed(&old->index_hash))) -- return false; -+ bool ret; -+ - spin_lock_bh(&table->lock); -+ ret = !hlist_unhashed(&old->index_hash); -+ if (unlikely(!ret)) -+ goto out; -+ - new->index = old->index; - hlist_replace_rcu(&old->index_hash, &new->index_hash); - -@@ -180,8 +184,9 @@ bool wg_index_hashtable_replace(struct i - * simply gets dropped, which isn't terrible. 
- */ - INIT_HLIST_NODE(&old->index_hash); -+out: - spin_unlock_bh(&table->lock); -- return true; -+ return ret; - } - - void wg_index_hashtable_remove(struct index_hashtable *table, diff --git a/target/linux/generic/backport-5.4/080-wireguard-0116-wireguard-selftests-check-that-route_me_harder-packe.patch b/target/linux/generic/backport-5.4/080-wireguard-0116-wireguard-selftests-check-that-route_me_harder-packe.patch deleted file mode 100644 index 09c1b0b8f8..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0116-wireguard-selftests-check-that-route_me_harder-packe.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 29 Oct 2020 03:56:05 +0100 -Subject: [PATCH] wireguard: selftests: check that route_me_harder packets use - the right sk - -commit af8afcf1fdd5f365f70e2386c2d8c7a1abd853d7 upstream. - -If netfilter changes the packet mark, the packet is rerouted. The -ip_route_me_harder family of functions fails to use the right sk, opting -to instead use skb->sk, resulting in a routing loop when used with -tunnels. With the next change fixing this issue in netfilter, test for -the relevant condition inside our test suite, since wireguard was where -the bug was discovered. - -Reported-by: Chen Minqiang <ptpt52@gmail.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/netns.sh | 8 ++++++++ - tools/testing/selftests/wireguard/qemu/kernel.config | 2 ++ - 2 files changed, 10 insertions(+) - ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -316,6 +316,14 @@ pp sleep 3 - n2 ping -W 1 -c 1 192.168.241.1 - n1 wg set wg0 peer "$pub2" persistent-keepalive 0 - -+# Test that sk_bound_dev_if works -+n1 ping -I wg0 -c 1 -W 1 192.168.241.2 -+# What about when the mark changes and the packet must be rerouted? -+n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 -+n1 ping -c 1 -W 1 192.168.241.2 # First the boring case -+n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case -+n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 -+ - # Test that onion routing works, even when it loops - n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 - ip1 addr add 192.168.242.1/24 dev wg0 ---- a/tools/testing/selftests/wireguard/qemu/kernel.config -+++ b/tools/testing/selftests/wireguard/qemu/kernel.config -@@ -18,10 +18,12 @@ CONFIG_NF_NAT=y - CONFIG_NETFILTER_XTABLES=y - CONFIG_NETFILTER_XT_NAT=y - CONFIG_NETFILTER_XT_MATCH_LENGTH=y -+CONFIG_NETFILTER_XT_MARK=y - CONFIG_NF_CONNTRACK_IPV4=y - CONFIG_NF_NAT_IPV4=y - CONFIG_IP_NF_IPTABLES=y - CONFIG_IP_NF_FILTER=y -+CONFIG_IP_NF_MANGLE=y - CONFIG_IP_NF_NAT=y - CONFIG_IP_ADVANCED_ROUTER=y - CONFIG_IP_MULTIPLE_TABLES=y diff --git a/target/linux/generic/backport-5.4/080-wireguard-0117-wireguard-avoid-double-unlikely-notation-when-using-.patch b/target/linux/generic/backport-5.4/080-wireguard-0117-wireguard-avoid-double-unlikely-notation-when-using-.patch deleted file mode 100644 index 7dfc1bb919..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0117-wireguard-avoid-double-unlikely-notation-when-using-.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Antonio 
Quartulli <a@unstable.cc> -Date: Mon, 22 Feb 2021 17:25:43 +0100 -Subject: [PATCH] wireguard: avoid double unlikely() notation when using - IS_ERR() - -commit 30ac4e2f54ec067b7b9ca0db27e75681581378d6 upstream. - -The definition of IS_ERR() already applies the unlikely() notation -when checking the error status of the passed pointer. For this -reason there is no need to have the same notation outside of -IS_ERR() itself. - -Clean up code by removing redundant notation. - -Signed-off-by: Antonio Quartulli <a@unstable.cc> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 2 +- - drivers/net/wireguard/socket.c | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -157,7 +157,7 @@ static netdev_tx_t wg_xmit(struct sk_buf - } else { - struct sk_buff *segs = skb_gso_segment(skb, 0); - -- if (unlikely(IS_ERR(segs))) { -+ if (IS_ERR(segs)) { - ret = PTR_ERR(segs); - goto err_peer; - } ---- a/drivers/net/wireguard/socket.c -+++ b/drivers/net/wireguard/socket.c -@@ -71,7 +71,7 @@ static int send4(struct wg_device *wg, s - ip_rt_put(rt); - rt = ip_route_output_flow(sock_net(sock), &fl, sock); - } -- if (unlikely(IS_ERR(rt))) { -+ if (IS_ERR(rt)) { - ret = PTR_ERR(rt); - net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", - wg->dev->name, &endpoint->addr, ret); -@@ -138,7 +138,7 @@ static int send6(struct wg_device *wg, s - } - dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, - NULL); -- if (unlikely(IS_ERR(dst))) { -+ if (IS_ERR(dst)) { - ret = PTR_ERR(dst); - net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", - wg->dev->name, &endpoint->addr, ret); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0118-wireguard-socket-remove-bogus-__be32-annotation.patch 
b/target/linux/generic/backport-5.4/080-wireguard-0118-wireguard-socket-remove-bogus-__be32-annotation.patch deleted file mode 100644 index 1796f54de9..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0118-wireguard-socket-remove-bogus-__be32-annotation.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jann Horn <jannh@google.com> -Date: Mon, 22 Feb 2021 17:25:44 +0100 -Subject: [PATCH] wireguard: socket: remove bogus __be32 annotation - -commit 7f57bd8dc22de35ddd895294aa554003e4f19a72 upstream. - -The endpoint->src_if4 has nothing to do with fixed-endian numbers; remove -the bogus annotation. - -This was introduced in -https://git.zx2c4.com/wireguard-monolithic-historical/commit?id=14e7d0a499a676ec55176c0de2f9fcbd34074a82 -in the historical WireGuard repo because the old code used to -zero-initialize multiple members as follows: - - endpoint->src4.s_addr = endpoint->src_if4 = fl.saddr = 0; - -Because fl.saddr is fixed-endian and an assignment returns a value with the -type of its left operand, this meant that sparse detected an assignment -between values of different endianness. - -Since then, this assignment was already split up into separate statements; -just the cast survived. - -Signed-off-by: Jann Horn <jannh@google.com> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/socket.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/net/wireguard/socket.c -+++ b/drivers/net/wireguard/socket.c -@@ -53,7 +53,7 @@ static int send4(struct wg_device *wg, s - if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, - fl.saddr, RT_SCOPE_HOST))) { - endpoint->src4.s_addr = 0; -- *(__force __be32 *)&endpoint->src_if4 = 0; -+ endpoint->src_if4 = 0; - fl.saddr = 0; - if (cache) - dst_cache_reset(cache); -@@ -63,7 +63,7 @@ static int send4(struct wg_device *wg, s - PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && - rt->dst.dev->ifindex != endpoint->src_if4)))) { - endpoint->src4.s_addr = 0; -- *(__force __be32 *)&endpoint->src_if4 = 0; -+ endpoint->src_if4 = 0; - fl.saddr = 0; - if (cache) - dst_cache_reset(cache); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0119-wireguard-selftests-test-multiple-parallel-streams.patch b/target/linux/generic/backport-5.4/080-wireguard-0119-wireguard-selftests-test-multiple-parallel-streams.patch deleted file mode 100644 index 3093de45f7..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0119-wireguard-selftests-test-multiple-parallel-streams.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 22 Feb 2021 17:25:45 +0100 -Subject: [PATCH] wireguard: selftests: test multiple parallel streams - -commit d5a49aa6c3e264a93a7d08485d66e346be0969dd upstream. - -In order to test ndo_start_xmit being called in parallel, explicitly add -separate tests, which should all run on different cores. This should -help tease out bugs associated with queueing up packets from different -cores in parallel. Currently, it hasn't found those types of bugs, but -given future planned work, this is a useful regression to avoid. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/netns.sh | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 - ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } - ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } - sleep() { read -t "$1" -N 1 || true; } --waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } -+waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } - waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } - waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } - -@@ -141,6 +141,19 @@ tests() { - n2 iperf3 -s -1 -B fd00::2 & - waitiperf $netns2 $! - n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 -+ -+ # TCP over IPv4, in parallel -+ for max in 4 5 50; do -+ local pids=( ) -+ for ((i=0; i < max; ++i)) do -+ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & -+ pids+=( $! ); waitiperf $netns2 $! 
$(( 5200 + i )) -+ done -+ for ((i=0; i < max; ++i)) do -+ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & -+ done -+ wait "${pids[@]}" -+ done - } - - [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" diff --git a/target/linux/generic/backport-5.4/080-wireguard-0120-wireguard-peer-put-frequently-used-members-above-cac.patch b/target/linux/generic/backport-5.4/080-wireguard-0120-wireguard-peer-put-frequently-used-members-above-cac.patch deleted file mode 100644 index 69e76b96e3..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0120-wireguard-peer-put-frequently-used-members-above-cac.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 22 Feb 2021 17:25:46 +0100 -Subject: [PATCH] wireguard: peer: put frequently used members above cache - lines - -commit 5a0598695634a6bb4126818902dd9140cd9df8b6 upstream. - -The is_dead boolean is checked for every single packet, while the -internal_id member is used basically only for pr_debug messages. So it -makes sense to hoist up is_dead into some space formerly unused by a -struct hole, while demoting internal_id to below the lowest struct -cache line. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/peer.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/net/wireguard/peer.h -+++ b/drivers/net/wireguard/peer.h -@@ -39,6 +39,7 @@ struct wg_peer { - struct crypt_queue tx_queue, rx_queue; - struct sk_buff_head staged_packet_queue; - int serial_work_cpu; -+ bool is_dead; - struct noise_keypairs keypairs; - struct endpoint endpoint; - struct dst_cache endpoint_cache; -@@ -61,9 +62,8 @@ struct wg_peer { - struct rcu_head rcu; - struct list_head peer_list; - struct list_head allowedips_list; -- u64 internal_id; - struct napi_struct napi; -- bool is_dead; -+ u64 internal_id; - }; - - struct wg_peer *wg_peer_create(struct wg_device *wg, diff --git a/target/linux/generic/backport-5.4/080-wireguard-0121-wireguard-device-do-not-generate-ICMP-for-non-IP-pac.patch b/target/linux/generic/backport-5.4/080-wireguard-0121-wireguard-device-do-not-generate-ICMP-for-non-IP-pac.patch deleted file mode 100644 index 073ee9b0d5..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0121-wireguard-device-do-not-generate-ICMP-for-non-IP-pac.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 22 Feb 2021 17:25:47 +0100 -Subject: [PATCH] wireguard: device: do not generate ICMP for non-IP packets - -commit 99fff5264e7ab06f45b0ad60243475be0a8d0559 upstream. - -If skb->protocol doesn't match the actual skb->data header, it's -probably not a good idea to pass it off to icmp{,v6}_ndo_send, which is -expecting to reply to a valid IP packet. So this commit has that early -mismatch case jump to a later error label. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -138,7 +138,7 @@ static netdev_tx_t wg_xmit(struct sk_buf - else if (skb->protocol == htons(ETH_P_IPV6)) - net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", - dev->name, &ipv6_hdr(skb)->daddr); -- goto err; -+ goto err_icmp; - } - - family = READ_ONCE(peer->endpoint.addr.sa_family); -@@ -201,12 +201,13 @@ static netdev_tx_t wg_xmit(struct sk_buf - - err_peer: - wg_peer_put(peer); --err: -- ++dev->stats.tx_errors; -+err_icmp: - if (skb->protocol == htons(ETH_P_IP)) - icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); - else if (skb->protocol == htons(ETH_P_IPV6)) - icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); -+err: -+ ++dev->stats.tx_errors; - kfree_skb(skb); - return ret; - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0122-wireguard-queueing-get-rid-of-per-peer-ring-buffers.patch b/target/linux/generic/backport-5.4/080-wireguard-0122-wireguard-queueing-get-rid-of-per-peer-ring-buffers.patch deleted file mode 100644 index 9dc7ddae7f..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0122-wireguard-queueing-get-rid-of-per-peer-ring-buffers.patch +++ /dev/null @@ -1,560 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 22 Feb 2021 17:25:48 +0100 -Subject: [PATCH] wireguard: queueing: get rid of per-peer ring buffers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 8b5553ace83cced775eefd0f3f18b5c6214ccf7a upstream. - -Having two ring buffers per-peer means that every peer results in two -massive ring allocations. 
On an 8-core x86_64 machine, this commit -reduces the per-peer allocation from 18,688 bytes to 1,856 bytes, which -is a 90% reduction. Ninety percent! With some single-machine -deployments approaching 500,000 peers, we're talking about a reduction -from 7 gigs of memory down to 700 megs of memory. - -In order to get rid of these per-peer allocations, this commit switches -to using a list-based queueing approach. Currently GSO fragments are -chained together using the skb->next pointer (the skb_list_* singly -linked list approach), so we form the per-peer queue around the unused -skb->prev pointer (which sort of makes sense because the links are -pointing backwards). Use of skb_queue_* is not possible here, because -that is based on doubly linked lists and spinlocks. Multiple cores can -write into the queue at any given time, because its writes occur in the -start_xmit path or in the udp_recv path. But reads happen in a single -workqueue item per-peer, amounting to a multi-producer, single-consumer -paradigm. - -The MPSC queue is implemented locklessly and never blocks. However, it -is not linearizable (though it is serializable), with a very tight and -unlikely race on writes, which, when hit (some tiny fraction of the -0.15% of partial adds on a fully loaded 16-core x86_64 system), causes -the queue reader to terminate early. However, because every packet sent -queues up the same workqueue item after it is fully added, the worker -resumes again, and stopping early isn't actually a problem, since at -that point the packet wouldn't have yet been added to the encryption -queue. These properties allow us to avoid disabling interrupts or -spinning. The design is based on Dmitry Vyukov's algorithm [1]. - -Performance-wise, ordinarily list-based queues aren't preferable to -ringbuffers, because of cache misses when following pointers around. 
-However, we *already* have to follow the adjacent pointers when working -through fragments, so there shouldn't actually be any change there. A -potential downside is that dequeueing is a bit more complicated, but the -ptr_ring structure used prior had a spinlock when dequeueing, so all in -all the difference appears to be a wash. - -Actually, from profiling, the biggest performance hit, by far, of this -commit winds up being atomic_add_unless(count, 1, max) and atomic_ -dec(count), which account for the majority of CPU time, according to -perf. In that sense, the previous ring buffer was superior in that it -could check if it was full by head==tail, which the list-based approach -cannot do. - -But all in all, this enables us to get massive memory savings, allowing -WireGuard to scale for real world deployments, without taking much of a -performance hit. - -[1] http://www.1024cores.net/home/lock-free-algorithms/queues/intrusive-mpsc-node-based-queue - -Reviewed-by: Dmitry Vyukov <dvyukov@google.com> -Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/device.c | 12 ++--- - drivers/net/wireguard/device.h | 15 +++--- - drivers/net/wireguard/peer.c | 28 ++++------- - drivers/net/wireguard/peer.h | 4 +- - drivers/net/wireguard/queueing.c | 86 +++++++++++++++++++++++++------- - drivers/net/wireguard/queueing.h | 45 ++++++++++++----- - drivers/net/wireguard/receive.c | 16 +++--- - drivers/net/wireguard/send.c | 31 ++++-------- - 8 files changed, 144 insertions(+), 93 deletions(-) - ---- a/drivers/net/wireguard/device.c -+++ b/drivers/net/wireguard/device.c -@@ -235,8 +235,8 @@ static void wg_destruct(struct net_devic - destroy_workqueue(wg->handshake_receive_wq); - destroy_workqueue(wg->handshake_send_wq); - destroy_workqueue(wg->packet_crypt_wq); -- wg_packet_queue_free(&wg->decrypt_queue, true); -- wg_packet_queue_free(&wg->encrypt_queue, true); -+ wg_packet_queue_free(&wg->decrypt_queue); -+ wg_packet_queue_free(&wg->encrypt_queue); - rcu_barrier(); /* Wait for all the peers to be actually freed. 
*/ - wg_ratelimiter_uninit(); - memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); -@@ -338,12 +338,12 @@ static int wg_newlink(struct net *src_ne - goto err_destroy_handshake_send; - - ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, -- true, MAX_QUEUED_PACKETS); -+ MAX_QUEUED_PACKETS); - if (ret < 0) - goto err_destroy_packet_crypt; - - ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, -- true, MAX_QUEUED_PACKETS); -+ MAX_QUEUED_PACKETS); - if (ret < 0) - goto err_free_encrypt_queue; - -@@ -368,9 +368,9 @@ static int wg_newlink(struct net *src_ne - err_uninit_ratelimiter: - wg_ratelimiter_uninit(); - err_free_decrypt_queue: -- wg_packet_queue_free(&wg->decrypt_queue, true); -+ wg_packet_queue_free(&wg->decrypt_queue); - err_free_encrypt_queue: -- wg_packet_queue_free(&wg->encrypt_queue, true); -+ wg_packet_queue_free(&wg->encrypt_queue); - err_destroy_packet_crypt: - destroy_workqueue(wg->packet_crypt_wq); - err_destroy_handshake_send: ---- a/drivers/net/wireguard/device.h -+++ b/drivers/net/wireguard/device.h -@@ -27,13 +27,14 @@ struct multicore_worker { - - struct crypt_queue { - struct ptr_ring ring; -- union { -- struct { -- struct multicore_worker __percpu *worker; -- int last_cpu; -- }; -- struct work_struct work; -- }; -+ struct multicore_worker __percpu *worker; -+ int last_cpu; -+}; -+ -+struct prev_queue { -+ struct sk_buff *head, *tail, *peeked; -+ struct { struct sk_buff *next, *prev; } empty; // Match first 2 members of struct sk_buff. 
-+ atomic_t count; - }; - - struct wg_device { ---- a/drivers/net/wireguard/peer.c -+++ b/drivers/net/wireguard/peer.c -@@ -32,27 +32,22 @@ struct wg_peer *wg_peer_create(struct wg - peer = kzalloc(sizeof(*peer), GFP_KERNEL); - if (unlikely(!peer)) - return ERR_PTR(ret); -- peer->device = wg; -+ if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) -+ goto err; - -+ peer->device = wg; - wg_noise_handshake_init(&peer->handshake, &wg->static_identity, - public_key, preshared_key, peer); -- if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) -- goto err_1; -- if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, -- MAX_QUEUED_PACKETS)) -- goto err_2; -- if (wg_packet_queue_init(&peer->rx_queue, NULL, false, -- MAX_QUEUED_PACKETS)) -- goto err_3; -- - peer->internal_id = atomic64_inc_return(&peer_counter); - peer->serial_work_cpu = nr_cpumask_bits; - wg_cookie_init(&peer->latest_cookie); - wg_timers_init(peer); - wg_cookie_checker_precompute_peer_keys(peer); - spin_lock_init(&peer->keypairs.keypair_update_lock); -- INIT_WORK(&peer->transmit_handshake_work, -- wg_packet_handshake_send_worker); -+ INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker); -+ INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker); -+ wg_prev_queue_init(&peer->tx_queue); -+ wg_prev_queue_init(&peer->rx_queue); - rwlock_init(&peer->endpoint_lock); - kref_init(&peer->refcount); - skb_queue_head_init(&peer->staged_packet_queue); -@@ -68,11 +63,7 @@ struct wg_peer *wg_peer_create(struct wg - pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); - return peer; - --err_3: -- wg_packet_queue_free(&peer->tx_queue, false); --err_2: -- dst_cache_destroy(&peer->endpoint_cache); --err_1: -+err: - kfree(peer); - return ERR_PTR(ret); - } -@@ -197,8 +188,7 @@ static void rcu_release(struct rcu_head - struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); - - dst_cache_destroy(&peer->endpoint_cache); -- 
wg_packet_queue_free(&peer->rx_queue, false); -- wg_packet_queue_free(&peer->tx_queue, false); -+ WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue)); - - /* The final zeroing takes care of clearing any remaining handshake key - * material and other potentially sensitive information. ---- a/drivers/net/wireguard/peer.h -+++ b/drivers/net/wireguard/peer.h -@@ -36,7 +36,7 @@ struct endpoint { - - struct wg_peer { - struct wg_device *device; -- struct crypt_queue tx_queue, rx_queue; -+ struct prev_queue tx_queue, rx_queue; - struct sk_buff_head staged_packet_queue; - int serial_work_cpu; - bool is_dead; -@@ -46,7 +46,7 @@ struct wg_peer { - rwlock_t endpoint_lock; - struct noise_handshake handshake; - atomic64_t last_sent_handshake; -- struct work_struct transmit_handshake_work, clear_peer_work; -+ struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work; - struct cookie latest_cookie; - struct hlist_node pubkey_hash; - u64 rx_bytes, tx_bytes; ---- a/drivers/net/wireguard/queueing.c -+++ b/drivers/net/wireguard/queueing.c -@@ -9,8 +9,7 @@ struct multicore_worker __percpu * - wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) - { - int cpu; -- struct multicore_worker __percpu *worker = -- alloc_percpu(struct multicore_worker); -+ struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker); - - if (!worker) - return NULL; -@@ -23,7 +22,7 @@ wg_packet_percpu_multicore_worker_alloc( - } - - int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, -- bool multicore, unsigned int len) -+ unsigned int len) - { - int ret; - -@@ -31,25 +30,78 @@ int wg_packet_queue_init(struct crypt_qu - ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); - if (ret) - return ret; -- if (function) { -- if (multicore) { -- queue->worker = wg_packet_percpu_multicore_worker_alloc( -- function, queue); -- if (!queue->worker) { -- ptr_ring_cleanup(&queue->ring, NULL); -- return 
-ENOMEM; -- } -- } else { -- INIT_WORK(&queue->work, function); -- } -+ queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue); -+ if (!queue->worker) { -+ ptr_ring_cleanup(&queue->ring, NULL); -+ return -ENOMEM; - } - return 0; - } - --void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) -+void wg_packet_queue_free(struct crypt_queue *queue) - { -- if (multicore) -- free_percpu(queue->worker); -+ free_percpu(queue->worker); - WARN_ON(!__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, NULL); - } -+ -+#define NEXT(skb) ((skb)->prev) -+#define STUB(queue) ((struct sk_buff *)&queue->empty) -+ -+void wg_prev_queue_init(struct prev_queue *queue) -+{ -+ NEXT(STUB(queue)) = NULL; -+ queue->head = queue->tail = STUB(queue); -+ queue->peeked = NULL; -+ atomic_set(&queue->count, 0); -+ BUILD_BUG_ON( -+ offsetof(struct sk_buff, next) != offsetof(struct prev_queue, empty.next) - -+ offsetof(struct prev_queue, empty) || -+ offsetof(struct sk_buff, prev) != offsetof(struct prev_queue, empty.prev) - -+ offsetof(struct prev_queue, empty)); -+} -+ -+static void __wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) -+{ -+ WRITE_ONCE(NEXT(skb), NULL); -+ WRITE_ONCE(NEXT(xchg_release(&queue->head, skb)), skb); -+} -+ -+bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) -+{ -+ if (!atomic_add_unless(&queue->count, 1, MAX_QUEUED_PACKETS)) -+ return false; -+ __wg_prev_queue_enqueue(queue, skb); -+ return true; -+} -+ -+struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue) -+{ -+ struct sk_buff *tail = queue->tail, *next = smp_load_acquire(&NEXT(tail)); -+ -+ if (tail == STUB(queue)) { -+ if (!next) -+ return NULL; -+ queue->tail = next; -+ tail = next; -+ next = smp_load_acquire(&NEXT(next)); -+ } -+ if (next) { -+ queue->tail = next; -+ atomic_dec(&queue->count); -+ return tail; -+ } -+ if (tail != READ_ONCE(queue->head)) -+ return NULL; -+ __wg_prev_queue_enqueue(queue, STUB(queue)); -+ 
next = smp_load_acquire(&NEXT(tail)); -+ if (next) { -+ queue->tail = next; -+ atomic_dec(&queue->count); -+ return tail; -+ } -+ return NULL; -+} -+ -+#undef NEXT -+#undef STUB ---- a/drivers/net/wireguard/queueing.h -+++ b/drivers/net/wireguard/queueing.h -@@ -17,12 +17,13 @@ struct wg_device; - struct wg_peer; - struct multicore_worker; - struct crypt_queue; -+struct prev_queue; - struct sk_buff; - - /* queueing.c APIs: */ - int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, -- bool multicore, unsigned int len); --void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); -+ unsigned int len); -+void wg_packet_queue_free(struct crypt_queue *queue); - struct multicore_worker __percpu * - wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); - -@@ -135,8 +136,31 @@ static inline int wg_cpumask_next_online - return cpu; - } - -+void wg_prev_queue_init(struct prev_queue *queue); -+ -+/* Multi producer */ -+bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb); -+ -+/* Single consumer */ -+struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue); -+ -+/* Single consumer */ -+static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue) -+{ -+ if (queue->peeked) -+ return queue->peeked; -+ queue->peeked = wg_prev_queue_dequeue(queue); -+ return queue->peeked; -+} -+ -+/* Single consumer */ -+static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue) -+{ -+ queue->peeked = NULL; -+} -+ - static inline int wg_queue_enqueue_per_device_and_peer( -- struct crypt_queue *device_queue, struct crypt_queue *peer_queue, -+ struct crypt_queue *device_queue, struct prev_queue *peer_queue, - struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) - { - int cpu; -@@ -145,8 +169,9 @@ static inline int wg_queue_enqueue_per_d - /* We first queue this up for the peer ingestion, but the consumer - * will wait for the state to change to CRYPTED or DEAD before. 
- */ -- if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) -+ if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb))) - return -ENOSPC; -+ - /* Then we queue it up in the device queue, which consumes the - * packet as soon as it can. - */ -@@ -157,9 +182,7 @@ static inline int wg_queue_enqueue_per_d - return 0; - } - --static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, -- struct sk_buff *skb, -- enum packet_state state) -+static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state) - { - /* We take a reference, because as soon as we call atomic_set, the - * peer can be freed from below us. -@@ -167,14 +190,12 @@ static inline void wg_queue_enqueue_per_ - struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); - - atomic_set_release(&PACKET_CB(skb)->state, state); -- queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, -- peer->internal_id), -- peer->device->packet_crypt_wq, &queue->work); -+ queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id), -+ peer->device->packet_crypt_wq, &peer->transmit_packet_work); - wg_peer_put(peer); - } - --static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, -- enum packet_state state) -+static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state) - { - /* We take a reference, because as soon as we call atomic_set, the - * peer can be freed from below us. 
---- a/drivers/net/wireguard/receive.c -+++ b/drivers/net/wireguard/receive.c -@@ -444,7 +444,6 @@ packet_processed: - int wg_packet_rx_poll(struct napi_struct *napi, int budget) - { - struct wg_peer *peer = container_of(napi, struct wg_peer, napi); -- struct crypt_queue *queue = &peer->rx_queue; - struct noise_keypair *keypair; - struct endpoint endpoint; - enum packet_state state; -@@ -455,11 +454,10 @@ int wg_packet_rx_poll(struct napi_struct - if (unlikely(budget <= 0)) - return 0; - -- while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && -+ while ((skb = wg_prev_queue_peek(&peer->rx_queue)) != NULL && - (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != - PACKET_STATE_UNCRYPTED) { -- __ptr_ring_discard_one(&queue->ring); -- peer = PACKET_PEER(skb); -+ wg_prev_queue_drop_peeked(&peer->rx_queue); - keypair = PACKET_CB(skb)->keypair; - free = true; - -@@ -508,7 +506,7 @@ void wg_packet_decrypt_worker(struct wor - enum packet_state state = - likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? 
- PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; -- wg_queue_enqueue_per_peer_napi(skb, state); -+ wg_queue_enqueue_per_peer_rx(skb, state); - if (need_resched()) - cond_resched(); - } -@@ -531,12 +529,10 @@ static void wg_packet_consume_data(struc - if (unlikely(READ_ONCE(peer->is_dead))) - goto err; - -- ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, -- &peer->rx_queue, skb, -- wg->packet_crypt_wq, -- &wg->decrypt_queue.last_cpu); -+ ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, -+ wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu); - if (unlikely(ret == -EPIPE)) -- wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); -+ wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD); - if (likely(!ret || ret == -EPIPE)) { - rcu_read_unlock_bh(); - return; ---- a/drivers/net/wireguard/send.c -+++ b/drivers/net/wireguard/send.c -@@ -239,8 +239,7 @@ void wg_packet_send_keepalive(struct wg_ - wg_packet_send_staged_packets(peer); - } - --static void wg_packet_create_data_done(struct sk_buff *first, -- struct wg_peer *peer) -+static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first) - { - struct sk_buff *skb, *next; - bool is_keepalive, data_sent = false; -@@ -262,22 +261,19 @@ static void wg_packet_create_data_done(s - - void wg_packet_tx_worker(struct work_struct *work) - { -- struct crypt_queue *queue = container_of(work, struct crypt_queue, -- work); -+ struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work); - struct noise_keypair *keypair; - enum packet_state state; - struct sk_buff *first; -- struct wg_peer *peer; - -- while ((first = __ptr_ring_peek(&queue->ring)) != NULL && -+ while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL && - (state = atomic_read_acquire(&PACKET_CB(first)->state)) != - PACKET_STATE_UNCRYPTED) { -- __ptr_ring_discard_one(&queue->ring); -- peer = PACKET_PEER(first); -+ wg_prev_queue_drop_peeked(&peer->tx_queue); - keypair = 
PACKET_CB(first)->keypair; - - if (likely(state == PACKET_STATE_CRYPTED)) -- wg_packet_create_data_done(first, peer); -+ wg_packet_create_data_done(peer, first); - else - kfree_skb_list(first); - -@@ -306,16 +302,14 @@ void wg_packet_encrypt_worker(struct wor - break; - } - } -- wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, -- state); -+ wg_queue_enqueue_per_peer_tx(first, state); - if (need_resched()) - cond_resched(); - } - } - --static void wg_packet_create_data(struct sk_buff *first) -+static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first) - { -- struct wg_peer *peer = PACKET_PEER(first); - struct wg_device *wg = peer->device; - int ret = -EINVAL; - -@@ -323,13 +317,10 @@ static void wg_packet_create_data(struct - if (unlikely(READ_ONCE(peer->is_dead))) - goto err; - -- ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, -- &peer->tx_queue, first, -- wg->packet_crypt_wq, -- &wg->encrypt_queue.last_cpu); -+ ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, -+ wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu); - if (unlikely(ret == -EPIPE)) -- wg_queue_enqueue_per_peer(&peer->tx_queue, first, -- PACKET_STATE_DEAD); -+ wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD); - err: - rcu_read_unlock_bh(); - if (likely(!ret || ret == -EPIPE)) -@@ -393,7 +384,7 @@ void wg_packet_send_staged_packets(struc - packets.prev->next = NULL; - wg_peer_get(keypair->entry.peer); - PACKET_CB(packets.next)->keypair = keypair; -- wg_packet_create_data(packets.next); -+ wg_packet_create_data(peer, packets.next); - return; - - out_invalid: diff --git a/target/linux/generic/backport-5.4/080-wireguard-0123-wireguard-kconfig-use-arm-chacha-even-with-no-neon.patch b/target/linux/generic/backport-5.4/080-wireguard-0123-wireguard-kconfig-use-arm-chacha-even-with-no-neon.patch deleted file mode 100644 index 9a251492c2..0000000000 --- 
a/target/linux/generic/backport-5.4/080-wireguard-0123-wireguard-kconfig-use-arm-chacha-even-with-no-neon.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Mon, 22 Feb 2021 17:25:49 +0100 -Subject: [PATCH] wireguard: kconfig: use arm chacha even with no neon - -commit bce2473927af8de12ad131a743f55d69d358c0b9 upstream. - -The condition here was incorrect: a non-neon fallback implementation is -available on arm32 when NEON is not supported. - -Reported-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/net/Kconfig -+++ b/drivers/net/Kconfig -@@ -87,7 +87,7 @@ config WIREGUARD - select CRYPTO_CURVE25519_X86 if X86 && 64BIT - select ARM_CRYPTO if ARM - select ARM64_CRYPTO if ARM64 -- select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON -+ select CRYPTO_CHACHA20_NEON if ARM || (ARM64 && KERNEL_MODE_NEON) - select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON - select CRYPTO_POLY1305_ARM if ARM - select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON diff --git a/target/linux/generic/backport-5.4/080-wireguard-0124-crypto-mips-poly1305-enable-for-all-MIPS-processors.patch b/target/linux/generic/backport-5.4/080-wireguard-0124-crypto-mips-poly1305-enable-for-all-MIPS-processors.patch deleted file mode 100644 index c0ee841b02..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0124-crypto-mips-poly1305-enable-for-all-MIPS-processors.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Maciej W. 
Rozycki" <macro@orcam.me.uk> -Date: Thu, 11 Mar 2021 21:50:47 -0700 -Subject: [PATCH] crypto: mips/poly1305 - enable for all MIPS processors - -commit 6c810cf20feef0d4338e9b424ab7f2644a8b353e upstream. - -The MIPS Poly1305 implementation is generic MIPS code written such as to -support down to the original MIPS I and MIPS III ISA for the 32-bit and -64-bit variant respectively. Lift the current limitation then to enable -code for MIPSr1 ISA or newer processors only and have it available for -all MIPS processors. - -Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk> -Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation") -Cc: stable@vger.kernel.org # v5.5+ -Acked-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/mips/crypto/Makefile | 4 ++-- - crypto/Kconfig | 2 +- - drivers/net/Kconfig | 2 +- - 3 files changed, 4 insertions(+), 4 deletions(-) - ---- a/arch/mips/crypto/Makefile -+++ b/arch/mips/crypto/Makefile -@@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to - obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o - poly1305-mips-y := poly1305-core.o poly1305-glue.o - --perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 --perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 -+perlasm-flavour-$(CONFIG_32BIT) := o32 -+perlasm-flavour-$(CONFIG_64BIT) := 64 - - quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -740,7 +740,7 @@ config CRYPTO_POLY1305_X86_64 - - config CRYPTO_POLY1305_MIPS - tristate "Poly1305 authenticator algorithm (MIPS optimized)" -- depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) -+ depends on MIPS - select CRYPTO_ARCH_HAVE_LIB_POLY1305 - - config CRYPTO_MD4 ---- a/drivers/net/Kconfig -+++ b/drivers/net/Kconfig -@@ -92,7 +92,7 @@ config WIREGUARD - select CRYPTO_POLY1305_ARM if ARM - select 
CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON - select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 -- select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) -+ select CRYPTO_POLY1305_MIPS if MIPS - help - WireGuard is a secure, fast, and easy to use replacement for IPSec - that uses modern cryptography and clever networking tricks. It's diff --git a/target/linux/generic/backport-5.4/080-wireguard-0125-crypto-mips-add-poly1305-core.S-to-.gitignore.patch b/target/linux/generic/backport-5.4/080-wireguard-0125-crypto-mips-add-poly1305-core.S-to-.gitignore.patch deleted file mode 100644 index 856d67d5b8..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0125-crypto-mips-add-poly1305-core.S-to-.gitignore.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> -Date: Sat, 27 Mar 2021 19:39:43 -0700 -Subject: [PATCH] crypto: mips: add poly1305-core.S to .gitignore - -commit dc92d0df51dc61de88bf6f4884a17bf73d5c6326 upstream. - -poly1305-core.S is an auto-generated file, so it should be ignored. - -Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation") -Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> -Cc: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - arch/mips/crypto/.gitignore | 2 ++ - 1 file changed, 2 insertions(+) - create mode 100644 arch/mips/crypto/.gitignore - ---- /dev/null -+++ b/arch/mips/crypto/.gitignore -@@ -0,0 +1,2 @@ -+# SPDX-License-Identifier: GPL-2.0-only -+poly1305-core.S diff --git a/target/linux/generic/backport-5.4/080-wireguard-0126-crypto-poly1305-fix-poly1305_core_setkey-declaration.patch b/target/linux/generic/backport-5.4/080-wireguard-0126-crypto-poly1305-fix-poly1305_core_setkey-declaration.patch deleted file mode 100644 index ded6625aeb..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0126-crypto-poly1305-fix-poly1305_core_setkey-declaration.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Arnd Bergmann <arnd@arndb.de> -Date: Mon, 22 Mar 2021 18:05:15 +0100 -Subject: [PATCH] crypto: poly1305 - fix poly1305_core_setkey() declaration -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 8d195e7a8ada68928f2aedb2c18302a4518fe68e upstream. - -gcc-11 points out a mismatch between the declaration and the definition -of poly1305_core_setkey(): - -lib/crypto/poly1305-donna32.c:13:67: error: argument 2 of type ‘const u8[16]’ {aka ‘const unsigned char[16]’} with mismatched bound [-Werror=array-parameter=] - 13 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) - | ~~~~~~~~~^~~~~~~~~~~ -In file included from lib/crypto/poly1305-donna32.c:11: -include/crypto/internal/poly1305.h:21:68: note: previously declared as ‘const u8 *’ {aka ‘const unsigned char *’} - 21 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); - -This is harmless in principle, as the calling conventions are the same, -but the more specific prototype allows better type checking in the -caller. - -Change the declaration to match the actual function definition. 
-The poly1305_simd_init() is a bit suspicious here, as it previously -had a 32-byte argument type, but looks like it needs to take the -16-byte POLY1305_BLOCK_SIZE array instead. - -Fixes: 1c08a104360f ("crypto: poly1305 - add new 32 and 64-bit generic versions") -Signed-off-by: Arnd Bergmann <arnd@arndb.de> -Reviewed-by: Ard Biesheuvel <ardb@kernel.org> -Reviewed-by: Eric Biggers <ebiggers@google.com> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/poly1305-glue.c | 2 +- - arch/arm64/crypto/poly1305-glue.c | 2 +- - arch/mips/crypto/poly1305-glue.c | 2 +- - arch/x86/crypto/poly1305_glue.c | 6 +++--- - include/crypto/internal/poly1305.h | 3 ++- - include/crypto/poly1305.h | 6 ++++-- - lib/crypto/poly1305-donna32.c | 3 ++- - lib/crypto/poly1305-donna64.c | 3 ++- - lib/crypto/poly1305.c | 3 ++- - 9 files changed, 18 insertions(+), 12 deletions(-) - ---- a/arch/arm/crypto/poly1305-glue.c -+++ b/arch/arm/crypto/poly1305-glue.c -@@ -29,7 +29,7 @@ void __weak poly1305_blocks_neon(void *s - - static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - --void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) - { - poly1305_init_arm(&dctx->h, key); - dctx->s[0] = get_unaligned_le32(key + 16); ---- a/arch/arm64/crypto/poly1305-glue.c -+++ b/arch/arm64/crypto/poly1305-glue.c -@@ -25,7 +25,7 @@ asmlinkage void poly1305_emit(void *stat - - static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - --void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) - { - poly1305_init_arm64(&dctx->h, key); - dctx->s[0] = get_unaligned_le32(key + 16); ---- a/arch/mips/crypto/poly1305-glue.c -+++ b/arch/mips/crypto/poly1305-glue.c -@@ -17,7 +17,7 @@ asmlinkage void poly1305_init_mips(void - 
asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); - asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); - --void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) - { - poly1305_init_mips(&dctx->h, key); - dctx->s[0] = get_unaligned_le32(key + 16); ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -15,7 +15,7 @@ - #include <asm/simd.h> - - asmlinkage void poly1305_init_x86_64(void *ctx, -- const u8 key[POLY1305_KEY_SIZE]); -+ const u8 key[POLY1305_BLOCK_SIZE]); - asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, - const size_t len, const u32 padbit); - asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], -@@ -80,7 +80,7 @@ static void convert_to_base2_64(void *ct - state->is_base2_26 = 0; - } - --static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE]) -+static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE]) - { - poly1305_init_x86_64(ctx, key); - } -@@ -128,7 +128,7 @@ static void poly1305_simd_emit(void *ctx - poly1305_emit_avx(ctx, mac, nonce); - } - --void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) -+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) - { - poly1305_simd_init(&dctx->h, key); - dctx->s[0] = get_unaligned_le32(&key[16]); ---- a/include/crypto/internal/poly1305.h -+++ b/include/crypto/internal/poly1305.h -@@ -18,7 +18,8 @@ - * only the ε-almost-∆-universal hash function (not the full MAC) is computed. 
- */ - --void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); -+void poly1305_core_setkey(struct poly1305_core_key *key, -+ const u8 raw_key[POLY1305_BLOCK_SIZE]); - static inline void poly1305_core_init(struct poly1305_state *state) - { - *state = (struct poly1305_state){}; ---- a/include/crypto/poly1305.h -+++ b/include/crypto/poly1305.h -@@ -58,8 +58,10 @@ struct poly1305_desc_ctx { - }; - }; - --void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key); --void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key); -+void poly1305_init_arch(struct poly1305_desc_ctx *desc, -+ const u8 key[POLY1305_KEY_SIZE]); -+void poly1305_init_generic(struct poly1305_desc_ctx *desc, -+ const u8 key[POLY1305_KEY_SIZE]); - - static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) - { ---- a/lib/crypto/poly1305-donna32.c -+++ b/lib/crypto/poly1305-donna32.c -@@ -10,7 +10,8 @@ - #include <asm/unaligned.h> - #include <crypto/internal/poly1305.h> - --void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) -+void poly1305_core_setkey(struct poly1305_core_key *key, -+ const u8 raw_key[POLY1305_BLOCK_SIZE]) - { - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff; ---- a/lib/crypto/poly1305-donna64.c -+++ b/lib/crypto/poly1305-donna64.c -@@ -12,7 +12,8 @@ - - typedef __uint128_t u128; - --void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) -+void poly1305_core_setkey(struct poly1305_core_key *key, -+ const u8 raw_key[POLY1305_BLOCK_SIZE]) - { - u64 t0, t1; - ---- a/lib/crypto/poly1305.c -+++ b/lib/crypto/poly1305.c -@@ -12,7 +12,8 @@ - #include <linux/module.h> - #include <asm/unaligned.h> - --void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) -+void poly1305_init_generic(struct poly1305_desc_ctx *desc, -+ const u8 key[POLY1305_KEY_SIZE]) - { - poly1305_core_setkey(&desc->core_r, 
key); - desc->s[0] = get_unaligned_le32(key + 16); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0127-wireguard-selftests-remove-old-conntrack-kconfig-val.patch b/target/linux/generic/backport-5.4/080-wireguard-0127-wireguard-selftests-remove-old-conntrack-kconfig-val.patch deleted file mode 100644 index 3e7d1a8e02..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0127-wireguard-selftests-remove-old-conntrack-kconfig-val.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:30 +0200 -Subject: [PATCH] wireguard: selftests: remove old conntrack kconfig value - -commit acf2492b51c9a3c4dfb947f4d3477a86d315150f upstream. - -On recent kernels, this config symbol is no longer used. - -Reported-by: Rui Salvaterra <rsalvaterra@gmail.com> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/qemu/kernel.config | 1 - - 1 file changed, 1 deletion(-) - ---- a/tools/testing/selftests/wireguard/qemu/kernel.config -+++ b/tools/testing/selftests/wireguard/qemu/kernel.config -@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y - CONFIG_NETFILTER_XT_NAT=y - CONFIG_NETFILTER_XT_MATCH_LENGTH=y - CONFIG_NETFILTER_XT_MARK=y --CONFIG_NF_CONNTRACK_IPV4=y - CONFIG_NF_NAT_IPV4=y - CONFIG_IP_NF_IPTABLES=y - CONFIG_IP_NF_FILTER=y diff --git a/target/linux/generic/backport-5.4/080-wireguard-0128-wireguard-selftests-make-sure-rp_filter-is-disabled-.patch b/target/linux/generic/backport-5.4/080-wireguard-0128-wireguard-selftests-make-sure-rp_filter-is-disabled-.patch deleted file mode 100644 index 22d0f3e32e..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0128-wireguard-selftests-make-sure-rp_filter-is-disabled-.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:31 +0200 -Subject: [PATCH] wireguard: selftests: make sure rp_filter is disabled on - vethc - -commit f8873d11d4121aad35024f9379e431e0c83abead upstream. - -Some distros may enable strict rp_filter by default, which will prevent -vethc from receiving the packets with an unrouteable reverse path address. - -Reported-by: Hangbin Liu <liuhangbin@gmail.com> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - tools/testing/selftests/wireguard/netns.sh | 1 + - 1 file changed, 1 insertion(+) - ---- a/tools/testing/selftests/wireguard/netns.sh -+++ b/tools/testing/selftests/wireguard/netns.sh -@@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_pref - ip1 -4 route add default dev wg0 table 51820 - ip1 -4 rule add not fwmark 51820 table 51820 - ip1 -4 rule add table main suppress_prefixlength 0 -+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter' - # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. - n1 ping -W 1 -c 100 -f 192.168.99.7 - n1 ping -W 1 -c 100 -f abab::1111 diff --git a/target/linux/generic/backport-5.4/080-wireguard-0129-wireguard-do-not-use-O3.patch b/target/linux/generic/backport-5.4/080-wireguard-0129-wireguard-do-not-use-O3.patch deleted file mode 100644 index a7890a7384..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0129-wireguard-do-not-use-O3.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:32 +0200 -Subject: [PATCH] wireguard: do not use -O3 - -commit cc5060ca0285efe2728bced399a1955a7ce808b2 upstream. - -Apparently, various versions of gcc have O3-related miscompiles. Looking -at the difference between -O2 and -O3 for gcc 11 doesn't indicate -miscompiles, but the difference also doesn't seem so significant for -performance that it's worth risking. - -Link: https://lore.kernel.org/lkml/CAHk-=wjuoGyxDhAF8SsrTkN0-YfCx7E6jUN3ikC_tn2AKWTTsA@mail.gmail.com/ -Link: https://lore.kernel.org/lkml/CAHmME9otB5Wwxp7H8bR_i2uH2esEMvoBMC8uEXBMH9p0q1s6Bw@mail.gmail.com/ -Reported-by: Linus Torvalds <torvalds@linux-foundation.org> -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/Makefile | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/drivers/net/wireguard/Makefile -+++ b/drivers/net/wireguard/Makefile -@@ -1,5 +1,4 @@ --ccflags-y := -O3 --ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' -+ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' - ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG - wireguard-y := main.o - wireguard-y += noise.o diff --git a/target/linux/generic/backport-5.4/080-wireguard-0130-wireguard-use-synchronize_net-rather-than-synchroniz.patch b/target/linux/generic/backport-5.4/080-wireguard-0130-wireguard-use-synchronize_net-rather-than-synchroniz.patch deleted file mode 100644 index 309fe36198..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0130-wireguard-use-synchronize_net-rather-than-synchroniz.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:33 +0200 -Subject: [PATCH] wireguard: use synchronize_net rather than synchronize_rcu - -commit 24b70eeeb4f46c09487f8155239ebfb1f875774a upstream. - -Many of the synchronization points are sometimes called under the rtnl -lock, which means we should use synchronize_net rather than -synchronize_rcu. Under the hood, this expands to using the expedited -flavor of function in the event that rtnl is held, in order to not stall -other concurrent changes. - -This fixes some very, very long delays when removing multiple peers at -once, which would cause some operations to take several minutes. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/peer.c | 6 +++--- - drivers/net/wireguard/socket.c | 2 +- - 2 files changed, 4 insertions(+), 4 deletions(-) - ---- a/drivers/net/wireguard/peer.c -+++ b/drivers/net/wireguard/peer.c -@@ -88,7 +88,7 @@ static void peer_make_dead(struct wg_pee - /* Mark as dead, so that we don't allow jumping contexts after. */ - WRITE_ONCE(peer->is_dead, true); - -- /* The caller must now synchronize_rcu() for this to take effect. */ -+ /* The caller must now synchronize_net() for this to take effect. */ - } - - static void peer_remove_after_dead(struct wg_peer *peer) -@@ -160,7 +160,7 @@ void wg_peer_remove(struct wg_peer *peer - lockdep_assert_held(&peer->device->device_update_lock); - - peer_make_dead(peer); -- synchronize_rcu(); -+ synchronize_net(); - peer_remove_after_dead(peer); - } - -@@ -178,7 +178,7 @@ void wg_peer_remove_all(struct wg_device - peer_make_dead(peer); - list_add_tail(&peer->peer_list, &dead_peers); - } -- synchronize_rcu(); -+ synchronize_net(); - list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) - peer_remove_after_dead(peer); - } ---- a/drivers/net/wireguard/socket.c -+++ b/drivers/net/wireguard/socket.c -@@ -430,7 +430,7 @@ void wg_socket_reinit(struct wg_device * - if (new4) - wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); - mutex_unlock(&wg->socket_update_lock); -- synchronize_rcu(); -+ synchronize_net(); - sock_free(old4); - sock_free(old6); - } diff --git a/target/linux/generic/backport-5.4/080-wireguard-0131-wireguard-peer-allocate-in-kmem_cache.patch b/target/linux/generic/backport-5.4/080-wireguard-0131-wireguard-peer-allocate-in-kmem_cache.patch deleted file mode 100644 index 32ae327037..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0131-wireguard-peer-allocate-in-kmem_cache.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. 
Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:34 +0200 -Subject: [PATCH] wireguard: peer: allocate in kmem_cache - -commit a4e9f8e3287c9eb6bf70df982870980dd3341863 upstream. - -With deployments having upwards of 600k peers now, this somewhat heavy -structure could benefit from more fine-grained allocations. -Specifically, instead of using a 2048-byte slab for a 1544-byte object, -we can now use 1544-byte objects directly, thus saving almost 25% -per-peer, or with 600k peers, that's a savings of 303 MiB. This also -makes wireguard's memory usage more transparent in tools like slabtop -and /proc/slabinfo. - -Fixes: 8b5553ace83c ("wireguard: queueing: get rid of per-peer ring buffers") -Suggested-by: Arnd Bergmann <arnd@arndb.de> -Suggested-by: Matthew Wilcox <willy@infradead.org> -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/main.c | 7 +++++++ - drivers/net/wireguard/peer.c | 21 +++++++++++++++++---- - drivers/net/wireguard/peer.h | 3 +++ - 3 files changed, 27 insertions(+), 4 deletions(-) - ---- a/drivers/net/wireguard/main.c -+++ b/drivers/net/wireguard/main.c -@@ -28,6 +28,10 @@ static int __init mod_init(void) - #endif - wg_noise_init(); - -+ ret = wg_peer_init(); -+ if (ret < 0) -+ goto err_peer; -+ - ret = wg_device_init(); - if (ret < 0) - goto err_device; -@@ -44,6 +48,8 @@ static int __init mod_init(void) - err_netlink: - wg_device_uninit(); - err_device: -+ wg_peer_uninit(); -+err_peer: - return ret; - } - -@@ -51,6 +57,7 @@ static void __exit mod_exit(void) - { - wg_genetlink_uninit(); - wg_device_uninit(); -+ wg_peer_uninit(); - } - - module_init(mod_init); ---- a/drivers/net/wireguard/peer.c -+++ b/drivers/net/wireguard/peer.c -@@ -15,6 +15,7 @@ - #include <linux/rcupdate.h> - #include <linux/list.h> - -+static struct kmem_cache *peer_cache; - static atomic64_t peer_counter 
= ATOMIC64_INIT(0); - - struct wg_peer *wg_peer_create(struct wg_device *wg, -@@ -29,10 +30,10 @@ struct wg_peer *wg_peer_create(struct wg - if (wg->num_peers >= MAX_PEERS_PER_DEVICE) - return ERR_PTR(ret); - -- peer = kzalloc(sizeof(*peer), GFP_KERNEL); -+ peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL); - if (unlikely(!peer)) - return ERR_PTR(ret); -- if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) -+ if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))) - goto err; - - peer->device = wg; -@@ -64,7 +65,7 @@ struct wg_peer *wg_peer_create(struct wg - return peer; - - err: -- kfree(peer); -+ kmem_cache_free(peer_cache, peer); - return ERR_PTR(ret); - } - -@@ -193,7 +194,8 @@ static void rcu_release(struct rcu_head - /* The final zeroing takes care of clearing any remaining handshake key - * material and other potentially sensitive information. - */ -- kzfree(peer); -+ memzero_explicit(peer, sizeof(*peer)); -+ kmem_cache_free(peer_cache, peer); - } - - static void kref_release(struct kref *refcount) -@@ -225,3 +227,14 @@ void wg_peer_put(struct wg_peer *peer) - return; - kref_put(&peer->refcount, kref_release); - } -+ -+int __init wg_peer_init(void) -+{ -+ peer_cache = KMEM_CACHE(wg_peer, 0); -+ return peer_cache ? 
0 : -ENOMEM; -+} -+ -+void wg_peer_uninit(void) -+{ -+ kmem_cache_destroy(peer_cache); -+} ---- a/drivers/net/wireguard/peer.h -+++ b/drivers/net/wireguard/peer.h -@@ -80,4 +80,7 @@ void wg_peer_put(struct wg_peer *peer); - void wg_peer_remove(struct wg_peer *peer); - void wg_peer_remove_all(struct wg_device *wg); - -+int wg_peer_init(void); -+void wg_peer_uninit(void); -+ - #endif /* _WG_PEER_H */ diff --git a/target/linux/generic/backport-5.4/080-wireguard-0132-wireguard-allowedips-initialize-list-head-in-selftes.patch b/target/linux/generic/backport-5.4/080-wireguard-0132-wireguard-allowedips-initialize-list-head-in-selftes.patch deleted file mode 100644 index ce4e5dcf50..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0132-wireguard-allowedips-initialize-list-head-in-selftes.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:35 +0200 -Subject: [PATCH] wireguard: allowedips: initialize list head in selftest - -commit 46cfe8eee285cde465b420637507884551f5d7ca upstream. - -The randomized trie tests weren't initializing the dummy peer list head, -resulting in a NULL pointer dereference when used. Fix this by -initializing it in the randomized trie test, just like we do for the -static unit test. - -While we're at it, all of the other strings like this have the word -"self-test", so add it to the missing place here. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/selftest/allowedips.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/drivers/net/wireguard/selftest/allowedips.c -+++ b/drivers/net/wireguard/selftest/allowedips.c -@@ -296,6 +296,7 @@ static __init bool randomized_test(void) - goto free; - } - kref_init(&peers[i]->refcount); -+ INIT_LIST_HEAD(&peers[i]->allowedips_list); - } - - mutex_lock(&mutex); -@@ -333,7 +334,7 @@ static __init bool randomized_test(void) - if (wg_allowedips_insert_v4(&t, - (struct in_addr *)mutated, - cidr, peer, &mutex) < 0) { -- pr_err("allowedips random malloc: FAIL\n"); -+ pr_err("allowedips random self-test malloc: FAIL\n"); - goto free_locked; - } - if (horrible_allowedips_insert_v4(&h, diff --git a/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch b/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch deleted file mode 100644 index 78da24ea46..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch +++ /dev/null @@ -1,237 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:36 +0200 -Subject: [PATCH] wireguard: allowedips: remove nodes in O(1) - -commit f634f418c227c912e7ea95a3299efdc9b10e4022 upstream. - -Previously, deleting peers would require traversing the entire trie in -order to rebalance nodes and safely free them. This meant that removing -1000 peers from a trie with a half million nodes would take an extremely -long time, during which we're holding the rtnl lock. Large-scale users -were reporting 200ms latencies added to the networking stack as a whole -every time their userspace software would queue up significant removals. -That's a serious situation. 
- -This commit fixes that by maintaining a double pointer to the parent's -bit pointer for each node, and then using the already existing node list -belonging to each peer to go directly to the node, fix up its pointers, -and free it with RCU. This means removal is O(1) instead of O(n), and we -don't use gobs of stack. - -The removal algorithm has the same downside as the code that it fixes: -it won't collapse needlessly long runs of fillers. We can enhance that -in the future if it ever becomes a problem. This commit documents that -limitation with a TODO comment in code, a small but meaningful -improvement over the prior situation. - -Currently the biggest flaw, which the next commit addresses, is that -because this increases the node size on 64-bit machines from 60 bytes to -68 bytes. 60 rounds up to 64, but 68 rounds up to 128. So we wind up -using twice as much memory per node, because of power-of-two -allocations, which is a big bummer. We'll need to figure something out -there. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/allowedips.c | 132 ++++++++++++----------------- - drivers/net/wireguard/allowedips.h | 9 +- - 2 files changed, 57 insertions(+), 84 deletions(-) - ---- a/drivers/net/wireguard/allowedips.c -+++ b/drivers/net/wireguard/allowedips.c -@@ -66,60 +66,6 @@ static void root_remove_peer_lists(struc - } - } - --static void walk_remove_by_peer(struct allowedips_node __rcu **top, -- struct wg_peer *peer, struct mutex *lock) --{ --#define REF(p) rcu_access_pointer(p) --#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) --#define PUSH(p) ({ \ -- WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \ -- stack[len++] = p; \ -- }) -- -- struct allowedips_node __rcu **stack[128], **nptr; -- struct allowedips_node *node, *prev; -- unsigned int len; -- -- if (unlikely(!peer || !REF(*top))) -- return; -- -- for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { -- nptr = stack[len - 1]; -- node = DEREF(nptr); -- if (!node) { -- --len; -- continue; -- } -- if (!prev || REF(prev->bit[0]) == node || -- REF(prev->bit[1]) == node) { -- if (REF(node->bit[0])) -- PUSH(&node->bit[0]); -- else if (REF(node->bit[1])) -- PUSH(&node->bit[1]); -- } else if (REF(node->bit[0]) == prev) { -- if (REF(node->bit[1])) -- PUSH(&node->bit[1]); -- } else { -- if (rcu_dereference_protected(node->peer, -- lockdep_is_held(lock)) == peer) { -- RCU_INIT_POINTER(node->peer, NULL); -- list_del_init(&node->peer_list); -- if (!node->bit[0] || !node->bit[1]) { -- rcu_assign_pointer(*nptr, DEREF( -- &node->bit[!REF(node->bit[0])])); -- kfree_rcu(node, rcu); -- node = DEREF(nptr); -- } -- } -- --len; -- } -- } -- --#undef REF --#undef DEREF --#undef PUSH --} -- - static unsigned int fls128(u64 a, u64 b) - { - return a ? 
fls64(a) + 64U : fls64(b); -@@ -224,6 +170,7 @@ static int add(struct allowedips_node __ - RCU_INIT_POINTER(node->peer, peer); - list_add_tail(&node->peer_list, &peer->allowedips_list); - copy_and_assign_cidr(node, key, cidr, bits); -+ rcu_assign_pointer(node->parent_bit, trie); - rcu_assign_pointer(*trie, node); - return 0; - } -@@ -243,9 +190,9 @@ static int add(struct allowedips_node __ - if (!node) { - down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); - } else { -- down = rcu_dereference_protected(CHOOSE_NODE(node, key), -- lockdep_is_held(lock)); -+ down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock)); - if (!down) { -+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key)); - rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); - return 0; - } -@@ -254,29 +201,37 @@ static int add(struct allowedips_node __ - parent = node; - - if (newnode->cidr == cidr) { -+ rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits)); - rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); -- if (!parent) -+ if (!parent) { -+ rcu_assign_pointer(newnode->parent_bit, trie); - rcu_assign_pointer(*trie, newnode); -- else -- rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), -- newnode); -- } else { -- node = kzalloc(sizeof(*node), GFP_KERNEL); -- if (unlikely(!node)) { -- list_del(&newnode->peer_list); -- kfree(newnode); -- return -ENOMEM; -+ } else { -+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits)); -+ rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode); - } -- INIT_LIST_HEAD(&node->peer_list); -- copy_and_assign_cidr(node, newnode->bits, cidr, bits); -+ return 0; -+ } -+ -+ node = kzalloc(sizeof(*node), GFP_KERNEL); -+ if (unlikely(!node)) { -+ list_del(&newnode->peer_list); -+ kfree(newnode); -+ return -ENOMEM; -+ } -+ INIT_LIST_HEAD(&node->peer_list); -+ copy_and_assign_cidr(node, newnode->bits, cidr, bits); - -- rcu_assign_pointer(CHOOSE_NODE(node, down->bits), 
down); -- rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); -- if (!parent) -- rcu_assign_pointer(*trie, node); -- else -- rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), -- node); -+ rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits)); -+ rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); -+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits)); -+ rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); -+ if (!parent) { -+ rcu_assign_pointer(node->parent_bit, trie); -+ rcu_assign_pointer(*trie, node); -+ } else { -+ rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits)); -+ rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node); - } - return 0; - } -@@ -335,9 +290,30 @@ int wg_allowedips_insert_v6(struct allow - void wg_allowedips_remove_by_peer(struct allowedips *table, - struct wg_peer *peer, struct mutex *lock) - { -+ struct allowedips_node *node, *child, *tmp; -+ -+ if (list_empty(&peer->allowedips_list)) -+ return; - ++table->seq; -- walk_remove_by_peer(&table->root4, peer, lock); -- walk_remove_by_peer(&table->root6, peer, lock); -+ list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) { -+ list_del_init(&node->peer_list); -+ RCU_INIT_POINTER(node->peer, NULL); -+ if (node->bit[0] && node->bit[1]) -+ continue; -+ child = rcu_dereference_protected( -+ node->bit[!rcu_access_pointer(node->bit[0])], -+ lockdep_is_held(lock)); -+ if (child) -+ child->parent_bit = node->parent_bit; -+ *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child; -+ kfree_rcu(node, rcu); -+ -+ /* TODO: Note that we currently don't walk up and down in order to -+ * free any potential filler nodes. This means that this function -+ * doesn't free up as much as it could, which could be revisited -+ * at some point. 
-+ */ -+ } - } - - int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) ---- a/drivers/net/wireguard/allowedips.h -+++ b/drivers/net/wireguard/allowedips.h -@@ -15,14 +15,11 @@ struct wg_peer; - struct allowedips_node { - struct wg_peer __rcu *peer; - struct allowedips_node __rcu *bit[2]; -- /* While it may seem scandalous that we waste space for v4, -- * we're alloc'ing to the nearest power of 2 anyway, so this -- * doesn't actually make a difference. -- */ -- u8 bits[16] __aligned(__alignof(u64)); - u8 cidr, bit_at_a, bit_at_b, bitlen; -+ u8 bits[16] __aligned(__alignof(u64)); - -- /* Keep rarely used list at bottom to be beyond cache line. */ -+ /* Keep rarely used members at bottom to be beyond cache line. */ -+ struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */ - union { - struct list_head peer_list; - struct rcu_head rcu; diff --git a/target/linux/generic/backport-5.4/080-wireguard-0134-wireguard-allowedips-allocate-nodes-in-kmem_cache.patch b/target/linux/generic/backport-5.4/080-wireguard-0134-wireguard-allowedips-allocate-nodes-in-kmem_cache.patch deleted file mode 100644 index 65b31b05f5..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0134-wireguard-allowedips-allocate-nodes-in-kmem_cache.patch +++ /dev/null @@ -1,173 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:37 +0200 -Subject: [PATCH] wireguard: allowedips: allocate nodes in kmem_cache - -commit dc680de28ca849dfe589dc15ac56d22505f0ef11 upstream. - -The previous commit moved from O(n) to O(1) for removal, but in the -process introduced an additional pointer member to a struct that -increased the size from 60 to 68 bytes, putting nodes in the 128-byte -slab. With deployed systems having as many as 2 million nodes, this -represents a significant doubling in memory usage (128 MiB -> 256 MiB). 
-Fix this by using our own kmem_cache, that's sized exactly right. This -also makes wireguard's memory usage more transparent in tools like -slabtop and /proc/slabinfo. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Suggested-by: Arnd Bergmann <arnd@arndb.de> -Suggested-by: Matthew Wilcox <willy@infradead.org> -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/allowedips.c | 31 ++++++++++++++++++++++++------ - drivers/net/wireguard/allowedips.h | 5 ++++- - drivers/net/wireguard/main.c | 10 +++++++++- - 3 files changed, 38 insertions(+), 8 deletions(-) - ---- a/drivers/net/wireguard/allowedips.c -+++ b/drivers/net/wireguard/allowedips.c -@@ -6,6 +6,8 @@ - #include "allowedips.h" - #include "peer.h" - -+static struct kmem_cache *node_cache; -+ - static void swap_endian(u8 *dst, const u8 *src, u8 bits) - { - if (bits == 32) { -@@ -40,6 +42,11 @@ static void push_rcu(struct allowedips_n - } - } - -+static void node_free_rcu(struct rcu_head *rcu) -+{ -+ kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu)); -+} -+ - static void root_free_rcu(struct rcu_head *rcu) - { - struct allowedips_node *node, *stack[128] = { -@@ -49,7 +56,7 @@ static void root_free_rcu(struct rcu_hea - while (len > 0 && (node = stack[--len])) { - push_rcu(stack, node->bit[0], &len); - push_rcu(stack, node->bit[1], &len); -- kfree(node); -+ kmem_cache_free(node_cache, node); - } - } - -@@ -164,7 +171,7 @@ static int add(struct allowedips_node __ - return -EINVAL; - - if (!rcu_access_pointer(*trie)) { -- node = kzalloc(sizeof(*node), GFP_KERNEL); -+ node = kmem_cache_zalloc(node_cache, GFP_KERNEL); - if (unlikely(!node)) - return -ENOMEM; - RCU_INIT_POINTER(node->peer, peer); -@@ -180,7 +187,7 @@ static int add(struct allowedips_node __ - return 0; - } - -- newnode = kzalloc(sizeof(*newnode), 
GFP_KERNEL); -+ newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL); - if (unlikely(!newnode)) - return -ENOMEM; - RCU_INIT_POINTER(newnode->peer, peer); -@@ -213,10 +220,10 @@ static int add(struct allowedips_node __ - return 0; - } - -- node = kzalloc(sizeof(*node), GFP_KERNEL); -+ node = kmem_cache_zalloc(node_cache, GFP_KERNEL); - if (unlikely(!node)) { - list_del(&newnode->peer_list); -- kfree(newnode); -+ kmem_cache_free(node_cache, newnode); - return -ENOMEM; - } - INIT_LIST_HEAD(&node->peer_list); -@@ -306,7 +313,7 @@ void wg_allowedips_remove_by_peer(struct - if (child) - child->parent_bit = node->parent_bit; - *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child; -- kfree_rcu(node, rcu); -+ call_rcu(&node->rcu, node_free_rcu); - - /* TODO: Note that we currently don't walk up and down in order to - * free any potential filler nodes. This means that this function -@@ -350,4 +357,16 @@ struct wg_peer *wg_allowedips_lookup_src - return NULL; - } - -+int __init wg_allowedips_slab_init(void) -+{ -+ node_cache = KMEM_CACHE(allowedips_node, 0); -+ return node_cache ? 0 : -ENOMEM; -+} -+ -+void wg_allowedips_slab_uninit(void) -+{ -+ rcu_barrier(); -+ kmem_cache_destroy(node_cache); -+} -+ - #include "selftest/allowedips.c" ---- a/drivers/net/wireguard/allowedips.h -+++ b/drivers/net/wireguard/allowedips.h -@@ -19,7 +19,7 @@ struct allowedips_node { - u8 bits[16] __aligned(__alignof(u64)); - - /* Keep rarely used members at bottom to be beyond cache line. */ -- struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! 
*/ -+ struct allowedips_node *__rcu *parent_bit; - union { - struct list_head peer_list; - struct rcu_head rcu; -@@ -53,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src - bool wg_allowedips_selftest(void); - #endif - -+int wg_allowedips_slab_init(void); -+void wg_allowedips_slab_uninit(void); -+ - #endif /* _WG_ALLOWEDIPS_H */ ---- a/drivers/net/wireguard/main.c -+++ b/drivers/net/wireguard/main.c -@@ -21,10 +21,15 @@ static int __init mod_init(void) - { - int ret; - -+ ret = wg_allowedips_slab_init(); -+ if (ret < 0) -+ goto err_allowedips; -+ - #ifdef DEBUG -+ ret = -ENOTRECOVERABLE; - if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() || - !wg_ratelimiter_selftest()) -- return -ENOTRECOVERABLE; -+ goto err_peer; - #endif - wg_noise_init(); - -@@ -50,6 +55,8 @@ err_netlink: - err_device: - wg_peer_uninit(); - err_peer: -+ wg_allowedips_slab_uninit(); -+err_allowedips: - return ret; - } - -@@ -58,6 +65,7 @@ static void __exit mod_exit(void) - wg_genetlink_uninit(); - wg_device_uninit(); - wg_peer_uninit(); -+ wg_allowedips_slab_uninit(); - } - - module_init(mod_init); diff --git a/target/linux/generic/backport-5.4/080-wireguard-0135-wireguard-allowedips-free-empty-intermediate-nodes-w.patch b/target/linux/generic/backport-5.4/080-wireguard-0135-wireguard-allowedips-free-empty-intermediate-nodes-w.patch deleted file mode 100644 index c044ad25af..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0135-wireguard-allowedips-free-empty-intermediate-nodes-w.patch +++ /dev/null @@ -1,521 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 4 Jun 2021 17:17:38 +0200 -Subject: [PATCH] wireguard: allowedips: free empty intermediate nodes when - removing single node - -commit bf7b042dc62a31f66d3a41dd4dfc7806f267b307 upstream. 
- -When removing single nodes, it's possible that that node's parent is an -empty intermediate node, in which case, it too should be removed. -Otherwise the trie fills up and never is fully emptied, leading to -gradual memory leaks over time for tries that are modified often. There -was originally code to do this, but was removed during refactoring in -2016 and never reworked. Now that we have proper parent pointers from -the previous commits, we can implement this properly. - -In order to reduce branching and expensive comparisons, we want to keep -the double pointer for parent assignment (which lets us easily chain up -to the root), but we still need to actually get the parent's base -address. So encode the bit number into the last two bits of the pointer, -and pack and unpack it as needed. This is a little bit clumsy but is the -fastest and less memory wasteful of the compromises. Note that we align -the root struct here to a minimum of 4, because it's embedded into a -larger struct, and we're relying on having the bottom two bits for our -flag, which would only be 16-bit aligned on m68k. - -The existing macro-based helpers were a bit unwieldy for adding the bit -packing to, so this commit replaces them with safer and clearer ordinary -functions. - -We add a test to the randomized/fuzzer part of the selftests, to free -the randomized tries by-peer, refuzz it, and repeat, until it's supposed -to be empty, and then then see if that actually resulted in the whole -thing being emptied. That combined with kmemcheck should hopefully make -sure this commit is doing what it should. Along the way this resulted in -various other cleanups of the tests and fixes for recent graphviz. - -Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Jason A. 
Donenfeld <Jason@zx2c4.com> ---- - drivers/net/wireguard/allowedips.c | 102 ++++++------ - drivers/net/wireguard/allowedips.h | 4 +- - drivers/net/wireguard/selftest/allowedips.c | 162 ++++++++++---------- - 3 files changed, 137 insertions(+), 131 deletions(-) - ---- a/drivers/net/wireguard/allowedips.c -+++ b/drivers/net/wireguard/allowedips.c -@@ -30,8 +30,11 @@ static void copy_and_assign_cidr(struct - node->bitlen = bits; - memcpy(node->bits, src, bits / 8U); - } --#define CHOOSE_NODE(parent, key) \ -- parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] -+ -+static inline u8 choose(struct allowedips_node *node, const u8 *key) -+{ -+ return (key[node->bit_at_a] >> node->bit_at_b) & 1; -+} - - static void push_rcu(struct allowedips_node **stack, - struct allowedips_node __rcu *p, unsigned int *len) -@@ -112,7 +115,7 @@ static struct allowedips_node *find_node - found = node; - if (node->cidr == bits) - break; -- node = rcu_dereference_bh(CHOOSE_NODE(node, key)); -+ node = rcu_dereference_bh(node->bit[choose(node, key)]); - } - return found; - } -@@ -144,8 +147,7 @@ static bool node_placement(struct allowe - u8 cidr, u8 bits, struct allowedips_node **rnode, - struct mutex *lock) - { -- struct allowedips_node *node = rcu_dereference_protected(trie, -- lockdep_is_held(lock)); -+ struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock)); - struct allowedips_node *parent = NULL; - bool exact = false; - -@@ -155,13 +157,24 @@ static bool node_placement(struct allowe - exact = true; - break; - } -- node = rcu_dereference_protected(CHOOSE_NODE(parent, key), -- lockdep_is_held(lock)); -+ node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock)); - } - *rnode = parent; - return exact; - } - -+static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) -+{ -+ node->parent_bit_packed = (unsigned long)parent | bit; -+ rcu_assign_pointer(*parent, node); -+} -+ 
-+static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node) -+{ -+ u8 bit = choose(parent, node->bits); -+ connect_node(&parent->bit[bit], bit, node); -+} -+ - static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, - u8 cidr, struct wg_peer *peer, struct mutex *lock) - { -@@ -177,8 +190,7 @@ static int add(struct allowedips_node __ - RCU_INIT_POINTER(node->peer, peer); - list_add_tail(&node->peer_list, &peer->allowedips_list); - copy_and_assign_cidr(node, key, cidr, bits); -- rcu_assign_pointer(node->parent_bit, trie); -- rcu_assign_pointer(*trie, node); -+ connect_node(trie, 2, node); - return 0; - } - if (node_placement(*trie, key, cidr, bits, &node, lock)) { -@@ -197,10 +209,10 @@ static int add(struct allowedips_node __ - if (!node) { - down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); - } else { -- down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock)); -+ const u8 bit = choose(node, key); -+ down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock)); - if (!down) { -- rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key)); -- rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); -+ connect_node(&node->bit[bit], bit, newnode); - return 0; - } - } -@@ -208,15 +220,11 @@ static int add(struct allowedips_node __ - parent = node; - - if (newnode->cidr == cidr) { -- rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits)); -- rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); -- if (!parent) { -- rcu_assign_pointer(newnode->parent_bit, trie); -- rcu_assign_pointer(*trie, newnode); -- } else { -- rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits)); -- rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode); -- } -+ choose_and_connect_node(newnode, down); -+ if (!parent) -+ connect_node(trie, 2, newnode); -+ else -+ choose_and_connect_node(parent, newnode); - return 0; - } - -@@ 
-229,17 +237,12 @@ static int add(struct allowedips_node __ - INIT_LIST_HEAD(&node->peer_list); - copy_and_assign_cidr(node, newnode->bits, cidr, bits); - -- rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits)); -- rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); -- rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits)); -- rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); -- if (!parent) { -- rcu_assign_pointer(node->parent_bit, trie); -- rcu_assign_pointer(*trie, node); -- } else { -- rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits)); -- rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node); -- } -+ choose_and_connect_node(node, down); -+ choose_and_connect_node(node, newnode); -+ if (!parent) -+ connect_node(trie, 2, node); -+ else -+ choose_and_connect_node(parent, node); - return 0; - } - -@@ -297,7 +300,8 @@ int wg_allowedips_insert_v6(struct allow - void wg_allowedips_remove_by_peer(struct allowedips *table, - struct wg_peer *peer, struct mutex *lock) - { -- struct allowedips_node *node, *child, *tmp; -+ struct allowedips_node *node, *child, **parent_bit, *parent, *tmp; -+ bool free_parent; - - if (list_empty(&peer->allowedips_list)) - return; -@@ -307,19 +311,29 @@ void wg_allowedips_remove_by_peer(struct - RCU_INIT_POINTER(node->peer, NULL); - if (node->bit[0] && node->bit[1]) - continue; -- child = rcu_dereference_protected( -- node->bit[!rcu_access_pointer(node->bit[0])], -- lockdep_is_held(lock)); -+ child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])], -+ lockdep_is_held(lock)); - if (child) -- child->parent_bit = node->parent_bit; -- *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child; -+ child->parent_bit_packed = node->parent_bit_packed; -+ parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL); -+ *parent_bit = child; -+ parent = (void *)parent_bit - -+ offsetof(struct allowedips_node, 
bit[node->parent_bit_packed & 1]); -+ free_parent = !rcu_access_pointer(node->bit[0]) && -+ !rcu_access_pointer(node->bit[1]) && -+ (node->parent_bit_packed & 3) <= 1 && -+ !rcu_access_pointer(parent->peer); -+ if (free_parent) -+ child = rcu_dereference_protected( -+ parent->bit[!(node->parent_bit_packed & 1)], -+ lockdep_is_held(lock)); - call_rcu(&node->rcu, node_free_rcu); -- -- /* TODO: Note that we currently don't walk up and down in order to -- * free any potential filler nodes. This means that this function -- * doesn't free up as much as it could, which could be revisited -- * at some point. -- */ -+ if (!free_parent) -+ continue; -+ if (child) -+ child->parent_bit_packed = parent->parent_bit_packed; -+ *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; -+ call_rcu(&parent->rcu, node_free_rcu); - } - } - ---- a/drivers/net/wireguard/allowedips.h -+++ b/drivers/net/wireguard/allowedips.h -@@ -19,7 +19,7 @@ struct allowedips_node { - u8 bits[16] __aligned(__alignof(u64)); - - /* Keep rarely used members at bottom to be beyond cache line. */ -- struct allowedips_node *__rcu *parent_bit; -+ unsigned long parent_bit_packed; - union { - struct list_head peer_list; - struct rcu_head rcu; -@@ -30,7 +30,7 @@ struct allowedips { - struct allowedips_node __rcu *root4; - struct allowedips_node __rcu *root6; - u64 seq; --}; -+} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. 
*/ - - void wg_allowedips_init(struct allowedips *table); - void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); ---- a/drivers/net/wireguard/selftest/allowedips.c -+++ b/drivers/net/wireguard/selftest/allowedips.c -@@ -19,32 +19,22 @@ - - #include <linux/siphash.h> - --static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, -- u8 cidr) --{ -- swap_endian(dst, src, bits); -- memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); -- if (cidr) -- dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); --} -- - static __init void print_node(struct allowedips_node *node, u8 bits) - { - char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; -- char *fmt_declaration = KERN_DEBUG -- "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; -+ char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; -+ u8 ip1[16], ip2[16], cidr1, cidr2; - char *style = "dotted"; -- u8 ip1[16], ip2[16]; - u32 color = 0; - -+ if (node == NULL) -+ return; - if (bits == 32) { - fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; -- fmt_declaration = KERN_DEBUG -- "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; -+ fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; - } else if (bits == 128) { - fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; -- fmt_declaration = KERN_DEBUG -- "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; -+ fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; - } - if (node->peer) { - hsiphash_key_t key = { { 0 } }; -@@ -55,24 +45,20 @@ static __init void print_node(struct all - hsiphash_1u32(0xabad1dea, &key) % 200; - style = "bold"; - } -- swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); -- printk(fmt_declaration, ip1, node->cidr, style, color); -+ wg_allowedips_read_node(node, ip1, &cidr1); -+ printk(fmt_declaration, ip1, cidr1, style, color); - if (node->bit[0]) { -- swap_endian_and_apply_cidr(ip2, -- 
rcu_dereference_raw(node->bit[0])->bits, bits, -- node->cidr); -- printk(fmt_connection, ip1, node->cidr, ip2, -- rcu_dereference_raw(node->bit[0])->cidr); -- print_node(rcu_dereference_raw(node->bit[0]), bits); -+ wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2); -+ printk(fmt_connection, ip1, cidr1, ip2, cidr2); - } - if (node->bit[1]) { -- swap_endian_and_apply_cidr(ip2, -- rcu_dereference_raw(node->bit[1])->bits, -- bits, node->cidr); -- printk(fmt_connection, ip1, node->cidr, ip2, -- rcu_dereference_raw(node->bit[1])->cidr); -- print_node(rcu_dereference_raw(node->bit[1]), bits); -+ wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2); -+ printk(fmt_connection, ip1, cidr1, ip2, cidr2); - } -+ if (node->bit[0]) -+ print_node(rcu_dereference_raw(node->bit[0]), bits); -+ if (node->bit[1]) -+ print_node(rcu_dereference_raw(node->bit[1]), bits); - } - - static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) -@@ -121,8 +107,8 @@ static __init inline union nf_inet_addr - { - union nf_inet_addr mask; - -- memset(&mask, 0x00, 128 / 8); -- memset(&mask, 0xff, cidr / 8); -+ memset(&mask, 0, sizeof(mask)); -+ memset(&mask.all, 0xff, cidr / 8); - if (cidr % 32) - mask.all[cidr / 32] = (__force u32)htonl( - (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); -@@ -149,42 +135,36 @@ horrible_mask_self(struct horrible_allow - } - - static __init inline bool --horrible_match_v4(const struct horrible_allowedips_node *node, -- struct in_addr *ip) -+horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip) - { - return (ip->s_addr & node->mask.ip) == node->ip.ip; - } - - static __init inline bool --horrible_match_v6(const struct horrible_allowedips_node *node, -- struct in6_addr *ip) -+horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip) - { -- return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == -- node->ip.ip6[0] && -- (ip->in6_u.u6_addr32[1] & 
node->mask.ip6[1]) == -- node->ip.ip6[1] && -- (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == -- node->ip.ip6[2] && -+ return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] && -+ (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] && -+ (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] && - (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; - } - - static __init void --horrible_insert_ordered(struct horrible_allowedips *table, -- struct horrible_allowedips_node *node) -+horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node) - { - struct horrible_allowedips_node *other = NULL, *where = NULL; - u8 my_cidr = horrible_mask_to_cidr(node->mask); - - hlist_for_each_entry(other, &table->head, table) { -- if (!memcmp(&other->mask, &node->mask, -- sizeof(union nf_inet_addr)) && -- !memcmp(&other->ip, &node->ip, -- sizeof(union nf_inet_addr)) && -- other->ip_version == node->ip_version) { -+ if (other->ip_version == node->ip_version && -+ !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) && -+ !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) { - other->value = node->value; - kfree(node); - return; - } -+ } -+ hlist_for_each_entry(other, &table->head, table) { - where = other; - if (horrible_mask_to_cidr(other->mask) <= my_cidr) - break; -@@ -201,8 +181,7 @@ static __init int - horrible_allowedips_insert_v4(struct horrible_allowedips *table, - struct in_addr *ip, u8 cidr, void *value) - { -- struct horrible_allowedips_node *node = kzalloc(sizeof(*node), -- GFP_KERNEL); -+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); - - if (unlikely(!node)) - return -ENOMEM; -@@ -219,8 +198,7 @@ static __init int - horrible_allowedips_insert_v6(struct horrible_allowedips *table, - struct in6_addr *ip, u8 cidr, void *value) - { -- struct horrible_allowedips_node *node = kzalloc(sizeof(*node), -- GFP_KERNEL); -+ struct 
horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); - - if (unlikely(!node)) - return -ENOMEM; -@@ -234,39 +212,43 @@ horrible_allowedips_insert_v6(struct hor - } - - static __init void * --horrible_allowedips_lookup_v4(struct horrible_allowedips *table, -- struct in_addr *ip) -+horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip) - { - struct horrible_allowedips_node *node; -- void *ret = NULL; - - hlist_for_each_entry(node, &table->head, table) { -- if (node->ip_version != 4) -- continue; -- if (horrible_match_v4(node, ip)) { -- ret = node->value; -- break; -- } -+ if (node->ip_version == 4 && horrible_match_v4(node, ip)) -+ return node->value; - } -- return ret; -+ return NULL; - } - - static __init void * --horrible_allowedips_lookup_v6(struct horrible_allowedips *table, -- struct in6_addr *ip) -+horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip) - { - struct horrible_allowedips_node *node; -- void *ret = NULL; - - hlist_for_each_entry(node, &table->head, table) { -- if (node->ip_version != 6) -+ if (node->ip_version == 6 && horrible_match_v6(node, ip)) -+ return node->value; -+ } -+ return NULL; -+} -+ -+ -+static __init void -+horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value) -+{ -+ struct horrible_allowedips_node *node; -+ struct hlist_node *h; -+ -+ hlist_for_each_entry_safe(node, h, &table->head, table) { -+ if (node->value != value) - continue; -- if (horrible_match_v6(node, ip)) { -- ret = node->value; -- break; -- } -+ hlist_del(&node->table); -+ kfree(node); - } -- return ret; -+ - } - - static __init bool randomized_test(void) -@@ -397,23 +379,33 @@ static __init bool randomized_test(void) - print_tree(t.root6, 128); - } - -- for (i = 0; i < NUM_QUERIES; ++i) { -- prandom_bytes(ip, 4); -- if (lookup(t.root4, 32, ip) != -- horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { -- pr_err("allowedips random self-test: FAIL\n"); -- 
goto free; -+ for (j = 0;; ++j) { -+ for (i = 0; i < NUM_QUERIES; ++i) { -+ prandom_bytes(ip, 4); -+ if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { -+ horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip); -+ pr_err("allowedips random v4 self-test: FAIL\n"); -+ goto free; -+ } -+ prandom_bytes(ip, 16); -+ if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { -+ pr_err("allowedips random v6 self-test: FAIL\n"); -+ goto free; -+ } - } -+ if (j >= NUM_PEERS) -+ break; -+ mutex_lock(&mutex); -+ wg_allowedips_remove_by_peer(&t, peers[j], &mutex); -+ mutex_unlock(&mutex); -+ horrible_allowedips_remove_by_value(&h, peers[j]); - } - -- for (i = 0; i < NUM_QUERIES; ++i) { -- prandom_bytes(ip, 16); -- if (lookup(t.root6, 128, ip) != -- horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { -- pr_err("allowedips random self-test: FAIL\n"); -- goto free; -- } -+ if (t.root4 || t.root6) { -+ pr_err("allowedips random self-test removal: FAIL\n"); -+ goto free; - } -+ - ret = true; - - free: diff --git a/target/linux/generic/backport-5.4/300-MIPS-Exclude-more-dsemul-code-when-CONFIG_MIPS_FP_SU.patch b/target/linux/generic/backport-5.4/300-MIPS-Exclude-more-dsemul-code-when-CONFIG_MIPS_FP_SU.patch deleted file mode 100644 index 0bc58e756b..0000000000 --- a/target/linux/generic/backport-5.4/300-MIPS-Exclude-more-dsemul-code-when-CONFIG_MIPS_FP_SU.patch +++ /dev/null @@ -1,134 +0,0 @@ -From d96c3157f9ca177727fbad960fcf6f52f145f471 Mon Sep 17 00:00:00 2001 -From: Yousong Zhou <yszhou4tech@gmail.com> -Date: Thu, 9 Jan 2020 11:33:19 +0800 -Subject: [PATCH] MIPS: Exclude more dsemul code when CONFIG_MIPS_FP_SUPPORT=n - -This furthers what commit 42b10815d559 ("MIPS: Don't compile math-emu -when CONFIG_MIPS_FP_SUPPORT=n") has done - -Signed-off-by: Yousong Zhou <yszhou4tech@gmail.com> ---- - arch/mips/include/asm/processor.h | 12 ++++++------ - arch/mips/kernel/process.c | 10 ++++++++-- - 
arch/mips/kernel/vdso.c | 26 +++++++++++++++----------- - 3 files changed, 29 insertions(+), 19 deletions(-) - ---- a/arch/mips/include/asm/processor.h -+++ b/arch/mips/include/asm/processor.h -@@ -253,13 +253,13 @@ struct thread_struct { - #ifdef CONFIG_MIPS_FP_SUPPORT - /* Saved fpu/fpu emulator stuff. */ - struct mips_fpu_struct fpu FPU_ALIGN; --#endif - /* Assigned branch delay slot 'emulation' frame */ - atomic_t bd_emu_frame; - /* PC of the branch from a branch delay slot 'emulation' */ - unsigned long bd_emu_branch_pc; - /* PC to continue from following a branch delay slot 'emulation' */ - unsigned long bd_emu_cont_pc; -+#endif - #ifdef CONFIG_MIPS_MT_FPAFF - /* Emulated instruction count */ - unsigned long emulated_fp; -@@ -302,7 +302,11 @@ struct thread_struct { - .fpr = {{{0,},},}, \ - .fcr31 = 0, \ - .msacsr = 0, \ -- }, -+ }, \ -+ /* Delay slot emulation */ \ -+ .bd_emu_frame = ATOMIC_INIT(BD_EMUFRAME_NONE), \ -+ .bd_emu_branch_pc = 0, \ -+ .bd_emu_cont_pc = 0, - #else - # define FPU_INIT - #endif -@@ -334,10 +338,6 @@ struct thread_struct { - * FPU affinity state (null if not FPAFF) \ - */ \ - FPAFF_INIT \ -- /* Delay slot emulation */ \ -- .bd_emu_frame = ATOMIC_INIT(BD_EMUFRAME_NONE), \ -- .bd_emu_branch_pc = 0, \ -- .bd_emu_cont_pc = 0, \ - /* \ - * Saved DSP stuff \ - */ \ ---- a/arch/mips/kernel/process.c -+++ b/arch/mips/kernel/process.c -@@ -75,7 +75,9 @@ void start_thread(struct pt_regs * regs, - lose_fpu(0); - clear_thread_flag(TIF_MSA_CTX_LIVE); - clear_used_math(); -+#ifdef CONFIG_MIPS_FP_SUPPORT - atomic_set(¤t->thread.bd_emu_frame, BD_EMUFRAME_NONE); -+#endif - init_dsp(); - regs->cp0_epc = pc; - regs->regs[29] = sp; -@@ -176,7 +178,9 @@ int copy_thread_tls(unsigned long clone_ - clear_tsk_thread_flag(p, TIF_FPUBOUND); - #endif /* CONFIG_MIPS_MT_FPAFF */ - -+#ifdef CONFIG_MIPS_FP_SUPPORT - atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE); -+#endif - - if (clone_flags & CLONE_SETTLS) - ti->tp_value = tls; -@@ -650,8 +654,10 @@ unsigned 
long mips_stack_top(void) - { - unsigned long top = TASK_SIZE & PAGE_MASK; - -- /* One page for branch delay slot "emulation" */ -- top -= PAGE_SIZE; -+ if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT)) { -+ /* One page for branch delay slot "emulation" */ -+ top -= PAGE_SIZE; -+ } - - /* Space for the VDSO, data page & GIC user page */ - top -= PAGE_ALIGN(current->thread.abi->vdso->size); ---- a/arch/mips/kernel/vdso.c -+++ b/arch/mips/kernel/vdso.c -@@ -71,10 +71,12 @@ subsys_initcall(init_vdso); - - static unsigned long vdso_base(void) - { -- unsigned long base; -+ unsigned long base = STACK_TOP; - -- /* Skip the delay slot emulation page */ -- base = STACK_TOP + PAGE_SIZE; -+ if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT)) { -+ /* Skip the delay slot emulation page */ -+ base += PAGE_SIZE; -+ } - - if (current->flags & PF_RANDOMIZE) { - base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); -@@ -95,14 +97,16 @@ int arch_setup_additional_pages(struct l - if (down_write_killable(&mm->mmap_sem)) - return -EINTR; - -- /* Map delay slot emulation page */ -- base = mmap_region(NULL, STACK_TOP, PAGE_SIZE, -- VM_READ | VM_EXEC | -- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, -- 0, NULL); -- if (IS_ERR_VALUE(base)) { -- ret = base; -- goto out; -+ if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT)) { -+ /* Map delay slot emulation page */ -+ base = mmap_region(NULL, STACK_TOP, PAGE_SIZE, -+ VM_READ | VM_EXEC | -+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, -+ 0, NULL); -+ if (IS_ERR_VALUE(base)) { -+ ret = base; -+ goto out; -+ } - } - - /* diff --git a/target/linux/generic/backport-5.4/310-mips-Kconfig-Add-ARCH_HAS_FORTIFY_SOURCE.patch b/target/linux/generic/backport-5.4/310-mips-Kconfig-Add-ARCH_HAS_FORTIFY_SOURCE.patch deleted file mode 100644 index e02f103543..0000000000 --- a/target/linux/generic/backport-5.4/310-mips-Kconfig-Add-ARCH_HAS_FORTIFY_SOURCE.patch +++ /dev/null @@ -1,32 +0,0 @@ -From a8d2bb0559b5fefa5173ff4e7496cc6250db2c8a Mon Sep 17 00:00:00 2001 -From: Dmitry Korotin <dkorotin@wavecomp.com> 
-Date: Thu, 12 Sep 2019 22:53:45 +0000 -Subject: [PATCH] mips: Kconfig: Add ARCH_HAS_FORTIFY_SOURCE - -FORTIFY_SOURCE detects various overflows at compile and run time. -(6974f0c4555e ("include/linux/string.h: -add the option of fortified string.h functions) - -ARCH_HAS_FORTIFY_SOURCE means that the architecture can be built and -run with CONFIG_FORTIFY_SOURCE. - -Since mips can be built and run with that flag, -select ARCH_HAS_FORTIFY_SOURCE as default. - -Signed-off-by: Dmitry Korotin <dkorotin@wavecomp.com> -Signed-off-by: Paul Burton <paul.burton@mips.com> -Cc: linux-mips@vger.kernel.org ---- - arch/mips/Kconfig | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/mips/Kconfig -+++ b/arch/mips/Kconfig -@@ -7,6 +7,7 @@ config MIPS - select ARCH_CLOCKSOURCE_DATA - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select ARCH_HAS_UBSAN_SANITIZE_ALL -+ select ARCH_HAS_FORTIFY_SOURCE - select ARCH_SUPPORTS_UPROBES - select ARCH_USE_BUILTIN_BSWAP - select ARCH_USE_CMPXCHG_LOCKREF if 64BIT diff --git a/target/linux/generic/backport-5.4/310-v5.6-mips-vdso-fix-jalr-t9-crash-in-vdso-code.patch b/target/linux/generic/backport-5.4/310-v5.6-mips-vdso-fix-jalr-t9-crash-in-vdso-code.patch deleted file mode 100644 index 51eef4b26b..0000000000 --- a/target/linux/generic/backport-5.4/310-v5.6-mips-vdso-fix-jalr-t9-crash-in-vdso-code.patch +++ /dev/null @@ -1,54 +0,0 @@ -From d3f703c4359ff06619b2322b91f69710453e6b6d Mon Sep 17 00:00:00 2001 -From: Victor Kamensky <kamensky@cisco.com> -Date: Tue, 11 Feb 2020 11:24:33 -0800 -Subject: [PATCH] mips: vdso: fix 'jalr t9' crash in vdso code - -Observed that when kernel is built with Yocto mips64-poky-linux-gcc, -and mips64-poky-linux-gnun32-gcc toolchain, resulting vdso contains -'jalr t9' instructions in its code and since in vdso case nobody -sets GOT table code crashes when instruction reached. 
On other hand -observed that when kernel is built mips-poky-linux-gcc toolchain, the -same 'jalr t9' instruction are replaced with PC relative function -calls using 'bal' instructions. - -The difference boils down to -mrelax-pic-calls and -mexplicit-relocs -gcc options that gets different default values depending on gcc -target triplets and corresponding binutils. -mrelax-pic-calls got -enabled by default only in mips-poky-linux-gcc case. MIPS binutils -ld relies on R_MIPS_JALR relocation to convert 'jalr t9' into 'bal' -and such relocation is generated only if -mrelax-pic-calls option -is on. - -Please note 'jalr t9' conversion to 'bal' can happen only to static -functions. These static PIC calls use mips local GOT entries that -are supposed to be filled with start of DSO value by run-time linker -(missing in VDSO case) and they do not have dynamic relocations. -Global mips GOT entries must have dynamic relocations and they should -be prevented by cmd_vdso_check Makefile rule. - -Solution call out -mrelax-pic-calls and -mexplicit-relocs options -explicitly while compiling MIPS vdso code. That would get correct -and consistent between different toolchains behaviour. 
- -Reported-by: Bruce Ashfield <bruce.ashfield@gmail.com> -Signed-off-by: Victor Kamensky <kamensky@cisco.com> -Signed-off-by: Paul Burton <paulburton@kernel.org> -Cc: linux-mips@vger.kernel.org -Cc: Ralf Baechle <ralf@linux-mips.org> -Cc: James Hogan <jhogan@kernel.org> -Cc: Vincenzo Frascino <vincenzo.frascino@arm.com> -Cc: richard.purdie@linuxfoundation.org ---- - arch/mips/vdso/Makefile | 1 + - 1 file changed, 1 insertion(+) - ---- a/arch/mips/vdso/Makefile -+++ b/arch/mips/vdso/Makefile -@@ -26,6 +26,7 @@ ccflags-vdso := \ - cflags-vdso := $(ccflags-vdso) \ - $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ - -O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \ -+ -mrelax-pic-calls -mexplicit-relocs \ - -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ - $(call cc-option, -fno-asynchronous-unwind-tables) \ - $(call cc-option, -fno-stack-protector) diff --git a/target/linux/generic/backport-5.4/311-MIPS-Fix-exception-handler-memcpy.patch b/target/linux/generic/backport-5.4/311-MIPS-Fix-exception-handler-memcpy.patch deleted file mode 100644 index 5a6725c7a0..0000000000 --- a/target/linux/generic/backport-5.4/311-MIPS-Fix-exception-handler-memcpy.patch +++ /dev/null @@ -1,107 +0,0 @@ -From e01c91a360793298c9e1656a61faceff01487a43 Mon Sep 17 00:00:00 2001 -From: Ben Hutchings <ben@decadent.org.uk> -Date: Sat, 23 May 2020 23:50:34 +0800 -Subject: [PATCH] MIPS: Fix exception handler memcpy() - -The exception handler subroutines are declared as a single char, but -when copied to the required addresses the copy length is 0x80. 
- -When range checks are enabled for memcpy() this results in a build -failure, with error messages such as: - -In file included from arch/mips/mti-malta/malta-init.c:15: -In function 'memcpy', - inlined from 'mips_nmi_setup' at arch/mips/mti-malta/malta-init.c:98:2: -include/linux/string.h:376:4: error: call to '__read_overflow2' declared with attribute error: detected read beyond size of object passed as 2nd parameter - 376 | __read_overflow2(); - | ^~~~~~~~~~~~~~~~~~ - -Change the declarations to use type char[]. - -Signed-off-by: Ben Hutchings <ben@decadent.org.uk> -Signed-off-by: YunQiang Su <syq@debian.org> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> ---- - arch/mips/loongson64/common/init.c | 4 ++-- - arch/mips/mti-malta/malta-init.c | 8 ++++---- - arch/mips/pistachio/init.c | 8 ++++---- - 3 files changed, 10 insertions(+), 10 deletions(-) - ---- a/arch/mips/loongson64/common/init.c -+++ b/arch/mips/loongson64/common/init.c -@@ -18,10 +18,10 @@ unsigned long __maybe_unused _loongson_a - static void __init mips_nmi_setup(void) - { - void *base; -- extern char except_vec_nmi; -+ extern char except_vec_nmi[]; - - base = (void *)(CAC_BASE + 0x380); -- memcpy(base, &except_vec_nmi, 0x80); -+ memcpy(base, except_vec_nmi, 0x80); - flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); - } - ---- a/arch/mips/mti-malta/malta-init.c -+++ b/arch/mips/mti-malta/malta-init.c -@@ -90,24 +90,24 @@ static void __init console_config(void) - static void __init mips_nmi_setup(void) - { - void *base; -- extern char except_vec_nmi; -+ extern char except_vec_nmi[]; - - base = cpu_has_veic ? 
- (void *)(CAC_BASE + 0xa80) : - (void *)(CAC_BASE + 0x380); -- memcpy(base, &except_vec_nmi, 0x80); -+ memcpy(base, except_vec_nmi, 0x80); - flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); - } - - static void __init mips_ejtag_setup(void) - { - void *base; -- extern char except_vec_ejtag_debug; -+ extern char except_vec_ejtag_debug[]; - - base = cpu_has_veic ? - (void *)(CAC_BASE + 0xa00) : - (void *)(CAC_BASE + 0x300); -- memcpy(base, &except_vec_ejtag_debug, 0x80); -+ memcpy(base, except_vec_ejtag_debug, 0x80); - flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); - } - ---- a/arch/mips/pistachio/init.c -+++ b/arch/mips/pistachio/init.c -@@ -83,12 +83,12 @@ phys_addr_t mips_cdmm_phys_base(void) - static void __init mips_nmi_setup(void) - { - void *base; -- extern char except_vec_nmi; -+ extern char except_vec_nmi[]; - - base = cpu_has_veic ? - (void *)(CAC_BASE + 0xa80) : - (void *)(CAC_BASE + 0x380); -- memcpy(base, &except_vec_nmi, 0x80); -+ memcpy(base, except_vec_nmi, 0x80); - flush_icache_range((unsigned long)base, - (unsigned long)base + 0x80); - } -@@ -96,12 +96,12 @@ static void __init mips_nmi_setup(void) - static void __init mips_ejtag_setup(void) - { - void *base; -- extern char except_vec_ejtag_debug; -+ extern char except_vec_ejtag_debug[]; - - base = cpu_has_veic ? 
- (void *)(CAC_BASE + 0xa00) : - (void *)(CAC_BASE + 0x300); -- memcpy(base, &except_vec_ejtag_debug, 0x80); -+ memcpy(base, except_vec_ejtag_debug, 0x80); - flush_icache_range((unsigned long)base, - (unsigned long)base + 0x80); - } diff --git a/target/linux/generic/backport-5.4/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch b/target/linux/generic/backport-5.4/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch deleted file mode 100644 index 501f42d88e..0000000000 --- a/target/linux/generic/backport-5.4/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch +++ /dev/null @@ -1,99 +0,0 @@ -From: Pablo Neira Ayuso <pablo@netfilter.org> -Date: Thu, 25 Jan 2018 12:58:55 +0100 -Subject: [PATCH] netfilter: nft_flow_offload: handle netdevice events from - nf_flow_table - -Move the code that deals with device events to the core. - -Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> ---- - ---- a/net/netfilter/nf_flow_table_core.c -+++ b/net/netfilter/nf_flow_table_core.c -@@ -529,5 +529,35 @@ void nf_flow_table_free(struct nf_flowta - } - EXPORT_SYMBOL_GPL(nf_flow_table_free); - -+static int nf_flow_table_netdev_event(struct notifier_block *this, -+ unsigned long event, void *ptr) -+{ -+ struct net_device *dev = netdev_notifier_info_to_dev(ptr); -+ -+ if (event != NETDEV_DOWN) -+ return NOTIFY_DONE; -+ -+ nf_flow_table_cleanup(dev); -+ -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block flow_offload_netdev_notifier = { -+ .notifier_call = nf_flow_table_netdev_event, -+}; -+ -+static int __init nf_flow_table_module_init(void) -+{ -+ return register_netdevice_notifier(&flow_offload_netdev_notifier); -+} -+ -+static void __exit nf_flow_table_module_exit(void) -+{ -+ unregister_netdevice_notifier(&flow_offload_netdev_notifier); -+} -+ -+module_init(nf_flow_table_module_init); -+module_exit(nf_flow_table_module_exit); -+ - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); ---- 
a/net/netfilter/nft_flow_offload.c -+++ b/net/netfilter/nft_flow_offload.c -@@ -234,47 +234,14 @@ static struct nft_expr_type nft_flow_off - .owner = THIS_MODULE, - }; - --static int flow_offload_netdev_event(struct notifier_block *this, -- unsigned long event, void *ptr) --{ -- struct net_device *dev = netdev_notifier_info_to_dev(ptr); -- -- if (event != NETDEV_DOWN) -- return NOTIFY_DONE; -- -- nf_flow_table_cleanup(dev); -- -- return NOTIFY_DONE; --} -- --static struct notifier_block flow_offload_netdev_notifier = { -- .notifier_call = flow_offload_netdev_event, --}; -- - static int __init nft_flow_offload_module_init(void) - { -- int err; -- -- err = register_netdevice_notifier(&flow_offload_netdev_notifier); -- if (err) -- goto err; -- -- err = nft_register_expr(&nft_flow_offload_type); -- if (err < 0) -- goto register_expr; -- -- return 0; -- --register_expr: -- unregister_netdevice_notifier(&flow_offload_netdev_notifier); --err: -- return err; -+ return nft_register_expr(&nft_flow_offload_type); - } - - static void __exit nft_flow_offload_module_exit(void) - { - nft_unregister_expr(&nft_flow_offload_type); -- unregister_netdevice_notifier(&flow_offload_netdev_notifier); - } - - module_init(nft_flow_offload_module_init); diff --git a/target/linux/generic/backport-5.4/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch b/target/linux/generic/backport-5.4/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch deleted file mode 100644 index 373a156429..0000000000 --- a/target/linux/generic/backport-5.4/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch +++ /dev/null @@ -1,114 +0,0 @@ -From: Felix Fietkau <nbd@nbd.name> -Date: Wed, 13 Jun 2018 12:33:39 +0200 -Subject: [PATCH] netfilter: nf_flow_table: fix offloaded connection timeout - corner case - -The full teardown of offloaded flows is deferred to a gc work item, -however processing of packets by netfilter needs to happen immediately -after a teardown is requested, because the 
conntrack state needs to be -fixed up. - -Since the IPS_OFFLOAD_BIT is still kept until the teardown is complete, -the netfilter conntrack gc can accidentally bump the timeout of a -connection where offload was just stopped, causing a conntrack entry -leak. - -Fix this by moving the conntrack timeout bumping from conntrack core to -the nf_flow_offload and add a check to prevent bogus timeout bumps. - -Signed-off-by: Felix Fietkau <nbd@nbd.name> ---- - ---- a/net/netfilter/nf_conntrack_core.c -+++ b/net/netfilter/nf_conntrack_core.c -@@ -1207,18 +1207,6 @@ static bool gc_worker_can_early_drop(con - return false; - } - --#define DAY (86400 * HZ) -- --/* Set an arbitrary timeout large enough not to ever expire, this save -- * us a check for the IPS_OFFLOAD_BIT from the packet path via -- * nf_ct_is_expired(). -- */ --static void nf_ct_offload_timeout(struct nf_conn *ct) --{ -- if (nf_ct_expires(ct) < DAY / 2) -- ct->timeout = nfct_time_stamp + DAY; --} -- - static void gc_worker(struct work_struct *work) - { - unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; -@@ -1250,10 +1238,8 @@ static void gc_worker(struct work_struct - - tmp = nf_ct_tuplehash_to_ctrack(h); - -- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { -- nf_ct_offload_timeout(tmp); -+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) - continue; -- } - - if (nf_ct_is_expired(tmp)) { - nf_ct_gc_expired(tmp); ---- a/net/netfilter/nf_flow_table_core.c -+++ b/net/netfilter/nf_flow_table_core.c -@@ -198,10 +198,29 @@ static const struct rhashtable_params nf - .automatic_shrinking = true, - }; - -+#define DAY (86400 * HZ) -+ -+/* Set an arbitrary timeout large enough not to ever expire, this save -+ * us a check for the IPS_OFFLOAD_BIT from the packet path via -+ * nf_ct_is_expired(). 
-+ */ -+static void nf_ct_offload_timeout(struct flow_offload *flow) -+{ -+ struct flow_offload_entry *entry; -+ struct nf_conn *ct; -+ -+ entry = container_of(flow, struct flow_offload_entry, flow); -+ ct = entry->ct; -+ -+ if (nf_ct_expires(ct) < DAY / 2) -+ ct->timeout = nfct_time_stamp + DAY; -+} -+ - int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) - { - int err; - -+ nf_ct_offload_timeout(flow); - flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; - - err = rhashtable_insert_fast(&flow_table->rhashtable, -@@ -304,6 +323,7 @@ nf_flow_table_iterate(struct nf_flowtabl - rhashtable_walk_start(&hti); - - while ((tuplehash = rhashtable_walk_next(&hti))) { -+ - if (IS_ERR(tuplehash)) { - if (PTR_ERR(tuplehash) != -EAGAIN) { - err = PTR_ERR(tuplehash); -@@ -328,10 +348,17 @@ static void nf_flow_offload_gc_step(stru - { - struct nf_flowtable *flow_table = data; - struct flow_offload_entry *e; -+ bool teardown; - - e = container_of(flow, struct flow_offload_entry, flow); -- if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) || -- (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) -+ -+ teardown = flow->flags & (FLOW_OFFLOAD_DYING | -+ FLOW_OFFLOAD_TEARDOWN); -+ -+ if (!teardown) -+ nf_ct_offload_timeout(flow); -+ -+ if (nf_flow_has_expired(flow) || teardown) - flow_offload_del(flow_table, flow); - } - diff --git a/target/linux/generic/backport-5.4/371-netfilter-nf_flow_table-fix-up-ct-state-of-flows-aft.patch b/target/linux/generic/backport-5.4/371-netfilter-nf_flow_table-fix-up-ct-state-of-flows-aft.patch deleted file mode 100644 index 383641dfb7..0000000000 --- a/target/linux/generic/backport-5.4/371-netfilter-nf_flow_table-fix-up-ct-state-of-flows-aft.patch +++ /dev/null @@ -1,24 +0,0 @@ -From: Felix Fietkau <nbd@nbd.name> -Date: Thu, 14 Jun 2018 11:20:09 +0200 -Subject: [PATCH] netfilter: nf_flow_table: fix up ct state of flows after - timeout - -If a connection simply times out instead of being torn down, it is left 
-active with a long timeout. Fix this by calling flow_offload_fixup_ct_state -here as well. - -Signed-off-by: Felix Fietkau <nbd@nbd.name> ---- - ---- a/net/netfilter/nf_flow_table_core.c -+++ b/net/netfilter/nf_flow_table_core.c -@@ -268,6 +268,9 @@ static void flow_offload_del(struct nf_f - else if (flow->flags & FLOW_OFFLOAD_TEARDOWN) - flow_offload_fixup_ct_timeout(e->ct); - -+ if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN)) -+ flow_offload_fixup_ct_state(e->ct); -+ - flow_offload_free(flow); - } - diff --git a/target/linux/generic/backport-5.4/393-v5.5-sch_cake-drop-unused-variable-tin_quantum_prio.patch b/target/linux/generic/backport-5.4/393-v5.5-sch_cake-drop-unused-variable-tin_quantum_prio.patch deleted file mode 100644 index 6c9e8ad5ee..0000000000 --- a/target/linux/generic/backport-5.4/393-v5.5-sch_cake-drop-unused-variable-tin_quantum_prio.patch +++ /dev/null @@ -1,158 +0,0 @@ -From d7e1738f0a0b0573ac93cf570ba3df9dee61b68e Mon Sep 17 00:00:00 2001 -From: Kevin 'ldir' Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> -Date: Wed, 18 Dec 2019 14:05:13 +0000 -Subject: [PATCH 2/2] sch_cake: drop unused variable tin_quantum_prio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Turns out tin_quantum_prio isn't used anymore and is a leftover from a -previous implementation of diffserv tins. Since the variable isn't used -in any calculations it can be eliminated. - -Drop variable and places where it was set. Rename remaining variable -and consolidate naming of intermediate variables that set it. - -Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> -Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - net/sched/sch_cake.c | 59 ++++++++++++++------------------------------ - 1 file changed, 18 insertions(+), 41 deletions(-) - ---- a/net/sched/sch_cake.c -+++ b/net/sched/sch_cake.c -@@ -173,8 +173,7 @@ struct cake_tin_data { - u64 tin_rate_bps; - u16 tin_rate_shft; - -- u16 tin_quantum_prio; -- u16 tin_quantum_band; -+ u16 tin_quantum; - s32 tin_deficit; - u32 tin_backlog; - u32 tin_dropped; -@@ -1947,7 +1946,7 @@ begin: - while (b->tin_deficit < 0 || - !(b->sparse_flow_count + b->bulk_flow_count)) { - if (b->tin_deficit <= 0) -- b->tin_deficit += b->tin_quantum_band; -+ b->tin_deficit += b->tin_quantum; - if (b->sparse_flow_count + b->bulk_flow_count) - empty = false; - -@@ -2269,8 +2268,7 @@ static int cake_config_besteffort(struct - - cake_set_rate(b, rate, mtu, - us_to_ns(q->target), us_to_ns(q->interval)); -- b->tin_quantum_band = 65535; -- b->tin_quantum_prio = 65535; -+ b->tin_quantum = 65535; - - return 0; - } -@@ -2281,8 +2279,7 @@ static int cake_config_precedence(struct - struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->rate_bps; -- u32 quantum1 = 256; -- u32 quantum2 = 256; -+ u32 quantum = 256; - u32 i; - - q->tin_cnt = 8; -@@ -2295,18 +2292,14 @@ static int cake_config_precedence(struct - cake_set_rate(b, rate, mtu, us_to_ns(q->target), - us_to_ns(q->interval)); - -- b->tin_quantum_prio = max_t(u16, 1U, quantum1); -- b->tin_quantum_band = max_t(u16, 1U, quantum2); -+ b->tin_quantum = max_t(u16, 1U, quantum); - - /* calculate next class's parameters */ - rate *= 7; - rate >>= 3; - -- quantum1 *= 3; -- quantum1 >>= 1; -- -- quantum2 *= 7; -- quantum2 >>= 3; -+ quantum *= 7; -+ quantum >>= 3; - } - - return 0; -@@ -2375,8 +2368,7 @@ static int cake_config_diffserv8(struct - struct cake_sched_data *q = qdisc_priv(sch); - u32 mtu = psched_mtu(qdisc_dev(sch)); - u64 rate = q->rate_bps; -- u32 quantum1 = 256; -- u32 quantum2 = 256; -+ u32 quantum = 256; - u32 i; - - 
q->tin_cnt = 8; -@@ -2392,18 +2384,14 @@ static int cake_config_diffserv8(struct - cake_set_rate(b, rate, mtu, us_to_ns(q->target), - us_to_ns(q->interval)); - -- b->tin_quantum_prio = max_t(u16, 1U, quantum1); -- b->tin_quantum_band = max_t(u16, 1U, quantum2); -+ b->tin_quantum = max_t(u16, 1U, quantum); - - /* calculate next class's parameters */ - rate *= 7; - rate >>= 3; - -- quantum1 *= 3; -- quantum1 >>= 1; -- -- quantum2 *= 7; -- quantum2 >>= 3; -+ quantum *= 7; -+ quantum >>= 3; - } - - return 0; -@@ -2442,17 +2430,11 @@ static int cake_config_diffserv4(struct - cake_set_rate(&q->tins[3], rate >> 2, mtu, - us_to_ns(q->target), us_to_ns(q->interval)); - -- /* priority weights */ -- q->tins[0].tin_quantum_prio = quantum; -- q->tins[1].tin_quantum_prio = quantum >> 4; -- q->tins[2].tin_quantum_prio = quantum << 2; -- q->tins[3].tin_quantum_prio = quantum << 4; -- - /* bandwidth-sharing weights */ -- q->tins[0].tin_quantum_band = quantum; -- q->tins[1].tin_quantum_band = quantum >> 4; -- q->tins[2].tin_quantum_band = quantum >> 1; -- q->tins[3].tin_quantum_band = quantum >> 2; -+ q->tins[0].tin_quantum = quantum; -+ q->tins[1].tin_quantum = quantum >> 4; -+ q->tins[2].tin_quantum = quantum >> 1; -+ q->tins[3].tin_quantum = quantum >> 2; - - return 0; - } -@@ -2483,15 +2465,10 @@ static int cake_config_diffserv3(struct - cake_set_rate(&q->tins[2], rate >> 2, mtu, - us_to_ns(q->target), us_to_ns(q->interval)); - -- /* priority weights */ -- q->tins[0].tin_quantum_prio = quantum; -- q->tins[1].tin_quantum_prio = quantum >> 4; -- q->tins[2].tin_quantum_prio = quantum << 4; -- - /* bandwidth-sharing weights */ -- q->tins[0].tin_quantum_band = quantum; -- q->tins[1].tin_quantum_band = quantum >> 4; -- q->tins[2].tin_quantum_band = quantum >> 2; -+ q->tins[0].tin_quantum = quantum; -+ q->tins[1].tin_quantum = quantum >> 4; -+ q->tins[2].tin_quantum = quantum >> 2; - - return 0; - } diff --git 
a/target/linux/generic/backport-5.4/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch b/target/linux/generic/backport-5.4/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch deleted file mode 100644 index a4981acdee..0000000000 --- a/target/linux/generic/backport-5.4/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch +++ /dev/null @@ -1,170 +0,0 @@ -From b0c19ed6088ab41dd2a727b60594b7297c15d6ce Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com> -Date: Fri, 29 May 2020 14:43:44 +0200 -Subject: [PATCH] sch_cake: Take advantage of skb->hash where appropriate -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -While the other fq-based qdiscs take advantage of skb->hash and doesn't -recompute it if it is already set, sch_cake does not. - -This was a deliberate choice because sch_cake hashes various parts of the -packet header to support its advanced flow isolation modes. However, -foregoing the use of skb->hash entirely loses a few important benefits: - -- When skb->hash is set by hardware, a few CPU cycles can be saved by not - hashing again in software. - -- Tunnel encapsulations will generally preserve the value of skb->hash from - before the encapsulation, which allows flow-based qdiscs to distinguish - between flows even though the outer packet header no longer has flow - information. - -It turns out that we can preserve these desirable properties in many cases, -while still supporting the advanced flow isolation properties of sch_cake. -This patch does so by reusing the skb->hash value as the flow_hash part of -the hashing procedure in cake_hash() only in the following conditions: - -- If the skb->hash is marked as covering the flow headers (skb->l4_hash is - set) - -AND - -- NAT header rewriting is either disabled, or did not change any values - used for hashing. 
The latter is important to match local-origin packets - such as those of a tunnel endpoint. - -The immediate motivation for fixing this was the recent patch to WireGuard -to preserve the skb->hash on encapsulation. As such, this is also what I -tested against; with this patch, added latency under load for competing -flows drops from ~8 ms to sub-1ms on an RRUL test over a WireGuard tunnel -going through a virtual link shaped to 1Gbps using sch_cake. This matches -the results we saw with a similar setup using sch_fq_codel when testing the -WireGuard patch. - -Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") -Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> -Signed-off-by: David S. Miller <davem@davemloft.net> -Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> ---- - net/sched/sch_cake.c | 65 ++++++++++++++++++++++++++++++++++---------- - 1 file changed, 51 insertions(+), 14 deletions(-) - ---- a/net/sched/sch_cake.c -+++ b/net/sched/sch_cake.c -@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct co - return drop; - } - --static void cake_update_flowkeys(struct flow_keys *keys, -+static bool cake_update_flowkeys(struct flow_keys *keys, - const struct sk_buff *skb) - { - #if IS_ENABLED(CONFIG_NF_CONNTRACK) - struct nf_conntrack_tuple tuple = {}; -- bool rev = !skb->_nfct; -+ bool rev = !skb->_nfct, upd = false; -+ __be32 ip; - - if (skb_protocol(skb, true) != htons(ETH_P_IP)) -- return; -+ return false; - - if (!nf_ct_get_tuple_skb(&tuple, skb)) -- return; -+ return false; - -- keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; -- keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip; -+ ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; -+ if (ip != keys->addrs.v4addrs.src) { -+ keys->addrs.v4addrs.src = ip; -+ upd = true; -+ } -+ ip = rev ? 
tuple.src.u3.ip : tuple.dst.u3.ip; -+ if (ip != keys->addrs.v4addrs.dst) { -+ keys->addrs.v4addrs.dst = ip; -+ upd = true; -+ } - - if (keys->ports.ports) { -- keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all; -- keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all; -+ __be16 port; -+ -+ port = rev ? tuple.dst.u.all : tuple.src.u.all; -+ if (port != keys->ports.src) { -+ keys->ports.src = port; -+ upd = true; -+ } -+ port = rev ? tuple.src.u.all : tuple.dst.u.all; -+ if (port != keys->ports.dst) { -+ port = keys->ports.dst; -+ upd = true; -+ } - } -+ return upd; -+#else -+ return false; - #endif - } - -@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode) - static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, - int flow_mode, u16 flow_override, u16 host_override) - { -+ bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS)); -+ bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS)); -+ bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG); - u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0; - u16 reduced_hash, srchost_idx, dsthost_idx; - struct flow_keys keys, host_keys; -+ bool use_skbhash = skb->l4_hash; - - if (unlikely(flow_mode == CAKE_FLOW_NONE)) - return 0; - -- /* If both overrides are set we can skip packet dissection entirely */ -- if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) && -- (host_override || !(flow_mode & CAKE_FLOW_HOSTS))) -+ /* If both overrides are set, or we can use the SKB hash and nat mode is -+ * disabled, we can skip packet dissection entirely. If nat mode is -+ * enabled there's another check below after doing the conntrack lookup. 
-+ */ -+ if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts) - goto skip_hash; - - skb_flow_dissect_flow_keys(skb, &keys, - FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - -- if (flow_mode & CAKE_FLOW_NAT_FLAG) -- cake_update_flowkeys(&keys, skb); -+ /* Don't use the SKB hash if we change the lookup keys from conntrack */ -+ if (nat_enabled && cake_update_flowkeys(&keys, skb)) -+ use_skbhash = false; -+ -+ /* If we can still use the SKB hash and don't need the host hash, we can -+ * skip the rest of the hashing procedure -+ */ -+ if (use_skbhash && !hash_hosts) -+ goto skip_hash; - - /* flow_hash_from_keys() sorts the addresses by value, so we have - * to preserve their order in a separate data structure to treat -@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_dat - /* This *must* be after the above switch, since as a - * side-effect it sorts the src and dst addresses. - */ -- if (flow_mode & CAKE_FLOW_FLOWS) -+ if (hash_flows && !use_skbhash) - flow_hash = flow_hash_from_keys(&keys); - - skip_hash: - if (flow_override) - flow_hash = flow_override - 1; -+ else if (use_skbhash) -+ flow_hash = skb->hash; - if (host_override) { - dsthost_hash = host_override - 1; - srchost_hash = host_override - 1; diff --git a/target/linux/generic/backport-5.4/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch b/target/linux/generic/backport-5.4/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch deleted file mode 100644 index e171b4cec7..0000000000 --- a/target/linux/generic/backport-5.4/399-5.9-sch_cake-add-RFC-8622-LE-PHB-support-to-CAKE-diffser.patch +++ /dev/null @@ -1,57 +0,0 @@ -From b8392808eb3fc28e523e28cb258c81ca246deb9b Mon Sep 17 00:00:00 2001 -From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> -Date: Thu, 25 Jun 2020 22:18:00 +0200 -Subject: [PATCH] sch_cake: add RFC 8622 LE PHB support to CAKE diffserv - handling -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-Change tin mapping on diffserv3, 4 & 8 for LE PHB support, in essence -making LE a member of the Bulk tin. - -Bulk has the least priority and minimum of 1/16th total bandwidth in the -face of higher priority traffic. - -NB: Diffserv 3 & 4 swap tin 0 & 1 priorities from the default order as -found in diffserv8, in case anyone is wondering why it looks a bit odd. - -Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> -[ reword commit message slightly ] -Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - net/sched/sch_cake.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - ---- a/net/sched/sch_cake.c -+++ b/net/sched/sch_cake.c -@@ -312,8 +312,8 @@ static const u8 precedence[] = { - }; - - static const u8 diffserv8[] = { -- 2, 5, 1, 2, 4, 2, 2, 2, -- 0, 2, 1, 2, 1, 2, 1, 2, -+ 2, 0, 1, 2, 4, 2, 2, 2, -+ 1, 2, 1, 2, 1, 2, 1, 2, - 5, 2, 4, 2, 4, 2, 4, 2, - 3, 2, 3, 2, 3, 2, 3, 2, - 6, 2, 3, 2, 3, 2, 3, 2, -@@ -323,7 +323,7 @@ static const u8 diffserv8[] = { - }; - - static const u8 diffserv4[] = { -- 0, 2, 0, 0, 2, 0, 0, 0, -+ 0, 1, 0, 0, 2, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 2, 0, 2, 0, 2, 0, - 2, 0, 2, 0, 2, 0, 2, 0, -@@ -334,7 +334,7 @@ static const u8 diffserv4[] = { - }; - - static const u8 diffserv3[] = { -- 0, 0, 0, 0, 2, 0, 0, 0, -+ 0, 1, 0, 0, 2, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/target/linux/generic/backport-5.4/400-v5.8-dt-bindings-mtd-partition-Document-the-slc-mode-prop.patch b/target/linux/generic/backport-5.4/400-v5.8-dt-bindings-mtd-partition-Document-the-slc-mode-prop.patch deleted file mode 100644 index 7926843686..0000000000 --- a/target/linux/generic/backport-5.4/400-v5.8-dt-bindings-mtd-partition-Document-the-slc-mode-prop.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 422928a040fe17d17ded69c57903c7908423c7ef Mon Sep 17 00:00:00 2001 -From: Boris Brezillon <bbrezillon@kernel.org> 
-Date: Sun, 3 May 2020 17:53:38 +0200 -Subject: [PATCH] dt-bindings: mtd: partition: Document the slc-mode property - -Add a boolean property to force a specific partition attached to an MLC -NAND to be accessed in an emulated SLC mode this making this partition -immune to paired-pages corruptions. - -Signed-off-by: Boris Brezillon <bbrezillon@kernel.org> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20200503155341.16712-6-miquel.raynal@bootlin.com ---- - Documentation/devicetree/bindings/mtd/partition.txt | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/Documentation/devicetree/bindings/mtd/partition.txt -+++ b/Documentation/devicetree/bindings/mtd/partition.txt -@@ -61,6 +61,9 @@ Optional properties: - clobbered. - - lock : Do not unlock the partition at initialization time (not supported on - all devices) -+- slc-mode: This parameter, if present, allows one to emulate SLC mode on a -+ partition attached to an MLC NAND thus making this partition immune to -+ paired-pages corruptions - - Examples: - diff --git a/target/linux/generic/backport-5.4/401-v5.11-dt-bindings-mtd-convert-fixed-partitions-to-the-json.patch b/target/linux/generic/backport-5.4/401-v5.11-dt-bindings-mtd-convert-fixed-partitions-to-the-json.patch deleted file mode 100644 index 8aded43526..0000000000 --- a/target/linux/generic/backport-5.4/401-v5.11-dt-bindings-mtd-convert-fixed-partitions-to-the-json.patch +++ /dev/null @@ -1,324 +0,0 @@ -From 04e9ab75267489224364fa510a88ada83e11c325 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Thu, 10 Dec 2020 18:23:52 +0100 -Subject: [PATCH] dt-bindings: mtd: convert "fixed-partitions" to the - json-schema -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This standardizes its documentation, allows validating with Makefile -checks and helps writing DTS files. - -Noticeable changes: -1. 
Dropped "Partitions can be represented by sub-nodes of a flash - device." as we also support subpartitions (don't have to be part of - flash device node) -2. Dropped "to Linux" as bindings are meant to be os agnostic. - -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Link: https://lore.kernel.org/r/20201210172352.31632-1-zajec5@gmail.com -Signed-off-by: Rob Herring <robh@kernel.org> ---- - .../devicetree/bindings/mtd/partition.txt | 131 +-------------- - .../mtd/partitions/fixed-partitions.yaml | 152 ++++++++++++++++++ - 2 files changed, 154 insertions(+), 129 deletions(-) - create mode 100644 Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml - ---- a/Documentation/devicetree/bindings/mtd/partition.txt -+++ b/Documentation/devicetree/bindings/mtd/partition.txt -@@ -24,137 +24,10 @@ another partitioning method. - Available bindings are listed in the "partitions" subdirectory. - - --Fixed Partitions --================ -- --Partitions can be represented by sub-nodes of a flash device. This can be used --on platforms which have strong conventions about which portions of a flash are --used for what purposes, but which don't use an on-flash partition table such --as RedBoot. -- --The partition table should be a subnode of the flash node and should be named --'partitions'. This node should have the following property: --- compatible : (required) must be "fixed-partitions" --Partitions are then defined in subnodes of the partitions node. -+Deprecated: partitions defined in flash node -+============================================ - - For backwards compatibility partitions as direct subnodes of the flash device are - supported. This use is discouraged. - NOTE: also for backwards compatibility, direct subnodes that have a compatible - string are not considered partitions, as they may be used for other bindings. -- --#address-cells & #size-cells must both be present in the partitions subnode of the --flash device. 
There are two valid values for both: --<1>: for partitions that require a single 32-bit cell to represent their -- size/address (aka the value is below 4 GiB) --<2>: for partitions that require two 32-bit cells to represent their -- size/address (aka the value is 4 GiB or greater). -- --Required properties: --- reg : The partition's offset and size within the flash -- --Optional properties: --- label : The label / name for this partition. If omitted, the label is taken -- from the node name (excluding the unit address). --- read-only : This parameter, if present, is a hint to Linux that this -- partition should only be mounted read-only. This is usually used for flash -- partitions containing early-boot firmware images or data which should not be -- clobbered. --- lock : Do not unlock the partition at initialization time (not supported on -- all devices) --- slc-mode: This parameter, if present, allows one to emulate SLC mode on a -- partition attached to an MLC NAND thus making this partition immune to -- paired-pages corruptions -- --Examples: -- -- --flash@0 { -- partitions { -- compatible = "fixed-partitions"; -- #address-cells = <1>; -- #size-cells = <1>; -- -- partition@0 { -- label = "u-boot"; -- reg = <0x0000000 0x100000>; -- read-only; -- }; -- -- uimage@100000 { -- reg = <0x0100000 0x200000>; -- }; -- }; --}; -- --flash@1 { -- partitions { -- compatible = "fixed-partitions"; -- #address-cells = <1>; -- #size-cells = <2>; -- -- /* a 4 GiB partition */ -- partition@0 { -- label = "filesystem"; -- reg = <0x00000000 0x1 0x00000000>; -- }; -- }; --}; -- --flash@2 { -- partitions { -- compatible = "fixed-partitions"; -- #address-cells = <2>; -- #size-cells = <2>; -- -- /* an 8 GiB partition */ -- partition@0 { -- label = "filesystem #1"; -- reg = <0x0 0x00000000 0x2 0x00000000>; -- }; -- -- /* a 4 GiB partition */ -- partition@200000000 { -- label = "filesystem #2"; -- reg = <0x2 0x00000000 0x1 0x00000000>; -- }; -- }; --}; -- --flash@3 { -- partitions { -- 
compatible = "fixed-partitions"; -- #address-cells = <1>; -- #size-cells = <1>; -- -- partition@0 { -- label = "bootloader"; -- reg = <0x000000 0x100000>; -- read-only; -- }; -- -- firmware@100000 { -- label = "firmware"; -- reg = <0x100000 0xe00000>; -- compatible = "brcm,trx"; -- }; -- -- calibration@f00000 { -- label = "calibration"; -- reg = <0xf00000 0x100000>; -- compatible = "fixed-partitions"; -- ranges = <0 0xf00000 0x100000>; -- #address-cells = <1>; -- #size-cells = <1>; -- -- partition@0 { -- label = "wifi0"; -- reg = <0x000000 0x080000>; -- }; -- -- partition@80000 { -- label = "wifi1"; -- reg = <0x080000 0x080000>; -- }; -- }; -- }; --}; ---- /dev/null -+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml -@@ -0,0 +1,152 @@ -+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/mtd/partitions/fixed-partitions.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: Fixed partitions -+ -+description: | -+ This binding can be used on platforms which have strong conventions about -+ which portions of a flash are used for what purposes, but which don't use an -+ on-flash partition table such as RedBoot. -+ -+ The partition table should be a node named "partitions". Partitions are then -+ defined as subnodes. -+ -+maintainers: -+ - Rafał Miłecki <rafal@milecki.pl> -+ -+properties: -+ compatible: -+ const: fixed-partitions -+ -+ "#address-cells": true -+ -+ "#size-cells": true -+ -+patternProperties: -+ "@[0-9a-f]+$": -+ description: node describing a single flash partition -+ type: object -+ -+ properties: -+ reg: -+ description: partition's offset and size within the flash -+ maxItems: 1 -+ -+ label: -+ description: The label / name for this partition. If omitted, the label -+ is taken from the node name (excluding the unit address). 
-+ -+ read-only: -+ description: This parameter, if present, is a hint that this partition -+ should only be mounted read-only. This is usually used for flash -+ partitions containing early-boot firmware images or data which should -+ not be clobbered. -+ type: boolean -+ -+ lock: -+ description: Do not unlock the partition at initialization time (not -+ supported on all devices) -+ type: boolean -+ -+ slc-mode: -+ description: This parameter, if present, allows one to emulate SLC mode -+ on a partition attached to an MLC NAND thus making this partition -+ immune to paired-pages corruptions -+ type: boolean -+ -+ required: -+ - reg -+ -+required: -+ - "#address-cells" -+ - "#size-cells" -+ -+additionalProperties: true -+ -+examples: -+ - | -+ partitions { -+ compatible = "fixed-partitions"; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ partition@0 { -+ label = "u-boot"; -+ reg = <0x0000000 0x100000>; -+ read-only; -+ }; -+ -+ uimage@100000 { -+ reg = <0x0100000 0x200000>; -+ }; -+ }; -+ - | -+ partitions { -+ compatible = "fixed-partitions"; -+ #address-cells = <1>; -+ #size-cells = <2>; -+ -+ /* a 4 GiB partition */ -+ partition@0 { -+ label = "filesystem"; -+ reg = <0x00000000 0x1 0x00000000>; -+ }; -+ }; -+ - | -+ partitions { -+ compatible = "fixed-partitions"; -+ #address-cells = <2>; -+ #size-cells = <2>; -+ -+ /* an 8 GiB partition */ -+ partition@0 { -+ label = "filesystem #1"; -+ reg = <0x0 0x00000000 0x2 0x00000000>; -+ }; -+ -+ /* a 4 GiB partition */ -+ partition@200000000 { -+ label = "filesystem #2"; -+ reg = <0x2 0x00000000 0x1 0x00000000>; -+ }; -+ }; -+ - | -+ partitions { -+ compatible = "fixed-partitions"; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ partition@0 { -+ label = "bootloader"; -+ reg = <0x000000 0x100000>; -+ read-only; -+ }; -+ -+ firmware@100000 { -+ compatible = "brcm,trx"; -+ label = "firmware"; -+ reg = <0x100000 0xe00000>; -+ }; -+ -+ calibration@f00000 { -+ compatible = "fixed-partitions"; -+ label = 
"calibration"; -+ reg = <0xf00000 0x100000>; -+ ranges = <0 0xf00000 0x100000>; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ partition@0 { -+ label = "wifi0"; -+ reg = <0x000000 0x080000>; -+ }; -+ -+ partition@80000 { -+ label = "wifi1"; -+ reg = <0x080000 0x080000>; -+ }; -+ }; -+ }; diff --git a/target/linux/generic/backport-5.4/402-v5.12-0001-dt-bindings-mtd-move-partition-binding-to-its-own-fi.patch b/target/linux/generic/backport-5.4/402-v5.12-0001-dt-bindings-mtd-move-partition-binding-to-its-own-fi.patch deleted file mode 100644 index f3b1179ecd..0000000000 --- a/target/linux/generic/backport-5.4/402-v5.12-0001-dt-bindings-mtd-move-partition-binding-to-its-own-fi.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 6418522022c706fd867b00b2571edba48b8fa8c7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Thu, 11 Feb 2021 23:04:25 +0100 -Subject: [PATCH] dt-bindings: mtd: move partition binding to its own file -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Single partition binding is quite common and may be: -1. Used by multiple parsers -2. Extended for more specific cases - -Move it to separated file to avoid code duplication. 
- -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Reviewed-by: Rob Herring <robh@kernel.org> -Signed-off-by: Richard Weinberger <richard@nod.at> ---- - .../mtd/partitions/fixed-partitions.yaml | 33 +------------ - .../bindings/mtd/partitions/partition.yaml | 47 +++++++++++++++++++ - 2 files changed, 48 insertions(+), 32 deletions(-) - create mode 100644 Documentation/devicetree/bindings/mtd/partitions/partition.yaml - ---- a/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml -+++ b/Documentation/devicetree/bindings/mtd/partitions/fixed-partitions.yaml -@@ -27,38 +27,7 @@ properties: - - patternProperties: - "@[0-9a-f]+$": -- description: node describing a single flash partition -- type: object -- -- properties: -- reg: -- description: partition's offset and size within the flash -- maxItems: 1 -- -- label: -- description: The label / name for this partition. If omitted, the label -- is taken from the node name (excluding the unit address). -- -- read-only: -- description: This parameter, if present, is a hint that this partition -- should only be mounted read-only. This is usually used for flash -- partitions containing early-boot firmware images or data which should -- not be clobbered. 
-- type: boolean -- -- lock: -- description: Do not unlock the partition at initialization time (not -- supported on all devices) -- type: boolean -- -- slc-mode: -- description: This parameter, if present, allows one to emulate SLC mode -- on a partition attached to an MLC NAND thus making this partition -- immune to paired-pages corruptions -- type: boolean -- -- required: -- - reg -+ $ref: "partition.yaml#" - - required: - - "#address-cells" ---- /dev/null -+++ b/Documentation/devicetree/bindings/mtd/partitions/partition.yaml -@@ -0,0 +1,47 @@ -+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/mtd/partitions/partition.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: Partition -+ -+description: | -+ This binding describes a single flash partition. Each partition must have its -+ relative offset and size specified. Depending on partition function extra -+ properties can be used. -+ -+maintainers: -+ - Rafał Miłecki <rafal@milecki.pl> -+ -+properties: -+ reg: -+ description: partition's offset and size within the flash -+ maxItems: 1 -+ -+ label: -+ description: The label / name for this partition. If omitted, the label -+ is taken from the node name (excluding the unit address). -+ -+ read-only: -+ description: This parameter, if present, is a hint that this partition -+ should only be mounted read-only. This is usually used for flash -+ partitions containing early-boot firmware images or data which should -+ not be clobbered. 
-+ type: boolean -+ -+ lock: -+ description: Do not unlock the partition at initialization time (not -+ supported on all devices) -+ type: boolean -+ -+ slc-mode: -+ description: This parameter, if present, allows one to emulate SLC mode -+ on a partition attached to an MLC NAND thus making this partition -+ immune to paired-pages corruptions -+ type: boolean -+ -+required: -+ - reg -+ -+additionalProperties: true diff --git a/target/linux/generic/backport-5.4/402-v5.12-0002-dt-bindings-mtd-add-binding-for-BCM4908-partitions.patch b/target/linux/generic/backport-5.4/402-v5.12-0002-dt-bindings-mtd-add-binding-for-BCM4908-partitions.patch deleted file mode 100644 index 8576c7d78d..0000000000 --- a/target/linux/generic/backport-5.4/402-v5.12-0002-dt-bindings-mtd-add-binding-for-BCM4908-partitions.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 6e9dff6fe3fbc452f16566e4a7e293b0decefdba Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Thu, 11 Feb 2021 23:04:26 +0100 -Subject: [PATCH] dt-bindings: mtd: add binding for BCM4908 partitions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -BCM4908 uses fixed partitions layout but function of some partitions may -vary. Some devices use multiple firmware partitions and those partitions -should be marked to let system discover their purpose. 
- -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Richard Weinberger <richard@nod.at> ---- - .../partitions/brcm,bcm4908-partitions.yaml | 70 +++++++++++++++++++ - 1 file changed, 70 insertions(+) - create mode 100644 Documentation/devicetree/bindings/mtd/partitions/brcm,bcm4908-partitions.yaml - ---- /dev/null -+++ b/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm4908-partitions.yaml -@@ -0,0 +1,70 @@ -+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/mtd/partitions/brcm,bcm4908-partitions.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: Broadcom BCM4908 partitioning -+ -+description: | -+ Broadcom BCM4908 CFE bootloader supports two firmware partitions. One is used -+ for regular booting, the other is treated as fallback. -+ -+ This binding allows defining all fixed partitions and marking those containing -+ firmware. System can use that information e.g. for booting or flashing -+ purposes. 
-+ -+maintainers: -+ - Rafał Miłecki <rafal@milecki.pl> -+ -+properties: -+ compatible: -+ const: brcm,bcm4908-partitions -+ -+ "#address-cells": -+ enum: [ 1, 2 ] -+ -+ "#size-cells": -+ enum: [ 1, 2 ] -+ -+patternProperties: -+ "^partition@[0-9a-f]+$": -+ $ref: "partition.yaml#" -+ properties: -+ compatible: -+ const: brcm,bcm4908-firmware -+ unevaluatedProperties: false -+ -+required: -+ - "#address-cells" -+ - "#size-cells" -+ -+additionalProperties: false -+ -+examples: -+ - | -+ partitions { -+ compatible = "brcm,bcm4908-partitions"; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ partition@0 { -+ label = "cferom"; -+ reg = <0x0 0x100000>; -+ }; -+ -+ partition@100000 { -+ compatible = "brcm,bcm4908-firmware"; -+ reg = <0x100000 0xf00000>; -+ }; -+ -+ partition@1000000 { -+ compatible = "brcm,bcm4908-firmware"; -+ reg = <0x1000000 0xf00000>; -+ }; -+ -+ partition@1f00000 { -+ label = "calibration"; -+ reg = <0x1f00000 0x100000>; -+ }; -+ }; diff --git a/target/linux/generic/backport-5.4/403-v5.13-mtd-parsers-ofpart-support-BCM4908-fixed-partitions.patch b/target/linux/generic/backport-5.4/403-v5.13-mtd-parsers-ofpart-support-BCM4908-fixed-partitions.patch deleted file mode 100644 index 8f292bd177..0000000000 --- a/target/linux/generic/backport-5.4/403-v5.13-mtd-parsers-ofpart-support-BCM4908-fixed-partitions.patch +++ /dev/null @@ -1,648 +0,0 @@ -From afbef8efb591792579c633a7c545f914c6165f82 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Thu, 11 Feb 2021 23:04:27 +0100 -Subject: [PATCH] mtd: parsers: ofpart: support BCM4908 fixed partitions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Some devices use fixed partitioning with some partitions requiring some -extra logic. E.g. BCM4908 may have multiple firmware partitions but -detecting currently used one requires checking bootloader parameters. 
- -To support such cases without duplicating a lot of code (without copying -most of the ofpart.c code) support for post-parsing callback was added. - -BCM4908 support in ofpart can be enabled using config option and results -in compiling & executing a specific callback. It simply reads offset of -currently used firmware partition from the DT. Bootloader specifies it -using the "brcm_blparms" property. - -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> ---- - drivers/mtd/parsers/Kconfig | 9 +++ - drivers/mtd/parsers/Makefile | 2 + - drivers/mtd/parsers/ofpart_bcm4908.c | 64 +++++++++++++++++++ - drivers/mtd/parsers/ofpart_bcm4908.h | 15 +++++ - .../mtd/parsers/{ofpart.c => ofpart_core.c} | 28 +++++++- - 5 files changed, 116 insertions(+), 2 deletions(-) - create mode 100644 drivers/mtd/parsers/ofpart_bcm4908.c - create mode 100644 drivers/mtd/parsers/ofpart_bcm4908.h - rename drivers/mtd/parsers/{ofpart.c => ofpart_core.c} (88%) - ---- a/drivers/mtd/parsers/Kconfig -+++ b/drivers/mtd/parsers/Kconfig -@@ -67,6 +67,15 @@ config MTD_OF_PARTS - flash memory node, as described in - Documentation/devicetree/bindings/mtd/partition.txt. - -+config MTD_OF_PARTS_BCM4908 -+ bool "BCM4908 partitioning support" -+ depends on MTD_OF_PARTS && (ARCH_BCM4908 || COMPILE_TEST) -+ default ARCH_BCM4908 -+ help -+ This provides partitions parser for BCM4908 family devices -+ that can have multiple "firmware" partitions. It takes care of -+ finding currently used one and backup ones. 
-+ - config MTD_PARSER_IMAGETAG - tristate "Parser for BCM963XX Image Tag format partitions" - depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST ---- a/drivers/mtd/parsers/Makefile -+++ b/drivers/mtd/parsers/Makefile -@@ -4,6 +4,8 @@ obj-$(CONFIG_MTD_BCM47XX_PARTS) += bcm4 - obj-$(CONFIG_MTD_BCM63XX_PARTS) += bcm63xxpart.o - obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o - obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o -+ofpart-y += ofpart_core.o -+ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908) += ofpart_bcm4908.o - obj-$(CONFIG_MTD_PARSER_IMAGETAG) += parser_imagetag.o - obj-$(CONFIG_MTD_AFS_PARTS) += afs.o - obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o ---- /dev/null -+++ b/drivers/mtd/parsers/ofpart_bcm4908.c -@@ -0,0 +1,64 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2021 Rafał Miłecki <rafal@milecki.pl> -+ */ -+ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/of.h> -+#include <linux/mtd/mtd.h> -+#include <linux/slab.h> -+#include <linux/mtd/partitions.h> -+ -+#include "ofpart_bcm4908.h" -+ -+#define BLPARAMS_FW_OFFSET "NAND_RFS_OFS" -+ -+static long long bcm4908_partitions_fw_offset(void) -+{ -+ struct device_node *root; -+ struct property *prop; -+ const char *s; -+ -+ root = of_find_node_by_path("/"); -+ if (!root) -+ return -ENOENT; -+ -+ of_property_for_each_string(root, "brcm_blparms", prop, s) { -+ size_t len = strlen(BLPARAMS_FW_OFFSET); -+ unsigned long offset; -+ int err; -+ -+ if (strncmp(s, BLPARAMS_FW_OFFSET, len) || s[len] != '=') -+ continue; -+ -+ err = kstrtoul(s + len + 1, 0, &offset); -+ if (err) { -+ pr_err("failed to parse %s\n", s + len + 1); -+ return err; -+ } -+ -+ return offset << 10; -+ } -+ -+ return -ENOENT; -+} -+ -+int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts) -+{ -+ long long fw_offset; -+ int i; -+ -+ fw_offset = bcm4908_partitions_fw_offset(); -+ -+ for (i = 0; i < nr_parts; i++) { -+ if (of_device_is_compatible(parts[i].of_node, 
"brcm,bcm4908-firmware")) { -+ if (fw_offset < 0 || parts[i].offset == fw_offset) -+ parts[i].name = "firmware"; -+ else -+ parts[i].name = "backup"; -+ } -+ } -+ -+ return 0; -+} ---- /dev/null -+++ b/drivers/mtd/parsers/ofpart_bcm4908.h -@@ -0,0 +1,15 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef __BCM4908_PARTITIONS_H -+#define __BCM4908_PARTITIONS_H -+ -+#ifdef CONFIG_MTD_OF_PARTS_BCM4908 -+int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts); -+#else -+static inline int bcm4908_partitions_post_parse(struct mtd_info *mtd, struct mtd_partition *parts, -+ int nr_parts) -+{ -+ return -EOPNOTSUPP; -+} -+#endif -+ -+#endif ---- a/drivers/mtd/parsers/ofpart.c -+++ /dev/null -@@ -1,236 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0-or-later --/* -- * Flash partitions described by the OF (or flattened) device tree -- * -- * Copyright © 2006 MontaVista Software Inc. -- * Author: Vitaly Wool <vwool@ru.mvista.com> -- * -- * Revised to handle newer style flash binding by: -- * Copyright © 2007 David Gibson, IBM Corporation. 
-- */ -- --#include <linux/module.h> --#include <linux/init.h> --#include <linux/of.h> --#include <linux/mtd/mtd.h> --#include <linux/slab.h> --#include <linux/mtd/partitions.h> -- --static bool node_has_compatible(struct device_node *pp) --{ -- return of_get_property(pp, "compatible", NULL); --} -- --static int parse_fixed_partitions(struct mtd_info *master, -- const struct mtd_partition **pparts, -- struct mtd_part_parser_data *data) --{ -- struct mtd_partition *parts; -- struct device_node *mtd_node; -- struct device_node *ofpart_node; -- const char *partname; -- struct device_node *pp; -- int nr_parts, i, ret = 0; -- bool dedicated = true; -- -- -- /* Pull of_node from the master device node */ -- mtd_node = mtd_get_of_node(master); -- if (!mtd_node) -- return 0; -- -- ofpart_node = of_get_child_by_name(mtd_node, "partitions"); -- if (!ofpart_node) { -- /* -- * We might get here even when ofpart isn't used at all (e.g., -- * when using another parser), so don't be louder than -- * KERN_DEBUG -- */ -- pr_debug("%s: 'partitions' subnode not found on %pOF. 
Trying to parse direct subnodes as partitions.\n", -- master->name, mtd_node); -- ofpart_node = mtd_node; -- dedicated = false; -- } else if (!of_device_is_compatible(ofpart_node, "fixed-partitions")) { -- /* The 'partitions' subnode might be used by another parser */ -- return 0; -- } -- -- /* First count the subnodes */ -- nr_parts = 0; -- for_each_child_of_node(ofpart_node, pp) { -- if (!dedicated && node_has_compatible(pp)) -- continue; -- -- nr_parts++; -- } -- -- if (nr_parts == 0) -- return 0; -- -- parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL); -- if (!parts) -- return -ENOMEM; -- -- i = 0; -- for_each_child_of_node(ofpart_node, pp) { -- const __be32 *reg; -- int len; -- int a_cells, s_cells; -- -- if (!dedicated && node_has_compatible(pp)) -- continue; -- -- reg = of_get_property(pp, "reg", &len); -- if (!reg) { -- if (dedicated) { -- pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n", -- master->name, pp, -- mtd_node); -- goto ofpart_fail; -- } else { -- nr_parts--; -- continue; -- } -- } -- -- a_cells = of_n_addr_cells(pp); -- s_cells = of_n_size_cells(pp); -- if (len / 4 != a_cells + s_cells) { -- pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n", -- master->name, pp, -- mtd_node); -- goto ofpart_fail; -- } -- -- parts[i].offset = of_read_number(reg, a_cells); -- parts[i].size = of_read_number(reg + a_cells, s_cells); -- parts[i].of_node = pp; -- -- partname = of_get_property(pp, "label", &len); -- if (!partname) -- partname = of_get_property(pp, "name", &len); -- parts[i].name = partname; -- -- if (of_get_property(pp, "read-only", &len)) -- parts[i].mask_flags |= MTD_WRITEABLE; -- -- if (of_get_property(pp, "lock", &len)) -- parts[i].mask_flags |= MTD_POWERUP_LOCK; -- -- i++; -- } -- -- if (!nr_parts) -- goto ofpart_none; -- -- *pparts = parts; -- return nr_parts; -- --ofpart_fail: -- pr_err("%s: error parsing ofpart partition %pOF (%pOF)\n", -- master->name, pp, mtd_node); -- ret = -EINVAL; 
--ofpart_none: -- of_node_put(pp); -- kfree(parts); -- return ret; --} -- --static const struct of_device_id parse_ofpart_match_table[] = { -- { .compatible = "fixed-partitions" }, -- {}, --}; --MODULE_DEVICE_TABLE(of, parse_ofpart_match_table); -- --static struct mtd_part_parser ofpart_parser = { -- .parse_fn = parse_fixed_partitions, -- .name = "fixed-partitions", -- .of_match_table = parse_ofpart_match_table, --}; -- --static int parse_ofoldpart_partitions(struct mtd_info *master, -- const struct mtd_partition **pparts, -- struct mtd_part_parser_data *data) --{ -- struct mtd_partition *parts; -- struct device_node *dp; -- int i, plen, nr_parts; -- const struct { -- __be32 offset, len; -- } *part; -- const char *names; -- -- /* Pull of_node from the master device node */ -- dp = mtd_get_of_node(master); -- if (!dp) -- return 0; -- -- part = of_get_property(dp, "partitions", &plen); -- if (!part) -- return 0; /* No partitions found */ -- -- pr_warn("Device tree uses obsolete partition map binding: %pOF\n", dp); -- -- nr_parts = plen / sizeof(part[0]); -- -- parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL); -- if (!parts) -- return -ENOMEM; -- -- names = of_get_property(dp, "partition-names", &plen); -- -- for (i = 0; i < nr_parts; i++) { -- parts[i].offset = be32_to_cpu(part->offset); -- parts[i].size = be32_to_cpu(part->len) & ~1; -- /* bit 0 set signifies read only partition */ -- if (be32_to_cpu(part->len) & 1) -- parts[i].mask_flags = MTD_WRITEABLE; -- -- if (names && (plen > 0)) { -- int len = strlen(names) + 1; -- -- parts[i].name = names; -- plen -= len; -- names += len; -- } else { -- parts[i].name = "unnamed"; -- } -- -- part++; -- } -- -- *pparts = parts; -- return nr_parts; --} -- --static struct mtd_part_parser ofoldpart_parser = { -- .parse_fn = parse_ofoldpart_partitions, -- .name = "ofoldpart", --}; -- --static int __init ofpart_parser_init(void) --{ -- register_mtd_parser(&ofpart_parser); -- register_mtd_parser(&ofoldpart_parser); -- return 0; 
--} -- --static void __exit ofpart_parser_exit(void) --{ -- deregister_mtd_parser(&ofpart_parser); -- deregister_mtd_parser(&ofoldpart_parser); --} -- --module_init(ofpart_parser_init); --module_exit(ofpart_parser_exit); -- --MODULE_LICENSE("GPL"); --MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree"); --MODULE_AUTHOR("Vitaly Wool, David Gibson"); --/* -- * When MTD core cannot find the requested parser, it tries to load the module -- * with the same name. Since we provide the ofoldpart parser, we should have -- * the corresponding alias. -- */ --MODULE_ALIAS("fixed-partitions"); --MODULE_ALIAS("ofoldpart"); ---- /dev/null -+++ b/drivers/mtd/parsers/ofpart_core.c -@@ -0,0 +1,260 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/* -+ * Flash partitions described by the OF (or flattened) device tree -+ * -+ * Copyright © 2006 MontaVista Software Inc. -+ * Author: Vitaly Wool <vwool@ru.mvista.com> -+ * -+ * Revised to handle newer style flash binding by: -+ * Copyright © 2007 David Gibson, IBM Corporation. 
-+ */ -+ -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/of.h> -+#include <linux/mtd/mtd.h> -+#include <linux/slab.h> -+#include <linux/mtd/partitions.h> -+ -+#include "ofpart_bcm4908.h" -+ -+struct fixed_partitions_quirks { -+ int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts); -+}; -+ -+struct fixed_partitions_quirks bcm4908_partitions_quirks = { -+ .post_parse = bcm4908_partitions_post_parse, -+}; -+ -+static const struct of_device_id parse_ofpart_match_table[]; -+ -+static bool node_has_compatible(struct device_node *pp) -+{ -+ return of_get_property(pp, "compatible", NULL); -+} -+ -+static int parse_fixed_partitions(struct mtd_info *master, -+ const struct mtd_partition **pparts, -+ struct mtd_part_parser_data *data) -+{ -+ const struct fixed_partitions_quirks *quirks; -+ const struct of_device_id *of_id; -+ struct mtd_partition *parts; -+ struct device_node *mtd_node; -+ struct device_node *ofpart_node; -+ const char *partname; -+ struct device_node *pp; -+ int nr_parts, i, ret = 0; -+ bool dedicated = true; -+ -+ /* Pull of_node from the master device node */ -+ mtd_node = mtd_get_of_node(master); -+ if (!mtd_node) -+ return 0; -+ -+ ofpart_node = of_get_child_by_name(mtd_node, "partitions"); -+ if (!ofpart_node) { -+ /* -+ * We might get here even when ofpart isn't used at all (e.g., -+ * when using another parser), so don't be louder than -+ * KERN_DEBUG -+ */ -+ pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n", -+ master->name, mtd_node); -+ ofpart_node = mtd_node; -+ dedicated = false; -+ } -+ -+ of_id = of_match_node(parse_ofpart_match_table, ofpart_node); -+ if (dedicated && !of_id) { -+ /* The 'partitions' subnode might be used by another parser */ -+ return 0; -+ } -+ -+ quirks = of_id ? 
of_id->data : NULL; -+ -+ /* First count the subnodes */ -+ nr_parts = 0; -+ for_each_child_of_node(ofpart_node, pp) { -+ if (!dedicated && node_has_compatible(pp)) -+ continue; -+ -+ nr_parts++; -+ } -+ -+ if (nr_parts == 0) -+ return 0; -+ -+ parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL); -+ if (!parts) -+ return -ENOMEM; -+ -+ i = 0; -+ for_each_child_of_node(ofpart_node, pp) { -+ const __be32 *reg; -+ int len; -+ int a_cells, s_cells; -+ -+ if (!dedicated && node_has_compatible(pp)) -+ continue; -+ -+ reg = of_get_property(pp, "reg", &len); -+ if (!reg) { -+ if (dedicated) { -+ pr_debug("%s: ofpart partition %pOF (%pOF) missing reg property.\n", -+ master->name, pp, -+ mtd_node); -+ goto ofpart_fail; -+ } else { -+ nr_parts--; -+ continue; -+ } -+ } -+ -+ a_cells = of_n_addr_cells(pp); -+ s_cells = of_n_size_cells(pp); -+ if (len / 4 != a_cells + s_cells) { -+ pr_debug("%s: ofpart partition %pOF (%pOF) error parsing reg property.\n", -+ master->name, pp, -+ mtd_node); -+ goto ofpart_fail; -+ } -+ -+ parts[i].offset = of_read_number(reg, a_cells); -+ parts[i].size = of_read_number(reg + a_cells, s_cells); -+ parts[i].of_node = pp; -+ -+ partname = of_get_property(pp, "label", &len); -+ if (!partname) -+ partname = of_get_property(pp, "name", &len); -+ parts[i].name = partname; -+ -+ if (of_get_property(pp, "read-only", &len)) -+ parts[i].mask_flags |= MTD_WRITEABLE; -+ -+ if (of_get_property(pp, "lock", &len)) -+ parts[i].mask_flags |= MTD_POWERUP_LOCK; -+ -+ i++; -+ } -+ -+ if (!nr_parts) -+ goto ofpart_none; -+ -+ if (quirks && quirks->post_parse) -+ quirks->post_parse(master, parts, nr_parts); -+ -+ *pparts = parts; -+ return nr_parts; -+ -+ofpart_fail: -+ pr_err("%s: error parsing ofpart partition %pOF (%pOF)\n", -+ master->name, pp, mtd_node); -+ ret = -EINVAL; -+ofpart_none: -+ of_node_put(pp); -+ kfree(parts); -+ return ret; -+} -+ -+static const struct of_device_id parse_ofpart_match_table[] = { -+ /* Generic */ -+ { .compatible = 
"fixed-partitions" }, -+ /* Customized */ -+ { .compatible = "brcm,bcm4908-partitions", .data = &bcm4908_partitions_quirks, }, -+ {}, -+}; -+MODULE_DEVICE_TABLE(of, parse_ofpart_match_table); -+ -+static struct mtd_part_parser ofpart_parser = { -+ .parse_fn = parse_fixed_partitions, -+ .name = "fixed-partitions", -+ .of_match_table = parse_ofpart_match_table, -+}; -+ -+static int parse_ofoldpart_partitions(struct mtd_info *master, -+ const struct mtd_partition **pparts, -+ struct mtd_part_parser_data *data) -+{ -+ struct mtd_partition *parts; -+ struct device_node *dp; -+ int i, plen, nr_parts; -+ const struct { -+ __be32 offset, len; -+ } *part; -+ const char *names; -+ -+ /* Pull of_node from the master device node */ -+ dp = mtd_get_of_node(master); -+ if (!dp) -+ return 0; -+ -+ part = of_get_property(dp, "partitions", &plen); -+ if (!part) -+ return 0; /* No partitions found */ -+ -+ pr_warn("Device tree uses obsolete partition map binding: %pOF\n", dp); -+ -+ nr_parts = plen / sizeof(part[0]); -+ -+ parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL); -+ if (!parts) -+ return -ENOMEM; -+ -+ names = of_get_property(dp, "partition-names", &plen); -+ -+ for (i = 0; i < nr_parts; i++) { -+ parts[i].offset = be32_to_cpu(part->offset); -+ parts[i].size = be32_to_cpu(part->len) & ~1; -+ /* bit 0 set signifies read only partition */ -+ if (be32_to_cpu(part->len) & 1) -+ parts[i].mask_flags = MTD_WRITEABLE; -+ -+ if (names && (plen > 0)) { -+ int len = strlen(names) + 1; -+ -+ parts[i].name = names; -+ plen -= len; -+ names += len; -+ } else { -+ parts[i].name = "unnamed"; -+ } -+ -+ part++; -+ } -+ -+ *pparts = parts; -+ return nr_parts; -+} -+ -+static struct mtd_part_parser ofoldpart_parser = { -+ .parse_fn = parse_ofoldpart_partitions, -+ .name = "ofoldpart", -+}; -+ -+static int __init ofpart_parser_init(void) -+{ -+ register_mtd_parser(&ofpart_parser); -+ register_mtd_parser(&ofoldpart_parser); -+ return 0; -+} -+ -+static void __exit ofpart_parser_exit(void) 
-+{ -+ deregister_mtd_parser(&ofpart_parser); -+ deregister_mtd_parser(&ofoldpart_parser); -+} -+ -+module_init(ofpart_parser_init); -+module_exit(ofpart_parser_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree"); -+MODULE_AUTHOR("Vitaly Wool, David Gibson"); -+/* -+ * When MTD core cannot find the requested parser, it tries to load the module -+ * with the same name. Since we provide the ofoldpart parser, we should have -+ * the corresponding alias. -+ */ -+MODULE_ALIAS("fixed-partitions"); -+MODULE_ALIAS("ofoldpart"); diff --git a/target/linux/generic/backport-5.4/404-v5.13-mtd-parsers-ofpart-limit-parsing-of-deprecated-DT-sy.patch b/target/linux/generic/backport-5.4/404-v5.13-mtd-parsers-ofpart-limit-parsing-of-deprecated-DT-sy.patch deleted file mode 100644 index 35058adba7..0000000000 --- a/target/linux/generic/backport-5.4/404-v5.13-mtd-parsers-ofpart-limit-parsing-of-deprecated-DT-sy.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 2d751203aacf86a1b301a188d8551c7da91043ab Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Tue, 2 Mar 2021 20:00:12 +0100 -Subject: [PATCH] mtd: parsers: ofpart: limit parsing of deprecated DT syntax -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -For backward compatibility ofpart still supports the old syntax like: -spi-flash@0 { - compatible = "jedec,spi-nor"; - reg = <0x0>; - - partition@0 { - label = "bootloader"; - reg = <0x0 0x100000>; - }; -}; -(without "partitions" subnode). - -There is no reason however to support nested partitions without a clear -"compatible" string like: -partitions { - compatible = "fixed-partitions"; - #address-cells = <1>; - #size-cells = <1>; - - partition@0 { - label = "bootloader"; - reg = <0x0 0x100000>; - - partition@0 { - label = "config"; - reg = <0x80000 0x80000>; - }; - }; -}; -(we never officially supported or documented that). 
- -Make sure ofpart doesn't attempt to parse above. - -Cc: Ansuel Smith <ansuelsmth@gmail.com> -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20210302190012.1255-1-zajec5@gmail.com ---- - drivers/mtd/parsers/ofpart_core.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/drivers/mtd/parsers/ofpart_core.c -+++ b/drivers/mtd/parsers/ofpart_core.c -@@ -53,7 +53,7 @@ static int parse_fixed_partitions(struct - return 0; - - ofpart_node = of_get_child_by_name(mtd_node, "partitions"); -- if (!ofpart_node) { -+ if (!ofpart_node && !mtd_is_partition(master)) { - /* - * We might get here even when ofpart isn't used at all (e.g., - * when using another parser), so don't be louder than -@@ -64,6 +64,8 @@ static int parse_fixed_partitions(struct - ofpart_node = mtd_node; - dedicated = false; - } -+ if (!ofpart_node) -+ return 0; - - of_id = of_match_node(parse_ofpart_match_table, ofpart_node); - if (dedicated && !of_id) { diff --git a/target/linux/generic/backport-5.4/405-v5.13-mtd-parsers-ofpart-make-symbol-bcm4908_partitions_qu.patch b/target/linux/generic/backport-5.4/405-v5.13-mtd-parsers-ofpart-make-symbol-bcm4908_partitions_qu.patch deleted file mode 100644 index f1b778a6e1..0000000000 --- a/target/linux/generic/backport-5.4/405-v5.13-mtd-parsers-ofpart-make-symbol-bcm4908_partitions_qu.patch +++ /dev/null @@ -1,34 +0,0 @@ -From b87b6d2d6f540e29c3f98e1572d64e560d73d6c1 Mon Sep 17 00:00:00 2001 -From: Wei Yongjun <weiyongjun1@huawei.com> -Date: Thu, 4 Mar 2021 06:46:00 +0000 -Subject: [PATCH] mtd: parsers: ofpart: make symbol 'bcm4908_partitions_quirks' - static - -The sparse tool complains as follows: - -drivers/mtd/parsers/ofpart_core.c:25:32: warning: - symbol 'bcm4908_partitions_quirks' was not declared. Should it be static? - -This symbol is not used outside of ofpart_core.c, so this -commit marks it static. 
- -Fixes: 457da931b608 ("mtd: parsers: ofpart: support BCM4908 fixed partitions") -Reported-by: Hulk Robot <hulkci@huawei.com> -Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20210304064600.3279138-1-weiyongjun1@huawei.com ---- - drivers/mtd/parsers/ofpart_core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/mtd/parsers/ofpart_core.c -+++ b/drivers/mtd/parsers/ofpart_core.c -@@ -22,7 +22,7 @@ struct fixed_partitions_quirks { - int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts); - }; - --struct fixed_partitions_quirks bcm4908_partitions_quirks = { -+static struct fixed_partitions_quirks bcm4908_partitions_quirks = { - .post_parse = bcm4908_partitions_post_parse, - }; - diff --git a/target/linux/generic/backport-5.4/406-v5.13-0001-mtd-core-add-nvmem-cells-compatible-to-parse-mtd-as-.patch b/target/linux/generic/backport-5.4/406-v5.13-0001-mtd-core-add-nvmem-cells-compatible-to-parse-mtd-as-.patch deleted file mode 100644 index ecea743d87..0000000000 --- a/target/linux/generic/backport-5.4/406-v5.13-0001-mtd-core-add-nvmem-cells-compatible-to-parse-mtd-as-.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 658c4448bbbf02a143abf1b89d09a3337ebd3ba6 Mon Sep 17 00:00:00 2001 -From: Ansuel Smith <ansuelsmth@gmail.com> -Date: Fri, 12 Mar 2021 07:28:19 +0100 -Subject: [PATCH] mtd: core: add nvmem-cells compatible to parse mtd as nvmem - cells -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Partitions that contains the nvmem-cells compatible will register -their direct subonodes as nvmem cells and the node will be treated as a -nvmem provider. 
- -Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com> -Tested-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20210312062830.20548-1-ansuelsmth@gmail.com ---- - drivers/mtd/mtdcore.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/drivers/mtd/mtdcore.c -+++ b/drivers/mtd/mtdcore.c -@@ -559,6 +559,7 @@ static int mtd_nvmem_reg_read(void *priv - - static int mtd_nvmem_add(struct mtd_info *mtd) - { -+ struct device_node *node = mtd_get_of_node(mtd); - struct nvmem_config config = {}; - - config.id = -1; -@@ -571,7 +572,7 @@ static int mtd_nvmem_add(struct mtd_info - config.stride = 1; - config.read_only = true; - config.root_only = true; -- config.no_of_node = true; -+ config.no_of_node = !of_device_is_compatible(node, "nvmem-cells"); - config.priv = mtd; - - mtd->nvmem = nvmem_register(&config); diff --git a/target/linux/generic/backport-5.4/406-v5.13-0002-dt-bindings-nvmem-drop-nodename-restriction.patch b/target/linux/generic/backport-5.4/406-v5.13-0002-dt-bindings-nvmem-drop-nodename-restriction.patch deleted file mode 100644 index c0515bd571..0000000000 --- a/target/linux/generic/backport-5.4/406-v5.13-0002-dt-bindings-nvmem-drop-nodename-restriction.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 52981a0fa9f7d68641e0e6bb584054c6d9eb2056 Mon Sep 17 00:00:00 2001 -From: Ansuel Smith <ansuelsmth@gmail.com> -Date: Fri, 12 Mar 2021 07:28:20 +0100 -Subject: [PATCH] dt-bindings: nvmem: drop $nodename restriction - -Drop $nodename restriction as now mtd partition can also be used as -nvmem provider. 
- -Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com> -Reviewed-by: Rob Herring <robh@kernel.org> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20210312062830.20548-2-ansuelsmth@gmail.com ---- - Documentation/devicetree/bindings/nvmem/nvmem.yaml | 3 --- - 1 file changed, 3 deletions(-) - ---- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml -+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml -@@ -20,9 +20,6 @@ description: | - storage device. - - properties: -- $nodename: -- pattern: "^(eeprom|efuse|nvram)(@.*|-[0-9a-f])*$" -- - "#address-cells": - const: 1 - diff --git a/target/linux/generic/backport-5.4/406-v5.13-0003-dt-bindings-mtd-Document-use-of-nvmem-cells-compatib.patch b/target/linux/generic/backport-5.4/406-v5.13-0003-dt-bindings-mtd-Document-use-of-nvmem-cells-compatib.patch deleted file mode 100644 index 552919f587..0000000000 --- a/target/linux/generic/backport-5.4/406-v5.13-0003-dt-bindings-mtd-Document-use-of-nvmem-cells-compatib.patch +++ /dev/null @@ -1,119 +0,0 @@ -From ac42c46f983e4a9003a7bb91ad44a23ab7b8f534 Mon Sep 17 00:00:00 2001 -From: Ansuel Smith <ansuelsmth@gmail.com> -Date: Fri, 12 Mar 2021 07:28:21 +0100 -Subject: [PATCH] dt-bindings: mtd: Document use of nvmem-cells compatible - -Document nvmem-cells compatible used to treat mtd partitions as a -nvmem provider. 
- -Signed-off-by: Ansuel Smith <ansuelsmth@gmail.com> -Reviewed-by: Rob Herring <robh@kernel.org> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20210312062830.20548-3-ansuelsmth@gmail.com ---- - .../bindings/mtd/partitions/nvmem-cells.yaml | 99 +++++++++++++++++++ - 1 file changed, 99 insertions(+) - create mode 100644 Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml - ---- /dev/null -+++ b/Documentation/devicetree/bindings/mtd/partitions/nvmem-cells.yaml -@@ -0,0 +1,99 @@ -+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/mtd/partitions/nvmem-cells.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: Nvmem cells -+ -+description: | -+ Any partition containing the compatible "nvmem-cells" will register as a -+ nvmem provider. -+ Each direct subnodes represents a nvmem cell following the nvmem binding. -+ Nvmem binding to declare nvmem-cells can be found in: -+ Documentation/devicetree/bindings/nvmem/nvmem.yaml -+ -+maintainers: -+ - Ansuel Smith <ansuelsmth@gmail.com> -+ -+allOf: -+ - $ref: /schemas/nvmem/nvmem.yaml# -+ -+properties: -+ compatible: -+ const: nvmem-cells -+ -+required: -+ - compatible -+ -+additionalProperties: true -+ -+examples: -+ - | -+ partitions { -+ compatible = "fixed-partitions"; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ /* ... 
*/ -+ -+ }; -+ art: art@1200000 { -+ compatible = "nvmem-cells"; -+ reg = <0x1200000 0x0140000>; -+ label = "art"; -+ read-only; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ macaddr_gmac1: macaddr_gmac1@0 { -+ reg = <0x0 0x6>; -+ }; -+ -+ macaddr_gmac2: macaddr_gmac2@6 { -+ reg = <0x6 0x6>; -+ }; -+ -+ pre_cal_24g: pre_cal_24g@1000 { -+ reg = <0x1000 0x2f20>; -+ }; -+ -+ pre_cal_5g: pre_cal_5g@5000{ -+ reg = <0x5000 0x2f20>; -+ }; -+ }; -+ - | -+ partitions { -+ compatible = "fixed-partitions"; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ partition@0 { -+ label = "bootloader"; -+ reg = <0x000000 0x100000>; -+ read-only; -+ }; -+ -+ firmware@100000 { -+ compatible = "brcm,trx"; -+ label = "firmware"; -+ reg = <0x100000 0xe00000>; -+ }; -+ -+ calibration@f00000 { -+ compatible = "nvmem-cells"; -+ label = "calibration"; -+ reg = <0xf00000 0x100000>; -+ ranges = <0 0xf00000 0x100000>; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ wifi0@0 { -+ reg = <0x000000 0x080000>; -+ }; -+ -+ wifi1@80000 { -+ reg = <0x080000 0x080000>; -+ }; -+ }; -+ }; diff --git a/target/linux/generic/backport-5.4/407-v5.13-0001-dt-bindings-mtd-add-binding-for-Linksys-Northstar-pa.patch b/target/linux/generic/backport-5.4/407-v5.13-0001-dt-bindings-mtd-add-binding-for-Linksys-Northstar-pa.patch deleted file mode 100644 index 35a4afd67b..0000000000 --- a/target/linux/generic/backport-5.4/407-v5.13-0001-dt-bindings-mtd-add-binding-for-Linksys-Northstar-pa.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 2fa7294175c76e1ec568aa75c1891fd908728c8d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Fri, 12 Mar 2021 14:49:18 +0100 -Subject: [PATCH] dt-bindings: mtd: add binding for Linksys Northstar - partitions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Linksys on Broadcom Northstar devices uses fixed flash layout with -multiple firmware partitions. 
- -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Reviewed-by: Rob Herring <robh@kernel.org> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20210312134919.7767-1-zajec5@gmail.com ---- - .../mtd/partitions/linksys,ns-partitions.yaml | 74 +++++++++++++++++++ - 1 file changed, 74 insertions(+) - create mode 100644 Documentation/devicetree/bindings/mtd/partitions/linksys,ns-partitions.yaml - ---- /dev/null -+++ b/Documentation/devicetree/bindings/mtd/partitions/linksys,ns-partitions.yaml -@@ -0,0 +1,74 @@ -+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/mtd/partitions/linksys,ns-partitions.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: Linksys Northstar partitioning -+ -+description: | -+ Linksys devices based on Broadcom Northstar architecture often use two -+ firmware partitions. One is used for regular booting, the other is treated as -+ fallback. -+ -+ This binding allows defining all fixed partitions and marking those containing -+ firmware. System can use that information e.g. for booting or flashing -+ purposes. 
-+ -+maintainers: -+ - Rafał Miłecki <rafal@milecki.pl> -+ -+properties: -+ compatible: -+ const: linksys,ns-partitions -+ -+ "#address-cells": -+ enum: [ 1, 2 ] -+ -+ "#size-cells": -+ enum: [ 1, 2 ] -+ -+patternProperties: -+ "^partition@[0-9a-f]+$": -+ $ref: "partition.yaml#" -+ properties: -+ compatible: -+ items: -+ - const: linksys,ns-firmware -+ - const: brcm,trx -+ unevaluatedProperties: false -+ -+required: -+ - "#address-cells" -+ - "#size-cells" -+ -+additionalProperties: false -+ -+examples: -+ - | -+ partitions { -+ compatible = "linksys,ns-partitions"; -+ #address-cells = <1>; -+ #size-cells = <1>; -+ -+ partition@0 { -+ label = "boot"; -+ reg = <0x0 0x100000>; -+ read-only; -+ }; -+ -+ partition@100000 { -+ label = "nvram"; -+ reg = <0x100000 0x100000>; -+ }; -+ -+ partition@200000 { -+ compatible = "linksys,ns-firmware", "brcm,trx"; -+ reg = <0x200000 0xf00000>; -+ }; -+ -+ partition@1100000 { -+ compatible = "linksys,ns-firmware", "brcm,trx"; -+ reg = <0x1100000 0xf00000>; -+ }; -+ }; diff --git a/target/linux/generic/backport-5.4/407-v5.13-0002-mtd-parsers-ofpart-support-Linksys-Northstar-partiti.patch b/target/linux/generic/backport-5.4/407-v5.13-0002-mtd-parsers-ofpart-support-Linksys-Northstar-partiti.patch deleted file mode 100644 index 75eb9391ae..0000000000 --- a/target/linux/generic/backport-5.4/407-v5.13-0002-mtd-parsers-ofpart-support-Linksys-Northstar-partiti.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 7134a2d026d942210b4d26d6059c9d979ca7866e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Fri, 12 Mar 2021 14:49:19 +0100 -Subject: [PATCH] mtd: parsers: ofpart: support Linksys Northstar partitions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This allows extending ofpart parser with support for Linksys Northstar -devices. That support uses recently added quirks mechanism. 
- -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com> -Link: https://lore.kernel.org/linux-mtd/20210312134919.7767-2-zajec5@gmail.com ---- - drivers/mtd/parsers/Kconfig | 10 +++++ - drivers/mtd/parsers/Makefile | 1 + - drivers/mtd/parsers/ofpart_core.c | 6 +++ - drivers/mtd/parsers/ofpart_linksys_ns.c | 50 +++++++++++++++++++++++++ - drivers/mtd/parsers/ofpart_linksys_ns.h | 18 +++++++++ - 5 files changed, 85 insertions(+) - create mode 100644 drivers/mtd/parsers/ofpart_linksys_ns.c - create mode 100644 drivers/mtd/parsers/ofpart_linksys_ns.h - ---- a/drivers/mtd/parsers/Kconfig -+++ b/drivers/mtd/parsers/Kconfig -@@ -76,6 +76,16 @@ config MTD_OF_PARTS_BCM4908 - that can have multiple "firmware" partitions. It takes care of - finding currently used one and backup ones. - -+config MTD_OF_PARTS_LINKSYS_NS -+ bool "Linksys Northstar partitioning support" -+ depends on MTD_OF_PARTS && (ARCH_BCM_5301X || ARCH_BCM4908 || COMPILE_TEST) -+ default ARCH_BCM_5301X -+ help -+ This provides partitions parser for Linksys devices based on Broadcom -+ Northstar architecture. Linksys commonly uses fixed flash layout with -+ two "firmware" partitions. Currently used firmware has to be detected -+ using CFE environment variable. 
-+ - config MTD_PARSER_IMAGETAG - tristate "Parser for BCM963XX Image Tag format partitions" - depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST ---- a/drivers/mtd/parsers/Makefile -+++ b/drivers/mtd/parsers/Makefile -@@ -6,6 +6,7 @@ obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdl - obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o - ofpart-y += ofpart_core.o - ofpart-$(CONFIG_MTD_OF_PARTS_BCM4908) += ofpart_bcm4908.o -+ofpart-$(CONFIG_MTD_OF_PARTS_LINKSYS_NS)+= ofpart_linksys_ns.o - obj-$(CONFIG_MTD_PARSER_IMAGETAG) += parser_imagetag.o - obj-$(CONFIG_MTD_AFS_PARTS) += afs.o - obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o ---- a/drivers/mtd/parsers/ofpart_core.c -+++ b/drivers/mtd/parsers/ofpart_core.c -@@ -17,6 +17,7 @@ - #include <linux/mtd/partitions.h> - - #include "ofpart_bcm4908.h" -+#include "ofpart_linksys_ns.h" - - struct fixed_partitions_quirks { - int (*post_parse)(struct mtd_info *mtd, struct mtd_partition *parts, int nr_parts); -@@ -26,6 +27,10 @@ static struct fixed_partitions_quirks bc - .post_parse = bcm4908_partitions_post_parse, - }; - -+static struct fixed_partitions_quirks linksys_ns_partitions_quirks = { -+ .post_parse = linksys_ns_partitions_post_parse, -+}; -+ - static const struct of_device_id parse_ofpart_match_table[]; - - static bool node_has_compatible(struct device_node *pp) -@@ -164,6 +169,7 @@ static const struct of_device_id parse_o - { .compatible = "fixed-partitions" }, - /* Customized */ - { .compatible = "brcm,bcm4908-partitions", .data = &bcm4908_partitions_quirks, }, -+ { .compatible = "linksys,ns-partitions", .data = &linksys_ns_partitions_quirks, }, - {}, - }; - MODULE_DEVICE_TABLE(of, parse_ofpart_match_table); ---- /dev/null -+++ b/drivers/mtd/parsers/ofpart_linksys_ns.c -@@ -0,0 +1,50 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2021 Rafał Miłecki <rafal@milecki.pl> -+ */ -+ -+#include <linux/bcm47xx_nvram.h> -+#include <linux/mtd/mtd.h> -+#include <linux/mtd/partitions.h> -+ -+#include "ofpart_linksys_ns.h" -+ 
-+#define NVRAM_BOOT_PART "bootpartition" -+ -+static int ofpart_linksys_ns_bootpartition(void) -+{ -+ char buf[4]; -+ int bootpartition; -+ -+ /* Check CFE environment variable */ -+ if (bcm47xx_nvram_getenv(NVRAM_BOOT_PART, buf, sizeof(buf)) > 0) { -+ if (!kstrtoint(buf, 0, &bootpartition)) -+ return bootpartition; -+ pr_warn("Failed to parse %s value \"%s\"\n", NVRAM_BOOT_PART, -+ buf); -+ } else { -+ pr_warn("Failed to get NVRAM \"%s\"\n", NVRAM_BOOT_PART); -+ } -+ -+ return 0; -+} -+ -+int linksys_ns_partitions_post_parse(struct mtd_info *mtd, -+ struct mtd_partition *parts, -+ int nr_parts) -+{ -+ int bootpartition = ofpart_linksys_ns_bootpartition(); -+ int trx_idx = 0; -+ int i; -+ -+ for (i = 0; i < nr_parts; i++) { -+ if (of_device_is_compatible(parts[i].of_node, "linksys,ns-firmware")) { -+ if (trx_idx++ == bootpartition) -+ parts[i].name = "firmware"; -+ else -+ parts[i].name = "backup"; -+ } -+ } -+ -+ return 0; -+} ---- /dev/null -+++ b/drivers/mtd/parsers/ofpart_linksys_ns.h -@@ -0,0 +1,18 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef __OFPART_LINKSYS_NS_H -+#define __OFPART_LINKSYS_NS_H -+ -+#ifdef CONFIG_MTD_OF_PARTS_LINKSYS_NS -+int linksys_ns_partitions_post_parse(struct mtd_info *mtd, -+ struct mtd_partition *parts, -+ int nr_parts); -+#else -+static inline int linksys_ns_partitions_post_parse(struct mtd_info *mtd, -+ struct mtd_partition *parts, -+ int nr_parts) -+{ -+ return -EOPNOTSUPP; -+} -+#endif -+ -+#endif diff --git a/target/linux/generic/backport-5.4/408-v5.13-mtd-cfi_cmdset_0002-Disable-buffered-writes-for-AMD.patch b/target/linux/generic/backport-5.4/408-v5.13-mtd-cfi_cmdset_0002-Disable-buffered-writes-for-AMD.patch deleted file mode 100644 index 3af641e62e..0000000000 --- a/target/linux/generic/backport-5.4/408-v5.13-mtd-cfi_cmdset_0002-Disable-buffered-writes-for-AMD.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 7e4404113686868858a34210c28ae122e967aa64 Mon Sep 17 00:00:00 2001 -From: Mauri Sandberg <sandberg@mailfence.com> 
-Date: Tue, 9 Mar 2021 19:48:59 +0200 -Subject: [PATCH] mtd: cfi_cmdset_0002: Disable buffered writes for AMD chip - 0x2201 - -Buffer writes do not work with AMD chip 0x2201. The chip in question -is a AMD/Spansion/Cypress Semiconductor S29GL256N and datasheet [1] -talks about writing buffers being possible. While waiting for a neater -solution resort to writing word-sized chunks only. - -Without the patch kernel logs will be flooded with entries like below: - -jffs2_scan_eraseblock(): End of filesystem marker found at 0x0 -jffs2_build_filesystem(): unlocking the mtd device... -done. -jffs2_build_filesystem(): erasing all blocks after the end marker... -MTD do_write_buffer_wait(): software timeout, address:0x01ec000a. -jffs2: Write clean marker to block at 0x01920000 failed: -5 -MTD do_write_buffer_wait(): software timeout, address:0x01e2000a. -jffs2: Write clean marker to block at 0x01880000 failed: -5 -MTD do_write_buffer_wait(): software timeout, address:0x01e0000a. -jffs2: Write clean marker to block at 0x01860000 failed: -5 -MTD do_write_buffer_wait(): software timeout, address:0x01dc000a. -jffs2: Write clean marker to block at 0x01820000 failed: -5 -MTD do_write_buffer_wait(): software timeout, address:0x01da000a. -jffs2: Write clean marker to block at 0x01800000 failed: -5 -... - -Tested on a Buffalo wzr-hp-g300nh running kernel 5.10.16. 
- -[1] https://www.cypress.com/file/219941/download -or https://datasheetspdf.com/pdf-file/565708/SPANSION/S29GL256N/1 - -Signed-off-by: Mauri Sandberg <sandberg@mailfence.com> -Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com> -Link: https://lore.kernel.org/r/20210309174859.362060-1-sandberg@mailfence.com ---- - drivers/mtd/chips/cfi_cmdset_0002.c | 4 ++++ - 1 file changed, 4 insertions(+) - ---- a/drivers/mtd/chips/cfi_cmdset_0002.c -+++ b/drivers/mtd/chips/cfi_cmdset_0002.c -@@ -272,6 +272,10 @@ static void fixup_use_write_buffers(stru - { - struct map_info *map = mtd->priv; - struct cfi_private *cfi = map->fldrv_priv; -+ -+ if (cfi->mfr == CFI_MFR_AMD && cfi->id == 0x2201) -+ return; -+ - if (cfi->cfiq->BufWriteTimeoutTyp) { - pr_debug("Using buffer write method\n"); - mtd->_write = cfi_amdstd_write_buffers; diff --git a/target/linux/generic/backport-5.4/410-mtd-fix-calculating-partition-end-address.patch b/target/linux/generic/backport-5.4/410-mtd-fix-calculating-partition-end-address.patch deleted file mode 100644 index 1eae015b28..0000000000 --- a/target/linux/generic/backport-5.4/410-mtd-fix-calculating-partition-end-address.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Mon, 9 Mar 2020 08:30:19 +0100 -Subject: [PATCH] mtd: fix calculating partition end address -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This fixes check for partitions that don't start at beginning of their -parents. Missing partition's offset in formula could result in forcing -read-only incorrectly. 
- -Fixes: 6750f61a13a0 ("mtd: improve calculating partition boundaries when checking for alignment") -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> ---- - drivers/mtd/mtdpart.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/mtd/mtdpart.c -+++ b/drivers/mtd/mtdpart.c -@@ -524,7 +524,7 @@ static struct mtd_part *allocate_partiti - part->name); - } - -- tmp = part_absolute_offset(parent) + slave->mtd.size; -+ tmp = part_absolute_offset(parent) + slave->offset + slave->mtd.size; - remainder = do_div(tmp, wr_alignment); - if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) { - slave->mtd.flags &= ~MTD_WRITEABLE; diff --git a/target/linux/generic/backport-5.4/500-v5.13-ubifs-default-to-zstd-compression.patch b/target/linux/generic/backport-5.4/500-v5.13-ubifs-default-to-zstd-compression.patch deleted file mode 100644 index dd50c19c27..0000000000 --- a/target/linux/generic/backport-5.4/500-v5.13-ubifs-default-to-zstd-compression.patch +++ /dev/null @@ -1,25 +0,0 @@ -From dcdf415b740923530dc71d89fecc8361078473f5 Mon Sep 17 00:00:00 2001 -From: Rui Salvaterra <rsalvaterra@gmail.com> -Date: Mon, 5 Apr 2021 16:11:55 +0100 -Subject: [PATCH] ubifs: default to zstd compression - -Compared to lzo and zlib, zstd is the best all-around performer, both in terms -of speed and compression ratio. Set it as the default, if available. 
- -Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com> ---- - fs/ubifs/sb.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/fs/ubifs/sb.c -+++ b/fs/ubifs/sb.c -@@ -53,6 +53,9 @@ - - static int get_default_compressor(struct ubifs_info *c) - { -+ if (ubifs_compr_present(c, UBIFS_COMPR_ZSTD)) -+ return UBIFS_COMPR_ZSTD; -+ - if (ubifs_compr_present(c, UBIFS_COMPR_LZO)) - return UBIFS_COMPR_LZO; - diff --git a/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch b/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch deleted file mode 100644 index 35aeb96251..0000000000 --- a/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch +++ /dev/null @@ -1,88 +0,0 @@ -From: Felix Fietkau <nbd@nbd.name> -Date: Mon, 8 Feb 2021 11:34:08 -0800 -Subject: [PATCH] net: extract napi poll functionality to __napi_poll() - -This commit introduces a new function __napi_poll() which does the main -logic of the existing napi_poll() function, and will be called by other -functions in later commits. -This idea and implementation is done by Felix Fietkau <nbd@nbd.name> and -is proposed as part of the patch to move napi work to work_queue -context. -This commit by itself is a code restructure. - -Signed-off-by: Felix Fietkau <nbd@nbd.name> -Signed-off-by: Wei Wang <weiwan@google.com> -Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -6325,15 +6325,10 @@ void netif_napi_del(struct napi_struct * - } - EXPORT_SYMBOL(netif_napi_del); - --static int napi_poll(struct napi_struct *n, struct list_head *repoll) -+static int __napi_poll(struct napi_struct *n, bool *repoll) - { -- void *have; - int work, weight; - -- list_del_init(&n->poll_list); -- -- have = netpoll_poll_lock(n); -- - weight = n->weight; - - /* This NAPI_STATE_SCHED test is for avoiding a race -@@ -6351,7 +6346,7 @@ static int napi_poll(struct napi_struct - WARN_ON_ONCE(work > weight); - - if (likely(work < weight)) -- goto out_unlock; -+ return work; - - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code -@@ -6360,7 +6355,7 @@ static int napi_poll(struct napi_struct - */ - if (unlikely(napi_disable_pending(n))) { - napi_complete(n); -- goto out_unlock; -+ return work; - } - - if (n->gro_bitmask) { -@@ -6378,12 +6373,29 @@ static int napi_poll(struct napi_struct - if (unlikely(!list_empty(&n->poll_list))) { - pr_warn_once("%s: Budget exhausted after napi rescheduled\n", - n->dev ? 
n->dev->name : "backlog"); -- goto out_unlock; -+ return work; - } - -- list_add_tail(&n->poll_list, repoll); -+ *repoll = true; -+ -+ return work; -+} -+ -+static int napi_poll(struct napi_struct *n, struct list_head *repoll) -+{ -+ bool do_repoll = false; -+ void *have; -+ int work; -+ -+ list_del_init(&n->poll_list); -+ -+ have = netpoll_poll_lock(n); -+ -+ work = __napi_poll(n, &do_repoll); -+ -+ if (do_repoll) -+ list_add_tail(&n->poll_list, repoll); - --out_unlock: - netpoll_poll_unlock(have); - - return work; diff --git a/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch b/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch deleted file mode 100644 index 0c548f331a..0000000000 --- a/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch +++ /dev/null @@ -1,261 +0,0 @@ -From: Wei Wang <weiwan@google.com> -Date: Mon, 8 Feb 2021 11:34:09 -0800 -Subject: [PATCH] net: implement threaded-able napi poll loop support - -This patch allows running each napi poll loop inside its own -kernel thread. -The kthread is created during netif_napi_add() if dev->threaded -is set. And threaded mode is enabled in napi_enable(). We will -provide a way to set dev->threaded and enable threaded mode -without a device up/down in the following patch. - -Once that threaded mode is enabled and the kthread is -started, napi_schedule() will wake-up such thread instead -of scheduling the softirq. - -The threaded poll loop behaves quite likely the net_rx_action, -but it does not have to manipulate local irqs and uses -an explicit scheduling point based on netdev_budget. 
- -Co-developed-by: Paolo Abeni <pabeni@redhat.com> -Signed-off-by: Paolo Abeni <pabeni@redhat.com> -Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> -Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> -Co-developed-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Wei Wang <weiwan@google.com> -Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -340,6 +340,7 @@ struct napi_struct { - struct list_head dev_list; - struct hlist_node napi_hash_node; - unsigned int napi_id; -+ struct task_struct *thread; - }; - - enum { -@@ -350,6 +351,7 @@ enum { - NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ - NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ - NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ -+ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ - }; - - enum { -@@ -360,6 +362,7 @@ enum { - NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), - NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), -+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), - }; - - enum gro_result { -@@ -504,20 +507,7 @@ bool napi_hash_del(struct napi_struct *n - */ - void napi_disable(struct napi_struct *n); - --/** -- * napi_enable - enable NAPI scheduling -- * @n: NAPI context -- * -- * Resume NAPI from being scheduled on this context. -- * Must be paired with napi_disable. 
-- */ --static inline void napi_enable(struct napi_struct *n) --{ -- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); -- smp_mb__before_atomic(); -- clear_bit(NAPI_STATE_SCHED, &n->state); -- clear_bit(NAPI_STATE_NPSVC, &n->state); --} -+void napi_enable(struct napi_struct *n); - - /** - * napi_synchronize - wait until NAPI is not running -@@ -1783,6 +1773,8 @@ enum netdev_ml_priv_type { - * - * @wol_enabled: Wake-on-LAN is enabled - * -+ * @threaded: napi threaded mode is enabled -+ * - * FIXME: cleanup struct net_device such that network protocol info - * moves out. - */ -@@ -2075,6 +2067,7 @@ struct net_device { - struct lock_class_key addr_list_lock_key; - bool proto_down; - unsigned wol_enabled:1; -+ unsigned threaded:1; - }; - #define to_net_dev(d) container_of(d, struct net_device, dev) - ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -91,6 +91,7 @@ - #include <linux/etherdevice.h> - #include <linux/ethtool.h> - #include <linux/skbuff.h> -+#include <linux/kthread.h> - #include <linux/bpf.h> - #include <linux/bpf_trace.h> - #include <net/net_namespace.h> -@@ -1289,6 +1290,27 @@ void netdev_notify_peers(struct net_devi - } - EXPORT_SYMBOL(netdev_notify_peers); - -+static int napi_threaded_poll(void *data); -+ -+static int napi_kthread_create(struct napi_struct *n) -+{ -+ int err = 0; -+ -+ /* Create and wake up the kthread once to put it in -+ * TASK_INTERRUPTIBLE mode to avoid the blocked task -+ * warning and work with loadavg. 
-+ */ -+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", -+ n->dev->name, n->napi_id); -+ if (IS_ERR(n->thread)) { -+ err = PTR_ERR(n->thread); -+ pr_err("kthread_run failed with err %d\n", err); -+ n->thread = NULL; -+ } -+ -+ return err; -+} -+ - static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) - { - const struct net_device_ops *ops = dev->netdev_ops; -@@ -3888,6 +3910,21 @@ int gro_normal_batch __read_mostly = 8; - static inline void ____napi_schedule(struct softnet_data *sd, - struct napi_struct *napi) - { -+ struct task_struct *thread; -+ -+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) { -+ /* Paired with smp_mb__before_atomic() in -+ * napi_enable(). Use READ_ONCE() to guarantee -+ * a complete read on napi->thread. Only call -+ * wake_up_process() when it's not NULL. -+ */ -+ thread = READ_ONCE(napi->thread); -+ if (thread) { -+ wake_up_process(thread); -+ return; -+ } -+ } -+ - list_add_tail(&napi->poll_list, &sd->poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - } -@@ -6279,6 +6316,12 @@ void netif_napi_add(struct net_device *d - set_bit(NAPI_STATE_NPSVC, &napi->state); - list_add_rcu(&napi->dev_list, &dev->napi_list); - napi_hash_add(napi); -+ /* Create kthread for this napi if dev->threaded is set. -+ * Clear dev->threaded if kthread creation failed so that -+ * threaded mode will not be enabled in napi_enable(). -+ */ -+ if (dev->threaded && napi_kthread_create(napi)) -+ dev->threaded = 0; - } - EXPORT_SYMBOL(netif_napi_add); - -@@ -6295,9 +6338,28 @@ void napi_disable(struct napi_struct *n) - hrtimer_cancel(&n->timer); - - clear_bit(NAPI_STATE_DISABLE, &n->state); -+ clear_bit(NAPI_STATE_THREADED, &n->state); - } - EXPORT_SYMBOL(napi_disable); - -+/** -+ * napi_enable - enable NAPI scheduling -+ * @n: NAPI context -+ * -+ * Resume NAPI from being scheduled on this context. -+ * Must be paired with napi_disable. 
-+ */ -+void napi_enable(struct napi_struct *n) -+{ -+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); -+ smp_mb__before_atomic(); -+ clear_bit(NAPI_STATE_SCHED, &n->state); -+ clear_bit(NAPI_STATE_NPSVC, &n->state); -+ if (n->dev->threaded && n->thread) -+ set_bit(NAPI_STATE_THREADED, &n->state); -+} -+EXPORT_SYMBOL(napi_enable); -+ - static void flush_gro_hash(struct napi_struct *napi) - { - int i; -@@ -6322,6 +6384,11 @@ void netif_napi_del(struct napi_struct * - - flush_gro_hash(napi); - napi->gro_bitmask = 0; -+ -+ if (napi->thread) { -+ kthread_stop(napi->thread); -+ napi->thread = NULL; -+ } - } - EXPORT_SYMBOL(netif_napi_del); - -@@ -6401,6 +6468,51 @@ static int napi_poll(struct napi_struct - return work; - } - -+static int napi_thread_wait(struct napi_struct *napi) -+{ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ while (!kthread_should_stop() && !napi_disable_pending(napi)) { -+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) { -+ WARN_ON(!list_empty(&napi->poll_list)); -+ __set_current_state(TASK_RUNNING); -+ return 0; -+ } -+ -+ schedule(); -+ set_current_state(TASK_INTERRUPTIBLE); -+ } -+ __set_current_state(TASK_RUNNING); -+ return -1; -+} -+ -+static int napi_threaded_poll(void *data) -+{ -+ struct napi_struct *napi = data; -+ void *have; -+ -+ while (!napi_thread_wait(napi)) { -+ for (;;) { -+ bool repoll = false; -+ -+ local_bh_disable(); -+ -+ have = netpoll_poll_lock(napi); -+ __napi_poll(napi, &repoll); -+ netpoll_poll_unlock(have); -+ -+ __kfree_skb_flush(); -+ local_bh_enable(); -+ -+ if (!repoll) -+ break; -+ -+ cond_resched(); -+ } -+ } -+ return 0; -+} -+ - static __latent_entropy void net_rx_action(struct softirq_action *h) - { - struct softnet_data *sd = this_cpu_ptr(&softnet_data); diff --git a/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch b/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch deleted file mode 100644 index 
bdc34a15ea..0000000000 --- a/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch +++ /dev/null @@ -1,177 +0,0 @@ -From: Wei Wang <weiwan@google.com> -Date: Mon, 8 Feb 2021 11:34:10 -0800 -Subject: [PATCH] net: add sysfs attribute to control napi threaded mode - -This patch adds a new sysfs attribute to the network device class. -Said attribute provides a per-device control to enable/disable the -threaded mode for all the napi instances of the given network device, -without the need for a device up/down. -User sets it to 1 or 0 to enable or disable threaded mode. -Note: when switching between threaded and the current softirq based mode -for a napi instance, it will not immediately take effect if the napi is -currently being polled. The mode switch will happen for the next time -napi_schedule() is called. - -Co-developed-by: Paolo Abeni <pabeni@redhat.com> -Signed-off-by: Paolo Abeni <pabeni@redhat.com> -Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> -Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> -Co-developed-by: Felix Fietkau <nbd@nbd.name> -Signed-off-by: Felix Fietkau <nbd@nbd.name> -Signed-off-by: Wei Wang <weiwan@google.com> -Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - ---- a/Documentation/ABI/testing/sysfs-class-net -+++ b/Documentation/ABI/testing/sysfs-class-net -@@ -301,3 +301,18 @@ Contact: netdev@vger.kernel.org - Description: - 32-bit unsigned integer counting the number of times the link has - been down -+ -+What: /sys/class/net/<iface>/threaded -+Date: Jan 2021 -+KernelVersion: 5.12 -+Contact: netdev@vger.kernel.org -+Description: -+ Boolean value to control the threaded mode per device. User could -+ set this value to enable/disable threaded mode for all napi -+ belonging to this device, without the need to do device up/down. 
-+ -+ Possible values: -+ == ================================== -+ 0 threaded mode disabled for this dev -+ 1 threaded mode enabled for this dev -+ == ================================== ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -498,6 +498,8 @@ static inline bool napi_complete(struct - */ - bool napi_hash_del(struct napi_struct *napi); - -+int dev_set_threaded(struct net_device *dev, bool threaded); -+ - /** - * napi_disable - prevent NAPI from scheduling - * @n: NAPI context ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -3914,8 +3914,9 @@ static inline void ____napi_schedule(str - - if (test_bit(NAPI_STATE_THREADED, &napi->state)) { - /* Paired with smp_mb__before_atomic() in -- * napi_enable(). Use READ_ONCE() to guarantee -- * a complete read on napi->thread. Only call -+ * napi_enable()/dev_set_threaded(). -+ * Use READ_ONCE() to guarantee a complete -+ * read on napi->thread. Only call - * wake_up_process() when it's not NULL. - */ - thread = READ_ONCE(napi->thread); -@@ -6293,6 +6294,49 @@ static void init_gro_hash(struct napi_st - napi->gro_bitmask = 0; - } - -+int dev_set_threaded(struct net_device *dev, bool threaded) -+{ -+ struct napi_struct *napi; -+ int err = 0; -+ -+ if (dev->threaded == threaded) -+ return 0; -+ -+ if (threaded) { -+ list_for_each_entry(napi, &dev->napi_list, dev_list) { -+ if (!napi->thread) { -+ err = napi_kthread_create(napi); -+ if (err) { -+ threaded = false; -+ break; -+ } -+ } -+ } -+ } -+ -+ dev->threaded = threaded; -+ -+ /* Make sure kthread is created before THREADED bit -+ * is set. -+ */ -+ smp_mb__before_atomic(); -+ -+ /* Setting/unsetting threaded mode on a napi might not immediately -+ * take effect, if the current napi instance is actively being -+ * polled. In this case, the switch between threaded mode and -+ * softirq mode will happen in the next round of napi_schedule(). -+ * This should not cause hiccups/stalls to the live traffic. 
-+ */ -+ list_for_each_entry(napi, &dev->napi_list, dev_list) { -+ if (threaded) -+ set_bit(NAPI_STATE_THREADED, &napi->state); -+ else -+ clear_bit(NAPI_STATE_THREADED, &napi->state); -+ } -+ -+ return err; -+} -+ - void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) - { ---- a/net/core/net-sysfs.c -+++ b/net/core/net-sysfs.c -@@ -557,6 +557,45 @@ static ssize_t phys_switch_id_show(struc - } - static DEVICE_ATTR_RO(phys_switch_id); - -+static ssize_t threaded_show(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct net_device *netdev = to_net_dev(dev); -+ ssize_t ret = -EINVAL; -+ -+ if (!rtnl_trylock()) -+ return restart_syscall(); -+ -+ if (dev_isalive(netdev)) -+ ret = sprintf(buf, fmt_dec, netdev->threaded); -+ -+ rtnl_unlock(); -+ return ret; -+} -+ -+static int modify_napi_threaded(struct net_device *dev, unsigned long val) -+{ -+ int ret; -+ -+ if (list_empty(&dev->napi_list)) -+ return -EOPNOTSUPP; -+ -+ if (val != 0 && val != 1) -+ return -EOPNOTSUPP; -+ -+ ret = dev_set_threaded(dev, val); -+ -+ return ret; -+} -+ -+static ssize_t threaded_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t len) -+{ -+ return netdev_store(dev, attr, buf, len, modify_napi_threaded); -+} -+static DEVICE_ATTR_RW(threaded); -+ - static struct attribute *net_class_attrs[] __ro_after_init = { - &dev_attr_netdev_group.attr, - &dev_attr_type.attr, -@@ -587,6 +626,7 @@ static struct attribute *net_class_attrs - &dev_attr_proto_down.attr, - &dev_attr_carrier_up_count.attr, - &dev_attr_carrier_down_count.attr, -+ &dev_attr_threaded.attr, - NULL, - }; - ATTRIBUTE_GROUPS(net_class); diff --git a/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch b/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch deleted file mode 100644 index 764f33e3fc..0000000000 
--- a/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Wei Wang <weiwan@google.com> -Date: Mon, 1 Mar 2021 17:21:13 -0800 -Subject: [PATCH] net: fix race between napi kthread mode and busy poll - -Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to -determine if the kthread owns this napi and could call napi->poll() on -it. However, if socket busy poll is enabled, it is possible that the -busy poll thread grabs this SCHED bit (after the previous napi->poll() -invokes napi_complete_done() and clears SCHED bit) and tries to poll -on the same napi. napi_disable() could grab the SCHED bit as well. -This patch tries to fix this race by adding a new bit -NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in -____napi_schedule() if the threaded mode is enabled, and gets cleared -in napi_complete_done(), and we only poll the napi in kthread if this -bit is set. This helps distinguish the ownership of the napi between -kthread and other scenarios and fixes the race issue. 
- -Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") -Reported-by: Martin Zaharinov <micron10@gmail.com> -Suggested-by: Jakub Kicinski <kuba@kernel.org> -Signed-off-by: Wei Wang <weiwan@google.com> -Cc: Alexander Duyck <alexanderduyck@fb.com> -Cc: Eric Dumazet <edumazet@google.com> -Cc: Paolo Abeni <pabeni@redhat.com> -Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> ---- - ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -352,6 +352,7 @@ enum { - NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ - NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ - NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ -+ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ - }; - - enum { -@@ -363,6 +364,7 @@ enum { - NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), - NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), -+ NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), - }; - - enum gro_result { ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -3921,6 +3921,8 @@ static inline void ____napi_schedule(str - */ - thread = READ_ONCE(napi->thread); - if (thread) { -+ if (thread->state != TASK_INTERRUPTIBLE) -+ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); - wake_up_process(thread); - return; - } -@@ -6081,7 +6083,8 @@ bool napi_complete_done(struct napi_stru - - WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); - -- new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); -+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | -+ NAPIF_STATE_SCHED_THREADED); - - /* If STATE_MISSED was set, leave STATE_SCHED set, - * because we will call napi->poll() one more time. 
-@@ -6514,16 +6517,25 @@ static int napi_poll(struct napi_struct - - static int napi_thread_wait(struct napi_struct *napi) - { -+ bool woken = false; -+ - set_current_state(TASK_INTERRUPTIBLE); - - while (!kthread_should_stop() && !napi_disable_pending(napi)) { -- if (test_bit(NAPI_STATE_SCHED, &napi->state)) { -+ /* Testing SCHED_THREADED bit here to make sure the current -+ * kthread owns this napi and could poll on this napi. -+ * Testing SCHED bit is not enough because SCHED bit might be -+ * set by some other busy poll thread or by napi_disable(). -+ */ -+ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { - WARN_ON(!list_empty(&napi->poll_list)); - __set_current_state(TASK_RUNNING); - return 0; - } - - schedule(); -+ /* woken being true indicates this thread owns this napi. */ -+ woken = true; - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); diff --git a/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch b/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch deleted file mode 100644 index 5c48fdf5c1..0000000000 --- a/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Paolo Abeni <pabeni@redhat.com> -Date: Fri, 9 Apr 2021 17:24:17 +0200 -Subject: [PATCH] net: fix hangup on napi_disable for threaded napi - -napi_disable() is subject to an hangup, when the threaded -mode is enabled and the napi is under heavy traffic. - -If the relevant napi has been scheduled and the napi_disable() -kicks in before the next napi_threaded_wait() completes - so -that the latter quits due to the napi_disable_pending() condition, -the existing code leaves the NAPI_STATE_SCHED bit set and the -napi_disable() loop waiting for such bit will hang. - -This patch addresses the issue by dropping the NAPI_STATE_DISABLE -bit test in napi_thread_wait(). 
The later napi_threaded_poll() -iteration will take care of clearing the NAPI_STATE_SCHED. - -This also addresses a related problem reported by Jakub: -before this patch a napi_disable()/napi_enable() pair killed -the napi thread, effectively disabling the threaded mode. -On the patched kernel napi_disable() simply stops scheduling -the relevant thread. - -v1 -> v2: - - let the main napi_thread_poll() loop clear the SCHED bit - -Reported-by: Jakub Kicinski <kuba@kernel.org> -Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") -Signed-off-by: Paolo Abeni <pabeni@redhat.com> -Reviewed-by: Eric Dumazet <edumazet@google.com> -Link: https://lore.kernel.org/r/883923fa22745a9589e8610962b7dc59df09fb1f.1617981844.git.pabeni@redhat.com -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -6521,7 +6521,7 @@ static int napi_thread_wait(struct napi_ - - set_current_state(TASK_INTERRUPTIBLE); - -- while (!kthread_should_stop() && !napi_disable_pending(napi)) { -+ while (!kthread_should_stop()) { - /* Testing SCHED_THREADED bit here to make sure the current - * kthread owns this napi and could poll on this napi. 
- * Testing SCHED bit is not enough because SCHED bit might be -@@ -6539,6 +6539,7 @@ static int napi_thread_wait(struct napi_ - set_current_state(TASK_INTERRUPTIBLE); - } - __set_current_state(TASK_RUNNING); -+ - return -1; - } - diff --git a/target/linux/generic/backport-5.4/610-v5.9-net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch b/target/linux/generic/backport-5.4/610-v5.9-net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch deleted file mode 100644 index f1862943f8..0000000000 --- a/target/linux/generic/backport-5.4/610-v5.9-net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> -Date: Fri, 31 Jul 2020 19:26:16 +0300 -Subject: [PATCH] net: bridge: clear bridge's private skb space on xmit - -We need to clear all of the bridge private skb variables as they can be -stale due to the packet being recirculated through the stack and then -transmitted through the bridge device. Similar memset is already done on -bridge's input. We've seen cases where proxyarp_replied was 1 on routed -multicast packets transmitted through the bridge to ports with neigh -suppress which were getting dropped. Same thing can in theory happen with -the port isolation bit as well. - -Fixes: 821f1b21cabb ("bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood") -Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - ---- a/net/bridge/br_device.c -+++ b/net/bridge/br_device.c -@@ -36,6 +36,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff * - struct ethhdr *eth; - u16 vid = 0; - -+ memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); -+ - rcu_read_lock(); - nf_ops = rcu_dereference(nf_br_ops); - if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) { diff --git a/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch b/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch deleted file mode 100644 index 24b76cdca4..0000000000 --- a/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch +++ /dev/null @@ -1,78 +0,0 @@ -From: Alexander Lobakin <alobakin@dlink.ru> -Date: Fri, 15 Nov 2019 12:11:35 +0300 -Subject: [PATCH] net: core: allow fast GRO for skbs with Ethernet header in - head - -Commit 78d3fd0b7de8 ("gro: Only use skb_gro_header for completely -non-linear packets") back in May'09 (v2.6.31-rc1) has changed the -original condition '!skb_headlen(skb)' to -'skb->mac_header == skb->tail' in gro_reset_offset() saying: "Since -the drivers that need this optimisation all provide completely -non-linear packets" (note that this condition has become the current -'skb_mac_header(skb) == skb_tail_pointer(skb)' later with commmit -ced14f6804a9 ("net: Correct comparisons and calculations using -skb->tail and skb-transport_header") without any functional changes). - -For now, we have the following rough statistics for v5.4-rc7: -1) napi_gro_frags: 14 -2) napi_gro_receive with skb->head containing (most of) payload: 83 -3) napi_gro_receive with skb->head containing all the headers: 20 -4) napi_gro_receive with skb->head containing only Ethernet header: 2 - -With the current condition, fast GRO with the usage of -NAPI_GRO_CB(skb)->frag0 is available only in the [1] case. 
-Packets pushed by [2] and [3] go through the 'slow' path, but -it's not a problem for them as they already contain all the needed -headers in skb->head, so pskb_may_pull() only moves skb->data. - -The layout of skbs in the fourth [4] case at the moment of -dev_gro_receive() is identical to skbs that have come through [1], -as napi_frags_skb() pulls Ethernet header to skb->head. The only -difference is that the mentioned condition is always false for them, -because skb_put() and friends irreversibly alter the tail pointer. -They also go through the 'slow' path, but now every single -pskb_may_pull() in every single .gro_receive() will call the *really* -slow __pskb_pull_tail() to pull headers to head. This significantly -decreases the overall performance for no visible reasons. - -The only two users of method [4] is: -* drivers/staging/qlge -* drivers/net/wireless/iwlwifi (all three variants: dvm, mvm, mvm-mq) - -Note that in case with wireless drivers we can't use [1] -(napi_gro_frags()) at least for now and mac80211 stack always -performs pushes and pulls anyways, so performance hit is inavoidable. - -At the moment of v2.6.31 the mentioned change was necessary (that's -why I don't add the "Fixes:" tag), but it became obsolete since -skb_gro_mac_header() has gone in commit a50e233c50db ("net-gro: -restore frag0 optimization"), so we can simply revert the condition -in gro_reset_offset() to allow skbs from [4] go through the 'fast' -path just like in case [1]. - -This was tested on a 600 MHz MIPS CPU and a custom driver and this -patch gave boosts up to 40 Mbps to method [4] in both directions -comparing to net-next, which made overall performance relatively -close to [1] (without it, [4] is the slowest). - -v2: -- Add more references and explanations to commit message -- Fix some typos ibid -- No functional changes - -Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -5475,8 +5475,7 @@ static inline void skb_gro_reset_offset( - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; - -- if (skb_mac_header(skb) == skb_tail_pointer(skb) && -- pinfo->nr_frags && -+ if (!skb_headlen(skb) && pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0)) && - (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { - NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); diff --git a/target/linux/generic/backport-5.4/716-v5.5-net-sfp-move-fwnode-parsing-into-sfp-bus-layer.patch b/target/linux/generic/backport-5.4/716-v5.5-net-sfp-move-fwnode-parsing-into-sfp-bus-layer.patch deleted file mode 100644 index 92fe224873..0000000000 --- a/target/linux/generic/backport-5.4/716-v5.5-net-sfp-move-fwnode-parsing-into-sfp-bus-layer.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 4054955f0da08c81d42220cb445820d474f1ac92 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Sat, 14 Sep 2019 14:21:22 +0100 -Subject: [PATCH 614/660] net: sfp: move fwnode parsing into sfp-bus layer - -Rather than parsing the sfp firmware node in phylink, parse it in the -sfp-bus code, so we can re-use this code for PHYs without having to -duplicate the parsing. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/phylink.c | 21 ++++--------- - drivers/net/phy/sfp-bus.c | 65 +++++++++++++++++++++++++-------------- - include/linux/sfp.h | 10 +++--- - 3 files changed, 53 insertions(+), 43 deletions(-) - ---- a/drivers/net/phy/phylink.c -+++ b/drivers/net/phy/phylink.c -@@ -565,26 +565,17 @@ static const struct sfp_upstream_ops sfp - static int phylink_register_sfp(struct phylink *pl, - struct fwnode_handle *fwnode) - { -- struct fwnode_reference_args ref; -+ struct sfp_bus *bus; - int ret; - -- if (!fwnode) -- return 0; -- -- ret = fwnode_property_get_reference_args(fwnode, "sfp", NULL, -- 0, 0, &ref); -- if (ret < 0) { -- if (ret == -ENOENT) -- return 0; -- -- phylink_err(pl, "unable to parse \"sfp\" node: %d\n", -- ret); -+ bus = sfp_register_upstream_node(fwnode, pl, &sfp_phylink_ops); -+ if (IS_ERR(bus)) { -+ ret = PTR_ERR(bus); -+ phylink_err(pl, "unable to attach SFP bus: %d\n", ret); - return ret; - } - -- pl->sfp_bus = sfp_register_upstream(ref.fwnode, pl, &sfp_phylink_ops); -- if (!pl->sfp_bus) -- return -ENOMEM; -+ pl->sfp_bus = bus; - - return 0; - } ---- a/drivers/net/phy/sfp-bus.c -+++ b/drivers/net/phy/sfp-bus.c -@@ -4,6 +4,7 @@ - #include <linux/list.h> - #include <linux/mutex.h> - #include <linux/phylink.h> -+#include <linux/property.h> - #include <linux/rtnetlink.h> - #include <linux/slab.h> - -@@ -520,45 +521,63 @@ static void sfp_upstream_clear(struct sf - } - - /** -- * sfp_register_upstream() - Register the neighbouring device -- * @fwnode: firmware node for the SFP bus -+ * sfp_register_upstream_node() - parse and register the neighbouring device -+ * @fwnode: firmware node for the parent device (MAC or PHY) - * @upstream: the upstream private data - * @ops: the upstream's &struct sfp_upstream_ops - * -- * Register the upstream device (eg, PHY) with the SFP bus. MAC drivers -- * should use phylink, which will call this function for them. 
Returns -- * a pointer to the allocated &struct sfp_bus. -+ * Parse the parent device's firmware node for a SFP bus, and register the -+ * SFP bus using sfp_register_upstream(). - * -- * On error, returns %NULL. -+ * Returns: on success, a pointer to the sfp_bus structure, -+ * %NULL if no SFP is specified, -+ * on failure, an error pointer value: -+ * corresponding to the errors detailed for -+ * fwnode_property_get_reference_args(). -+ * %-ENOMEM if we failed to allocate the bus. -+ * an error from the upstream's connect_phy() method. - */ --struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, -- void *upstream, -- const struct sfp_upstream_ops *ops) --{ -- struct sfp_bus *bus = sfp_bus_get(fwnode); -- int ret = 0; -- -- if (bus) { -- rtnl_lock(); -- bus->upstream_ops = ops; -- bus->upstream = upstream; -+struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, -+ void *upstream, -+ const struct sfp_upstream_ops *ops) -+{ -+ struct fwnode_reference_args ref; -+ struct sfp_bus *bus; -+ int ret; - -- if (bus->sfp) { -- ret = sfp_register_bus(bus); -- if (ret) -- sfp_upstream_clear(bus); -- } -- rtnl_unlock(); -+ ret = fwnode_property_get_reference_args(fwnode, "sfp", NULL, -+ 0, 0, &ref); -+ if (ret == -ENOENT) -+ return NULL; -+ else if (ret < 0) -+ return ERR_PTR(ret); -+ -+ bus = sfp_bus_get(ref.fwnode); -+ fwnode_handle_put(ref.fwnode); -+ if (!bus) -+ return ERR_PTR(-ENOMEM); -+ -+ rtnl_lock(); -+ bus->upstream_ops = ops; -+ bus->upstream = upstream; -+ -+ if (bus->sfp) { -+ ret = sfp_register_bus(bus); -+ if (ret) -+ sfp_upstream_clear(bus); -+ } else { -+ ret = 0; - } -+ rtnl_unlock(); - - if (ret) { - sfp_bus_put(bus); -- bus = NULL; -+ bus = ERR_PTR(ret); - } - - return bus; - } --EXPORT_SYMBOL_GPL(sfp_register_upstream); -+EXPORT_SYMBOL_GPL(sfp_register_upstream_node); - - /** - * sfp_unregister_upstream() - Unregister sfp bus ---- a/include/linux/sfp.h -+++ b/include/linux/sfp.h -@@ -508,9 +508,9 @@ int 
sfp_get_module_eeprom(struct sfp_bus - u8 *data); - void sfp_upstream_start(struct sfp_bus *bus); - void sfp_upstream_stop(struct sfp_bus *bus); --struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, -- void *upstream, -- const struct sfp_upstream_ops *ops); -+struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, -+ void *upstream, -+ const struct sfp_upstream_ops *ops); - void sfp_unregister_upstream(struct sfp_bus *bus); - #else - static inline int sfp_parse_port(struct sfp_bus *bus, -@@ -553,11 +553,11 @@ static inline void sfp_upstream_stop(str - { - } - --static inline struct sfp_bus *sfp_register_upstream( -+static inline struct sfp_bus *sfp_register_upstream_node( - struct fwnode_handle *fwnode, void *upstream, - const struct sfp_upstream_ops *ops) - { -- return (struct sfp_bus *)-1; -+ return NULL; - } - - static inline void sfp_unregister_upstream(struct sfp_bus *bus) diff --git a/target/linux/generic/backport-5.4/717-v5.5-net-sfp-rework-upstream-interface.patch b/target/linux/generic/backport-5.4/717-v5.5-net-sfp-rework-upstream-interface.patch deleted file mode 100644 index 9175f2557a..0000000000 --- a/target/linux/generic/backport-5.4/717-v5.5-net-sfp-rework-upstream-interface.patch +++ /dev/null @@ -1,254 +0,0 @@ -From 863b5b6941f9f43b924393b6ba2b36647e7dee42 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Thu, 7 Nov 2019 17:06:08 +0000 -Subject: [PATCH 615/660] net: sfp: rework upstream interface - -The current upstream interface is an all-or-nothing, which is -sub-optimal for future changes, as it doesn't allow the upstream driver -to prepare for the SFP module becoming available, as it is at boot. - -Switch to a find-sfp-bus, add-upstream, del-upstream, put-sfp-bus -interface structure instead, which allows the upstream driver to -prepare for a module being available as soon as add-upstream is called. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/phylink.c | 10 +++-- - drivers/net/phy/sfp-bus.c | 92 +++++++++++++++++++++++++++------------ - include/linux/sfp.h | 25 +++++++---- - 3 files changed, 88 insertions(+), 39 deletions(-) - ---- a/drivers/net/phy/phylink.c -+++ b/drivers/net/phy/phylink.c -@@ -568,7 +568,7 @@ static int phylink_register_sfp(struct p - struct sfp_bus *bus; - int ret; - -- bus = sfp_register_upstream_node(fwnode, pl, &sfp_phylink_ops); -+ bus = sfp_bus_find_fwnode(fwnode); - if (IS_ERR(bus)) { - ret = PTR_ERR(bus); - phylink_err(pl, "unable to attach SFP bus: %d\n", ret); -@@ -577,7 +577,10 @@ static int phylink_register_sfp(struct p - - pl->sfp_bus = bus; - -- return 0; -+ ret = sfp_bus_add_upstream(bus, pl, &sfp_phylink_ops); -+ sfp_bus_put(bus); -+ -+ return ret; - } - - /** -@@ -675,8 +678,7 @@ EXPORT_SYMBOL_GPL(phylink_create); - */ - void phylink_destroy(struct phylink *pl) - { -- if (pl->sfp_bus) -- sfp_unregister_upstream(pl->sfp_bus); -+ sfp_bus_del_upstream(pl->sfp_bus); - if (pl->link_gpio) - gpiod_put(pl->link_gpio); - ---- a/drivers/net/phy/sfp-bus.c -+++ b/drivers/net/phy/sfp-bus.c -@@ -404,10 +404,19 @@ static void sfp_bus_release(struct kref - kfree(bus); - } - --static void sfp_bus_put(struct sfp_bus *bus) -+/** -+ * sfp_bus_put() - put a reference on the &struct sfp_bus -+ * bus: the &struct sfp_bus found via sfp_bus_find_fwnode() -+ * -+ * Put a reference on the &struct sfp_bus and free the underlying structure -+ * if this was the last reference. 
-+ */ -+void sfp_bus_put(struct sfp_bus *bus) - { -- kref_put_mutex(&bus->kref, sfp_bus_release, &sfp_mutex); -+ if (bus) -+ kref_put_mutex(&bus->kref, sfp_bus_release, &sfp_mutex); - } -+EXPORT_SYMBOL_GPL(sfp_bus_put); - - static int sfp_register_bus(struct sfp_bus *bus) - { -@@ -423,11 +432,11 @@ static int sfp_register_bus(struct sfp_b - return ret; - } - } -+ bus->registered = true; - bus->socket_ops->attach(bus->sfp); - if (bus->started) - bus->socket_ops->start(bus->sfp); - bus->upstream_ops->attach(bus->upstream, bus); -- bus->registered = true; - return 0; - } - -@@ -521,13 +530,12 @@ static void sfp_upstream_clear(struct sf - } - - /** -- * sfp_register_upstream_node() - parse and register the neighbouring device -+ * sfp_bus_find_fwnode() - parse and locate the SFP bus from fwnode - * @fwnode: firmware node for the parent device (MAC or PHY) -- * @upstream: the upstream private data -- * @ops: the upstream's &struct sfp_upstream_ops - * -- * Parse the parent device's firmware node for a SFP bus, and register the -- * SFP bus using sfp_register_upstream(). -+ * Parse the parent device's firmware node for a SFP bus, and locate -+ * the sfp_bus structure, incrementing its reference count. This must -+ * be put via sfp_bus_put() when done. - * - * Returns: on success, a pointer to the sfp_bus structure, - * %NULL if no SFP is specified, -@@ -537,9 +545,7 @@ static void sfp_upstream_clear(struct sf - * %-ENOMEM if we failed to allocate the bus. - * an error from the upstream's connect_phy() method. 
- */ --struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, -- void *upstream, -- const struct sfp_upstream_ops *ops) -+struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) - { - struct fwnode_reference_args ref; - struct sfp_bus *bus; -@@ -557,7 +563,39 @@ struct sfp_bus *sfp_register_upstream_no - if (!bus) - return ERR_PTR(-ENOMEM); - -+ return bus; -+} -+EXPORT_SYMBOL_GPL(sfp_bus_find_fwnode); -+ -+/** -+ * sfp_bus_add_upstream() - parse and register the neighbouring device -+ * @bus: the &struct sfp_bus found via sfp_bus_find_fwnode() -+ * @upstream: the upstream private data -+ * @ops: the upstream's &struct sfp_upstream_ops -+ * -+ * Add upstream driver for the SFP bus, and if the bus is complete, register -+ * the SFP bus using sfp_register_upstream(). This takes a reference on the -+ * bus, so it is safe to put the bus after this call. -+ * -+ * Returns: on success, a pointer to the sfp_bus structure, -+ * %NULL if no SFP is specified, -+ * on failure, an error pointer value: -+ * corresponding to the errors detailed for -+ * fwnode_property_get_reference_args(). -+ * %-ENOMEM if we failed to allocate the bus. -+ * an error from the upstream's connect_phy() method. 
-+ */ -+int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, -+ const struct sfp_upstream_ops *ops) -+{ -+ int ret; -+ -+ /* If no bus, return success */ -+ if (!bus) -+ return 0; -+ - rtnl_lock(); -+ kref_get(&bus->kref); - bus->upstream_ops = ops; - bus->upstream = upstream; - -@@ -570,33 +608,33 @@ struct sfp_bus *sfp_register_upstream_no - } - rtnl_unlock(); - -- if (ret) { -+ if (ret) - sfp_bus_put(bus); -- bus = ERR_PTR(ret); -- } - -- return bus; -+ return ret; - } --EXPORT_SYMBOL_GPL(sfp_register_upstream_node); -+EXPORT_SYMBOL_GPL(sfp_bus_add_upstream); - - /** -- * sfp_unregister_upstream() - Unregister sfp bus -+ * sfp_bus_del_upstream() - Delete a sfp bus - * @bus: a pointer to the &struct sfp_bus structure for the sfp module - * -- * Unregister a previously registered upstream connection for the SFP -- * module. @bus is returned from sfp_register_upstream(). -+ * Delete a previously registered upstream connection for the SFP -+ * module. @bus should have been added by sfp_bus_add_upstream(). 
- */ --void sfp_unregister_upstream(struct sfp_bus *bus) -+void sfp_bus_del_upstream(struct sfp_bus *bus) - { -- rtnl_lock(); -- if (bus->sfp) -- sfp_unregister_bus(bus); -- sfp_upstream_clear(bus); -- rtnl_unlock(); -+ if (bus) { -+ rtnl_lock(); -+ if (bus->sfp) -+ sfp_unregister_bus(bus); -+ sfp_upstream_clear(bus); -+ rtnl_unlock(); - -- sfp_bus_put(bus); -+ sfp_bus_put(bus); -+ } - } --EXPORT_SYMBOL_GPL(sfp_unregister_upstream); -+EXPORT_SYMBOL_GPL(sfp_bus_del_upstream); - - /* Socket driver entry points */ - int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev) ---- a/include/linux/sfp.h -+++ b/include/linux/sfp.h -@@ -508,10 +508,11 @@ int sfp_get_module_eeprom(struct sfp_bus - u8 *data); - void sfp_upstream_start(struct sfp_bus *bus); - void sfp_upstream_stop(struct sfp_bus *bus); --struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, -- void *upstream, -- const struct sfp_upstream_ops *ops); --void sfp_unregister_upstream(struct sfp_bus *bus); -+void sfp_bus_put(struct sfp_bus *bus); -+struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode); -+int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, -+ const struct sfp_upstream_ops *ops); -+void sfp_bus_del_upstream(struct sfp_bus *bus); - #else - static inline int sfp_parse_port(struct sfp_bus *bus, - const struct sfp_eeprom_id *id, -@@ -553,14 +554,22 @@ static inline void sfp_upstream_stop(str - { - } - --static inline struct sfp_bus *sfp_register_upstream_node( -- struct fwnode_handle *fwnode, void *upstream, -- const struct sfp_upstream_ops *ops) -+static inline void sfp_bus_put(struct sfp_bus *bus) -+{ -+} -+ -+static inline struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) - { - return NULL; - } - --static inline void sfp_unregister_upstream(struct sfp_bus *bus) -+static int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, -+ const struct sfp_upstream_ops *ops) -+{ -+ return 0; -+} -+ -+static inline void 
sfp_bus_del_upstream(struct sfp_bus *bus) - { - } - #endif diff --git a/target/linux/generic/backport-5.4/718-v5.5-net-sfp-fix-sfp_bus_put-kernel-documentation.patch b/target/linux/generic/backport-5.4/718-v5.5-net-sfp-fix-sfp_bus_put-kernel-documentation.patch deleted file mode 100644 index c7bfd8a304..0000000000 --- a/target/linux/generic/backport-5.4/718-v5.5-net-sfp-fix-sfp_bus_put-kernel-documentation.patch +++ /dev/null @@ -1,27 +0,0 @@ -From ea7bfd81921827d334c2a23bd11ef0e4e2abafd2 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Sat, 9 Nov 2019 08:13:50 +0000 -Subject: [PATCH 616/660] net: sfp: fix sfp_bus_put() kernel documentation - -The kbuild test robot found a problem with htmldocs with the recent -change to the SFP interfaces. Fix the kernel documentation for -sfp_bus_put() which was missing an '@' before the argument name -description. - -Fixes: 727b3668b730 ("net: sfp: rework upstream interface") -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp-bus.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/net/phy/sfp-bus.c -+++ b/drivers/net/phy/sfp-bus.c -@@ -406,7 +406,7 @@ static void sfp_bus_release(struct kref - - /** - * sfp_bus_put() - put a reference on the &struct sfp_bus -- * bus: the &struct sfp_bus found via sfp_bus_find_fwnode() -+ * @bus: the &struct sfp_bus found via sfp_bus_find_fwnode() - * - * Put a reference on the &struct sfp_bus and free the underlying structure - * if this was the last reference. 
diff --git a/target/linux/generic/backport-5.4/719-v5.5-net-sfp-fix-sfp_bus_add_upstream-warning.patch b/target/linux/generic/backport-5.4/719-v5.5-net-sfp-fix-sfp_bus_add_upstream-warning.patch deleted file mode 100644 index 9528049e1b..0000000000 --- a/target/linux/generic/backport-5.4/719-v5.5-net-sfp-fix-sfp_bus_add_upstream-warning.patch +++ /dev/null @@ -1,27 +0,0 @@ -From f76d84cd85f8bd3f083495f7ca723822cba8abc9 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Mon, 11 Nov 2019 10:23:35 +0000 -Subject: [PATCH 617/660] net: sfp: fix sfp_bus_add_upstream() warning - -When building with SFP disabled, the stub for sfp_bus_add_upstream() -missed "inline". Add it. - -Fixes: 727b3668b730 ("net: sfp: rework upstream interface") -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - include/linux/sfp.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/include/linux/sfp.h -+++ b/include/linux/sfp.h -@@ -563,8 +563,8 @@ static inline struct sfp_bus *sfp_bus_fi - return NULL; - } - --static int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, -- const struct sfp_upstream_ops *ops) -+static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, -+ const struct sfp_upstream_ops *ops) - { - return 0; - } diff --git a/target/linux/generic/backport-5.4/720-v5.5-net-sfp-move-sfp-sub-state-machines-into-separate-fu.patch b/target/linux/generic/backport-5.4/720-v5.5-net-sfp-move-sfp-sub-state-machines-into-separate-fu.patch deleted file mode 100644 index e4ca85b6e2..0000000000 --- a/target/linux/generic/backport-5.4/720-v5.5-net-sfp-move-sfp-sub-state-machines-into-separate-fu.patch +++ /dev/null @@ -1,124 +0,0 @@ -From b9d6ed5cdb67533feda7f221eb06f2f9f1ff5047 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 11 Oct 2019 19:33:58 +0100 -Subject: [PATCH 618/660] net: sfp: move sfp sub-state machines into separate - functions - -Move the SFP sub-state machines out 
of the main state machine function, -in preparation for it doing a bit more with the device state. By doing -so, we ensure that our debug after the main state machine is always -printed. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 74 +++++++++++++++++++++++++------------------ - 1 file changed, 43 insertions(+), 31 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1544,19 +1544,34 @@ static void sfp_sm_mod_remove(struct sfp - dev_info(sfp->dev, "module removed\n"); - } - --static void sfp_sm_event(struct sfp *sfp, unsigned int event) -+/* This state machine tracks the netdev up/down state */ -+static void sfp_sm_device(struct sfp *sfp, unsigned int event) - { -- mutex_lock(&sfp->sm_mutex); -+ switch (sfp->sm_dev_state) { -+ default: -+ if (event == SFP_E_DEV_UP) -+ sfp->sm_dev_state = SFP_DEV_UP; -+ break; - -- dev_dbg(sfp->dev, "SM: enter %s:%s:%s event %s\n", -- mod_state_to_str(sfp->sm_mod_state), -- dev_state_to_str(sfp->sm_dev_state), -- sm_state_to_str(sfp->sm_state), -- event_to_str(event)); -+ case SFP_DEV_UP: -+ if (event == SFP_E_DEV_DOWN) { -+ /* If the module has a PHY, avoid raising TX disable -+ * as this resets the PHY. Otherwise, raise it to -+ * turn the laser off. -+ */ -+ if (!sfp->mod_phy) -+ sfp_module_tx_disable(sfp); -+ sfp->sm_dev_state = SFP_DEV_DOWN; -+ } -+ break; -+ } -+} - -- /* This state machine tracks the insert/remove state of -- * the module, and handles probing the on-board EEPROM. -- */ -+/* This state machine tracks the insert/remove state of -+ * the module, and handles probing the on-board EEPROM. 
-+ */ -+static void sfp_sm_module(struct sfp *sfp, unsigned int event) -+{ - switch (sfp->sm_mod_state) { - default: - if (event == SFP_E_INSERT && sfp->attached) { -@@ -1596,27 +1611,10 @@ static void sfp_sm_event(struct sfp *sfp - } - break; - } -+} - -- /* This state machine tracks the netdev up/down state */ -- switch (sfp->sm_dev_state) { -- default: -- if (event == SFP_E_DEV_UP) -- sfp->sm_dev_state = SFP_DEV_UP; -- break; -- -- case SFP_DEV_UP: -- if (event == SFP_E_DEV_DOWN) { -- /* If the module has a PHY, avoid raising TX disable -- * as this resets the PHY. Otherwise, raise it to -- * turn the laser off. -- */ -- if (!sfp->mod_phy) -- sfp_module_tx_disable(sfp); -- sfp->sm_dev_state = SFP_DEV_DOWN; -- } -- break; -- } -- -+static void sfp_sm_main(struct sfp *sfp, unsigned int event) -+{ - /* Some events are global */ - if (sfp->sm_state != SFP_S_DOWN && - (sfp->sm_mod_state != SFP_MOD_PRESENT || -@@ -1627,7 +1625,6 @@ static void sfp_sm_event(struct sfp *sfp - if (sfp->mod_phy) - sfp_sm_phy_detach(sfp); - sfp_sm_next(sfp, SFP_S_DOWN, 0); -- mutex_unlock(&sfp->sm_mutex); - return; - } - -@@ -1682,6 +1679,21 @@ static void sfp_sm_event(struct sfp *sfp - case SFP_S_TX_DISABLE: - break; - } -+} -+ -+static void sfp_sm_event(struct sfp *sfp, unsigned int event) -+{ -+ mutex_lock(&sfp->sm_mutex); -+ -+ dev_dbg(sfp->dev, "SM: enter %s:%s:%s event %s\n", -+ mod_state_to_str(sfp->sm_mod_state), -+ dev_state_to_str(sfp->sm_dev_state), -+ sm_state_to_str(sfp->sm_state), -+ event_to_str(event)); -+ -+ sfp_sm_module(sfp, event); -+ sfp_sm_device(sfp, event); -+ sfp_sm_main(sfp, event); - - dev_dbg(sfp->dev, "SM: exit %s:%s:%s\n", - mod_state_to_str(sfp->sm_mod_state), diff --git a/target/linux/generic/backport-5.4/721-v5.5-net-sfp-move-tx-disable-on-device-down-to-main-state.patch b/target/linux/generic/backport-5.4/721-v5.5-net-sfp-move-tx-disable-on-device-down-to-main-state.patch deleted file mode 100644 index 71021c8f4e..0000000000 --- 
a/target/linux/generic/backport-5.4/721-v5.5-net-sfp-move-tx-disable-on-device-down-to-main-state.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 7e89b737c97a9e7a81dd1584000bc136b92f12fd Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 11 Oct 2019 22:14:47 +0100 -Subject: [PATCH 619/660] net: sfp: move tx disable on device down to main - state machine - -Move the tx disable assertion on device down to the main state -machine. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1554,15 +1554,8 @@ static void sfp_sm_device(struct sfp *sf - break; - - case SFP_DEV_UP: -- if (event == SFP_E_DEV_DOWN) { -- /* If the module has a PHY, avoid raising TX disable -- * as this resets the PHY. Otherwise, raise it to -- * turn the laser off. -- */ -- if (!sfp->mod_phy) -- sfp_module_tx_disable(sfp); -+ if (event == SFP_E_DEV_DOWN) - sfp->sm_dev_state = SFP_DEV_DOWN; -- } - break; - } - } -@@ -1624,6 +1617,7 @@ static void sfp_sm_main(struct sfp *sfp, - sfp_sm_link_down(sfp); - if (sfp->mod_phy) - sfp_sm_phy_detach(sfp); -+ sfp_module_tx_disable(sfp); - sfp_sm_next(sfp, SFP_S_DOWN, 0); - return; - } diff --git a/target/linux/generic/backport-5.4/722-v5.5-net-sfp-rename-sfp_sm_ins_next-as-sfp_sm_mod_next.patch b/target/linux/generic/backport-5.4/722-v5.5-net-sfp-rename-sfp_sm_ins_next-as-sfp_sm_mod_next.patch deleted file mode 100644 index 2974586b13..0000000000 --- a/target/linux/generic/backport-5.4/722-v5.5-net-sfp-rename-sfp_sm_ins_next-as-sfp_sm_mod_next.patch +++ /dev/null @@ -1,71 +0,0 @@ -From f2a1ccfc4ad4f97c98c3cc18eb32992151ce089a Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 11 Oct 2019 22:27:21 +0100 -Subject: [PATCH 620/660] net: sfp: rename sfp_sm_ins_next() as - sfp_sm_mod_next() - -sfp_sm_ins_next() modifies the module state 
machine. Change it's name -to reflect this. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1245,7 +1245,7 @@ static void sfp_sm_next(struct sfp *sfp, - sfp_sm_set_timer(sfp, timeout); - } - --static void sfp_sm_ins_next(struct sfp *sfp, unsigned int state, -+static void sfp_sm_mod_next(struct sfp *sfp, unsigned int state, - unsigned int timeout) - { - sfp->sm_mod_state = state; -@@ -1569,22 +1569,22 @@ static void sfp_sm_module(struct sfp *sf - default: - if (event == SFP_E_INSERT && sfp->attached) { - sfp_module_tx_disable(sfp); -- sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT); -+ sfp_sm_mod_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT); - } - break; - - case SFP_MOD_PROBE: - if (event == SFP_E_REMOVE) { -- sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0); -+ sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); - } else if (event == SFP_E_TIMEOUT) { - int val = sfp_sm_mod_probe(sfp); - - if (val == 0) -- sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0); -+ sfp_sm_mod_next(sfp, SFP_MOD_PRESENT, 0); - else if (val > 0) -- sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val); -+ sfp_sm_mod_next(sfp, SFP_MOD_HPOWER, val); - else if (val != -EAGAIN) -- sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0); -+ sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); - else - sfp_sm_set_timer(sfp, T_PROBE_RETRY); - } -@@ -1592,7 +1592,7 @@ static void sfp_sm_module(struct sfp *sf - - case SFP_MOD_HPOWER: - if (event == SFP_E_TIMEOUT) { -- sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0); -+ sfp_sm_mod_next(sfp, SFP_MOD_PRESENT, 0); - break; - } - /* fallthrough */ -@@ -1600,7 +1600,7 @@ static void sfp_sm_module(struct sfp *sf - case SFP_MOD_ERROR: - if (event == SFP_E_REMOVE) { - sfp_sm_mod_remove(sfp); -- sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0); -+ sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); - } - break; - } diff --git 
a/target/linux/generic/backport-5.4/723-v5.5-net-sfp-handle-module-remove-outside-state-machine.patch b/target/linux/generic/backport-5.4/723-v5.5-net-sfp-handle-module-remove-outside-state-machine.patch deleted file mode 100644 index 62cdb8a6ce..0000000000 --- a/target/linux/generic/backport-5.4/723-v5.5-net-sfp-handle-module-remove-outside-state-machine.patch +++ /dev/null @@ -1,53 +0,0 @@ -From d2591ea5520e2ee8fa557f96bb64c23cafac4b20 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Tue, 15 Oct 2019 10:33:13 +0100 -Subject: [PATCH 621/660] net: sfp: handle module remove outside state machine - -Removing a module resets the module state machine back to its initial -state. Rather than explicitly handling this in every state, handle it -early on outside of the state machine. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 16 +++++++++------- - 1 file changed, 9 insertions(+), 7 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1565,6 +1565,14 @@ static void sfp_sm_device(struct sfp *sf - */ - static void sfp_sm_module(struct sfp *sfp, unsigned int event) - { -+ /* Handle remove event globally, it resets this state machine */ -+ if (event == SFP_E_REMOVE) { -+ if (sfp->sm_mod_state > SFP_MOD_PROBE) -+ sfp_sm_mod_remove(sfp); -+ sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); -+ return; -+ } -+ - switch (sfp->sm_mod_state) { - default: - if (event == SFP_E_INSERT && sfp->attached) { -@@ -1574,9 +1582,7 @@ static void sfp_sm_module(struct sfp *sf - break; - - case SFP_MOD_PROBE: -- if (event == SFP_E_REMOVE) { -- sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); -- } else if (event == SFP_E_TIMEOUT) { -+ if (event == SFP_E_TIMEOUT) { - int val = sfp_sm_mod_probe(sfp); - - if (val == 0) -@@ -1598,10 +1604,6 @@ static void sfp_sm_module(struct sfp *sf - /* fallthrough */ - case SFP_MOD_PRESENT: - case SFP_MOD_ERROR: -- if (event == SFP_E_REMOVE) { -- sfp_sm_mod_remove(sfp); -- 
sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); -- } - break; - } - } diff --git a/target/linux/generic/backport-5.4/724-v5.5-net-sfp-rename-T_PROBE_WAIT-to-T_SERIAL.patch b/target/linux/generic/backport-5.4/724-v5.5-net-sfp-rename-T_PROBE_WAIT-to-T_SERIAL.patch deleted file mode 100644 index 780e7d7876..0000000000 --- a/target/linux/generic/backport-5.4/724-v5.5-net-sfp-rename-T_PROBE_WAIT-to-T_SERIAL.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 615090acb3c0b41691f3a03522ea38350387c0e4 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Tue, 15 Oct 2019 10:54:15 +0100 -Subject: [PATCH 622/660] net: sfp: rename T_PROBE_WAIT to T_SERIAL - -SFF-8472 rev 12.2 defines the time for the serial bus to become ready -using t_serial. Use this as our identifier for this timeout to make -it clear what we are referring to. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 13 ++++++------- - 1 file changed, 6 insertions(+), 7 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -149,11 +149,10 @@ static const enum gpiod_flags gpio_flags - * the same length on the PCB, which means it's possible for MOD DEF 0 to - * connect before the I2C bus on MOD DEF 1/2. - * -- * The SFP MSA specifies 300ms as t_init (the time taken for TX_FAULT to -- * be deasserted) but makes no mention of the earliest time before we can -- * access the I2C EEPROM. However, Avago modules require 300ms. -+ * The SFF-8472 specifies t_serial ("Time from power on until module is -+ * ready for data transmission over the two wire serial bus.") as 300ms. - */ --#define T_PROBE_INIT msecs_to_jiffies(300) -+#define T_SERIAL msecs_to_jiffies(300) - #define T_HPOWER_LEVEL msecs_to_jiffies(300) - #define T_PROBE_RETRY msecs_to_jiffies(100) - -@@ -1560,8 +1559,8 @@ static void sfp_sm_device(struct sfp *sf - } - } - --/* This state machine tracks the insert/remove state of -- * the module, and handles probing the on-board EEPROM. 
-+/* This state machine tracks the insert/remove state of the module, probes -+ * the on-board EEPROM, and sets up the power level. - */ - static void sfp_sm_module(struct sfp *sfp, unsigned int event) - { -@@ -1577,7 +1576,7 @@ static void sfp_sm_module(struct sfp *sf - default: - if (event == SFP_E_INSERT && sfp->attached) { - sfp_module_tx_disable(sfp); -- sfp_sm_mod_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT); -+ sfp_sm_mod_next(sfp, SFP_MOD_PROBE, T_SERIAL); - } - break; - diff --git a/target/linux/generic/backport-5.4/725-v5.5-net-sfp-parse-SFP-power-requirement-earlier.patch b/target/linux/generic/backport-5.4/725-v5.5-net-sfp-parse-SFP-power-requirement-earlier.patch deleted file mode 100644 index df5ef9f79e..0000000000 --- a/target/linux/generic/backport-5.4/725-v5.5-net-sfp-parse-SFP-power-requirement-earlier.patch +++ /dev/null @@ -1,115 +0,0 @@ -From d4b8746219e8c0361e5ed6e440ab3a8a600d1f76 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 11 Oct 2019 17:24:40 +0100 -Subject: [PATCH 623/660] net: sfp: parse SFP power requirement earlier - -Parse the SFP power requirement earlier, in preparation for moving the -power level setup code. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 42 +++++++++++++++++++++++++++++------------- - 1 file changed, 29 insertions(+), 13 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -198,6 +198,8 @@ struct sfp { - unsigned int sm_retries; - - struct sfp_eeprom_id id; -+ unsigned int module_power_mW; -+ - #if IS_ENABLED(CONFIG_HWMON) - struct sfp_diag diag; - struct device *hwmon_dev; -@@ -1374,17 +1376,14 @@ static void sfp_sm_mod_init(struct sfp * - sfp_sm_probe_phy(sfp); - } - --static int sfp_sm_mod_hpower(struct sfp *sfp) -+static int sfp_module_parse_power(struct sfp *sfp) - { -- u32 power; -- u8 val; -- int err; -+ u32 power_mW = 1000; - -- power = 1000; - if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL)) -- power = 1500; -+ power_mW = 1500; - if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL)) -- power = 2000; -+ power_mW = 2000; - - if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE && - (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) != -@@ -1393,23 +1392,33 @@ static int sfp_sm_mod_hpower(struct sfp - * or requires an address change sequence, so assume that - * the module powers up in the indicated power mode. 
- */ -- if (power > sfp->max_power_mW) { -+ if (power_mW > sfp->max_power_mW) { - dev_err(sfp->dev, - "Host does not support %u.%uW modules\n", -- power / 1000, (power / 100) % 10); -+ power_mW / 1000, (power_mW / 100) % 10); - return -EINVAL; - } - return 0; - } - -- if (power > sfp->max_power_mW) { -+ if (power_mW > sfp->max_power_mW) { - dev_warn(sfp->dev, - "Host does not support %u.%uW modules, module left in power mode 1\n", -- power / 1000, (power / 100) % 10); -+ power_mW / 1000, (power_mW / 100) % 10); - return 0; - } - -- if (power <= 1000) -+ sfp->module_power_mW = power_mW; -+ -+ return 0; -+} -+ -+static int sfp_sm_mod_hpower(struct sfp *sfp) -+{ -+ u8 val; -+ int err; -+ -+ if (sfp->module_power_mW <= 1000) - return 0; - - err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); -@@ -1429,7 +1438,8 @@ static int sfp_sm_mod_hpower(struct sfp - } - - dev_info(sfp->dev, "Module switched to %u.%uW power level\n", -- power / 1000, (power / 100) % 10); -+ sfp->module_power_mW / 1000, -+ (sfp->module_power_mW / 100) % 10); - return T_HPOWER_LEVEL; - - err: -@@ -1516,6 +1526,11 @@ static int sfp_sm_mod_probe(struct sfp * - dev_warn(sfp->dev, - "module address swap to access page 0xA2 is not supported.\n"); - -+ /* Parse the module power requirement */ -+ ret = sfp_module_parse_power(sfp); -+ if (ret < 0) -+ return ret; -+ - ret = sfp_hwmon_insert(sfp); - if (ret < 0) - return ret; -@@ -1539,6 +1554,7 @@ static void sfp_sm_mod_remove(struct sfp - sfp_module_tx_disable(sfp); - - memset(&sfp->id, 0, sizeof(sfp->id)); -+ sfp->module_power_mW = 0; - - dev_info(sfp->dev, "module removed\n"); - } diff --git a/target/linux/generic/backport-5.4/726-v5.5-net-sfp-avoid-power-switch-on-address-change-modules.patch b/target/linux/generic/backport-5.4/726-v5.5-net-sfp-avoid-power-switch-on-address-change-modules.patch deleted file mode 100644 index 5237f55055..0000000000 --- 
a/target/linux/generic/backport-5.4/726-v5.5-net-sfp-avoid-power-switch-on-address-change-modules.patch +++ /dev/null @@ -1,65 +0,0 @@ -From dca678b8838945572cf50584cb33a7199c1fd397 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Thu, 17 Oct 2019 00:24:18 +0100 -Subject: [PATCH 624/660] net: sfp: avoid power switch on address-change - modules - -If the module indicates that it requires an address change sequence to -switch between address 0x50 and 0x51, which we don't support, we can't -write to the register that controls the power mode to switch to high -power mode. Warn the user that the module may not be functional in -this case, and don't try to change the power mode. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 31 ++++++++++++++++++++----------- - 1 file changed, 20 insertions(+), 11 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1385,25 +1385,34 @@ static int sfp_module_parse_power(struct - if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL)) - power_mW = 2000; - -- if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE && -- (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) != -- SFP_DIAGMON_DDM) { -- /* The module appears not to implement bus address 0xa2, -- * or requires an address change sequence, so assume that -- * the module powers up in the indicated power mode. -- */ -- if (power_mW > sfp->max_power_mW) { -+ if (power_mW > sfp->max_power_mW) { -+ /* Module power specification exceeds the allowed maximum. */ -+ if (sfp->id.ext.sff8472_compliance == -+ SFP_SFF8472_COMPLIANCE_NONE && -+ !(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) { -+ /* The module appears not to implement bus address -+ * 0xa2, so assume that the module powers up in the -+ * indicated mode. 
-+ */ - dev_err(sfp->dev, - "Host does not support %u.%uW modules\n", - power_mW / 1000, (power_mW / 100) % 10); - return -EINVAL; -+ } else { -+ dev_warn(sfp->dev, -+ "Host does not support %u.%uW modules, module left in power mode 1\n", -+ power_mW / 1000, (power_mW / 100) % 10); -+ return 0; - } -- return 0; - } - -- if (power_mW > sfp->max_power_mW) { -+ /* If the module requires a higher power mode, but also requires -+ * an address change sequence, warn the user that the module may -+ * not be functional. -+ */ -+ if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE && power_mW > 1000) { - dev_warn(sfp->dev, -- "Host does not support %u.%uW modules, module left in power mode 1\n", -+ "Address Change Sequence not supported but module requies %u.%uW, module may not be functional\n", - power_mW / 1000, (power_mW / 100) % 10); - return 0; - } diff --git a/target/linux/generic/backport-5.4/727-v5.5-net-sfp-control-TX_DISABLE-and-phy-only-from-main-st.patch b/target/linux/generic/backport-5.4/727-v5.5-net-sfp-control-TX_DISABLE-and-phy-only-from-main-st.patch deleted file mode 100644 index eebcac639f..0000000000 --- a/target/linux/generic/backport-5.4/727-v5.5-net-sfp-control-TX_DISABLE-and-phy-only-from-main-st.patch +++ /dev/null @@ -1,52 +0,0 @@ -From df5c4d93c5a59cba0f7479a4cd4e22b50726ce88 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Thu, 17 Oct 2019 11:12:42 +0100 -Subject: [PATCH 625/660] net: sfp: control TX_DISABLE and phy only from main - state machine - -We initialise TX_DISABLE when the sfp cage is probed, and then -maintain its state in the main state machine. However, the module -state machine: -- negates it when detecting a newly inserted module when it's already - guaranteed to be negated. -- negates it when the module is removed, but the main state machine - will do this anyway. - -Make TX_DISABLE entirely controlled by the main state machine. 
- -The main state machine also probes the module for a PHY, and removes -the PHY when the the module is removed. Hence, removing the PHY in -sfp_sm_module_remove() is also redundant, and is a left-over from -when we tried to probe for the PHY from the module state machine. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 9 +-------- - 1 file changed, 1 insertion(+), 8 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1557,11 +1557,6 @@ static void sfp_sm_mod_remove(struct sfp - - sfp_hwmon_remove(sfp); - -- if (sfp->mod_phy) -- sfp_sm_phy_detach(sfp); -- -- sfp_module_tx_disable(sfp); -- - memset(&sfp->id, 0, sizeof(sfp->id)); - sfp->module_power_mW = 0; - -@@ -1599,10 +1594,8 @@ static void sfp_sm_module(struct sfp *sf - - switch (sfp->sm_mod_state) { - default: -- if (event == SFP_E_INSERT && sfp->attached) { -- sfp_module_tx_disable(sfp); -+ if (event == SFP_E_INSERT && sfp->attached) - sfp_sm_mod_next(sfp, SFP_MOD_PROBE, T_SERIAL); -- } - break; - - case SFP_MOD_PROBE: diff --git a/target/linux/generic/backport-5.4/728-v5.5-net-sfp-split-the-PHY-probe-from-sfp_sm_mod_init.patch b/target/linux/generic/backport-5.4/728-v5.5-net-sfp-split-the-PHY-probe-from-sfp_sm_mod_init.patch deleted file mode 100644 index 92df26c6a2..0000000000 --- a/target/linux/generic/backport-5.4/728-v5.5-net-sfp-split-the-PHY-probe-from-sfp_sm_mod_init.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 5ed0bd49b2d3ac4439c2d7f44e5a82b7cf6f409a Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 18 Oct 2019 10:09:02 +0100 -Subject: [PATCH 626/660] net: sfp: split the PHY probe from sfp_sm_mod_init() - -Move the PHY probe into a separate function, splitting it from -sfp_sm_mod_init(). This will allow us to eliminate the 50ms mdelay() -inside the state machine. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 21 +++++++++++++-------- - 1 file changed, 13 insertions(+), 8 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1353,14 +1353,10 @@ static void sfp_sm_fault(struct sfp *sfp - static void sfp_sm_mod_init(struct sfp *sfp) - { - sfp_module_tx_enable(sfp); -+} - -- /* Wait t_init before indicating that the link is up, provided the -- * current state indicates no TX_FAULT. If TX_FAULT clears before -- * this time, that's fine too. -- */ -- sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES); -- sfp->sm_retries = 5; -- -+static void sfp_sm_probe_for_phy(struct sfp *sfp) -+{ - /* Setting the serdes link mode is guesswork: there's no - * field in the EEPROM which indicates what mode should - * be used. -@@ -1645,8 +1641,17 @@ static void sfp_sm_main(struct sfp *sfp, - switch (sfp->sm_state) { - case SFP_S_DOWN: - if (sfp->sm_mod_state == SFP_MOD_PRESENT && -- sfp->sm_dev_state == SFP_DEV_UP) -+ sfp->sm_dev_state == SFP_DEV_UP) { - sfp_sm_mod_init(sfp); -+ sfp_sm_probe_for_phy(sfp); -+ -+ /* Wait t_init before indicating that the link is up, -+ * provided the current state indicates no TX_FAULT. If -+ * TX_FAULT clears before this time, that's fine too. 
-+ */ -+ sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES); -+ sfp->sm_retries = 5; -+ } - break; - - case SFP_S_INIT: diff --git a/target/linux/generic/backport-5.4/729-v5.5-net-sfp-eliminate-mdelay-from-PHY-probe.patch b/target/linux/generic/backport-5.4/729-v5.5-net-sfp-eliminate-mdelay-from-PHY-probe.patch deleted file mode 100644 index e26a7276d3..0000000000 --- a/target/linux/generic/backport-5.4/729-v5.5-net-sfp-eliminate-mdelay-from-PHY-probe.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 0fe72afaa31f98ebd71bd6683fc47021105d0157 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 18 Oct 2019 10:21:46 +0100 -Subject: [PATCH 627/660] net: sfp: eliminate mdelay() from PHY probe - -Rather than using mdelay() to wait before probing the PHY (which holds -several locks, including the rtnl lock), add an extra wait state to -the state machine to introduce the 50ms delay without holding any -locks. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 52 +++++++++++++++++++++++++++++++++---------- - 1 file changed, 40 insertions(+), 12 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -54,6 +54,7 @@ enum { - SFP_DEV_UP, - - SFP_S_DOWN = 0, -+ SFP_S_WAIT, - SFP_S_INIT, - SFP_S_WAIT_LOS, - SFP_S_LINK_UP, -@@ -110,6 +111,7 @@ static const char *event_to_str(unsigned - - static const char * const sm_state_strings[] = { - [SFP_S_DOWN] = "down", -+ [SFP_S_WAIT] = "wait", - [SFP_S_INIT] = "init", - [SFP_S_WAIT_LOS] = "wait_los", - [SFP_S_LINK_UP] = "link_up", -@@ -141,6 +143,7 @@ static const enum gpiod_flags gpio_flags - GPIOD_ASIS, - }; - -+#define T_WAIT msecs_to_jiffies(50) - #define T_INIT_JIFFIES msecs_to_jiffies(300) - #define T_RESET_US 10 - #define T_FAULT_RECOVER msecs_to_jiffies(1000) -@@ -161,9 +164,6 @@ static const enum gpiod_flags gpio_flags - */ - #define SFP_PHY_ADDR 22 - --/* Give this long for the PHY to reset. 
*/ --#define T_PHY_RESET_MS 50 -- - struct sff_data { - unsigned int gpios; - bool (*module_supported)(const struct sfp_eeprom_id *id); -@@ -1267,8 +1267,6 @@ static void sfp_sm_probe_phy(struct sfp - struct phy_device *phy; - int err; - -- msleep(T_PHY_RESET_MS); -- - phy = mdiobus_scan(sfp->i2c_mii, SFP_PHY_ADDR); - if (phy == ERR_PTR(-ENODEV)) { - dev_info(sfp->dev, "no PHY detected\n"); -@@ -1623,6 +1621,8 @@ static void sfp_sm_module(struct sfp *sf - - static void sfp_sm_main(struct sfp *sfp, unsigned int event) - { -+ unsigned long timeout; -+ - /* Some events are global */ - if (sfp->sm_state != SFP_S_DOWN && - (sfp->sm_mod_state != SFP_MOD_PRESENT || -@@ -1640,17 +1640,45 @@ static void sfp_sm_main(struct sfp *sfp, - /* The main state machine */ - switch (sfp->sm_state) { - case SFP_S_DOWN: -- if (sfp->sm_mod_state == SFP_MOD_PRESENT && -- sfp->sm_dev_state == SFP_DEV_UP) { -- sfp_sm_mod_init(sfp); -- sfp_sm_probe_for_phy(sfp); -+ if (sfp->sm_mod_state != SFP_MOD_PRESENT || -+ sfp->sm_dev_state != SFP_DEV_UP) -+ break; -+ -+ sfp_sm_mod_init(sfp); -+ -+ /* Initialise the fault clearance retries */ -+ sfp->sm_retries = 5; -+ -+ /* We need to check the TX_FAULT state, which is not defined -+ * while TX_DISABLE is asserted. The earliest we want to do -+ * anything (such as probe for a PHY) is 50ms. -+ */ -+ sfp_sm_next(sfp, SFP_S_WAIT, T_WAIT); -+ break; -+ -+ case SFP_S_WAIT: -+ if (event != SFP_E_TIMEOUT) -+ break; -+ -+ sfp_sm_probe_for_phy(sfp); - -+ if (sfp->state & SFP_F_TX_FAULT) { - /* Wait t_init before indicating that the link is up, - * provided the current state indicates no TX_FAULT. If - * TX_FAULT clears before this time, that's fine too. 
- */ -- sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES); -- sfp->sm_retries = 5; -+ timeout = T_INIT_JIFFIES; -+ if (timeout > T_WAIT) -+ timeout -= T_WAIT; -+ else -+ timeout = 1; -+ -+ sfp_sm_next(sfp, SFP_S_INIT, timeout); -+ } else { -+ /* TX_FAULT is not asserted, assume the module has -+ * finished initialising. -+ */ -+ goto init_done; - } - break; - -@@ -1658,7 +1686,7 @@ static void sfp_sm_main(struct sfp *sfp, - if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) - sfp_sm_fault(sfp, true); - else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) -- sfp_sm_link_check_los(sfp); -+ init_done: sfp_sm_link_check_los(sfp); - break; - - case SFP_S_WAIT_LOS: diff --git a/target/linux/generic/backport-5.4/730-v5.5-net-sfp-allow-fault-processing-to-transition-to-othe.patch b/target/linux/generic/backport-5.4/730-v5.5-net-sfp-allow-fault-processing-to-transition-to-othe.patch deleted file mode 100644 index d45b0618bd..0000000000 --- a/target/linux/generic/backport-5.4/730-v5.5-net-sfp-allow-fault-processing-to-transition-to-othe.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 2aa424ee7fbe43e2cd24e28c2f6388c4e1796bd2 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 18 Oct 2019 09:58:33 +0100 -Subject: [PATCH 628/660] net: sfp: allow fault processing to transition to - other states - -Add the next state to sfp_sm_fault() so that it can branch to other -states. This will be necessary to improve the initialisation path. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -1334,7 +1334,7 @@ static bool sfp_los_event_inactive(struc - event == SFP_E_LOS_LOW); - } - --static void sfp_sm_fault(struct sfp *sfp, bool warn) -+static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) - { - if (sfp->sm_retries && !--sfp->sm_retries) { - dev_err(sfp->dev, -@@ -1344,7 +1344,7 @@ static void sfp_sm_fault(struct sfp *sfp - if (warn) - dev_err(sfp->dev, "module transmit fault indicated\n"); - -- sfp_sm_next(sfp, SFP_S_TX_FAULT, T_FAULT_RECOVER); -+ sfp_sm_next(sfp, next_state, T_FAULT_RECOVER); - } - } - -@@ -1684,14 +1684,14 @@ static void sfp_sm_main(struct sfp *sfp, - - case SFP_S_INIT: - if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) -- sfp_sm_fault(sfp, true); -+ sfp_sm_fault(sfp, SFP_S_TX_FAULT, true); - else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) - init_done: sfp_sm_link_check_los(sfp); - break; - - case SFP_S_WAIT_LOS: - if (event == SFP_E_TX_FAULT) -- sfp_sm_fault(sfp, true); -+ sfp_sm_fault(sfp, SFP_S_TX_FAULT, true); - else if (sfp_los_event_inactive(sfp, event)) - sfp_sm_link_up(sfp); - break; -@@ -1699,7 +1699,7 @@ static void sfp_sm_main(struct sfp *sfp, - case SFP_S_LINK_UP: - if (event == SFP_E_TX_FAULT) { - sfp_sm_link_down(sfp); -- sfp_sm_fault(sfp, true); -+ sfp_sm_fault(sfp, SFP_S_TX_FAULT, true); - } else if (sfp_los_event_active(sfp, event)) { - sfp_sm_link_down(sfp); - sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); -@@ -1715,7 +1715,7 @@ static void sfp_sm_main(struct sfp *sfp, - - case SFP_S_REINIT: - if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) { -- sfp_sm_fault(sfp, false); -+ sfp_sm_fault(sfp, SFP_S_TX_FAULT, false); - } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { - dev_info(sfp->dev, "module transmit fault recovered\n"); - 
sfp_sm_link_check_los(sfp); diff --git a/target/linux/generic/backport-5.4/731-v5.5-net-sfp-ensure-TX_FAULT-has-deasserted-before-probin.patch b/target/linux/generic/backport-5.4/731-v5.5-net-sfp-ensure-TX_FAULT-has-deasserted-before-probin.patch deleted file mode 100644 index acca29be87..0000000000 --- a/target/linux/generic/backport-5.4/731-v5.5-net-sfp-ensure-TX_FAULT-has-deasserted-before-probin.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 38b62a12231be4b86fc5ca5477579d29831c02a5 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 18 Oct 2019 10:31:07 +0100 -Subject: [PATCH 629/660] net: sfp: ensure TX_FAULT has deasserted before - probing the PHY - -TX_FAULT should be deasserted to indicate that the module has completed -its initialisation. This may include the on-board PHY, so wait until -the module has deasserted TX_FAULT before probing the PHY. - -This means that we need an extra state to handle a TX_FAULT that -remains set for longer than t_init, since using the existing handling -state would bypass the PHY probe. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 31 +++++++++++++++++++++++++------ - 1 file changed, 25 insertions(+), 6 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -56,6 +56,7 @@ enum { - SFP_S_DOWN = 0, - SFP_S_WAIT, - SFP_S_INIT, -+ SFP_S_INIT_TX_FAULT, - SFP_S_WAIT_LOS, - SFP_S_LINK_UP, - SFP_S_TX_FAULT, -@@ -113,6 +114,7 @@ static const char * const sm_state_strin - [SFP_S_DOWN] = "down", - [SFP_S_WAIT] = "wait", - [SFP_S_INIT] = "init", -+ [SFP_S_INIT_TX_FAULT] = "init_tx_fault", - [SFP_S_WAIT_LOS] = "wait_los", - [SFP_S_LINK_UP] = "link_up", - [SFP_S_TX_FAULT] = "tx_fault", -@@ -1660,8 +1662,6 @@ static void sfp_sm_main(struct sfp *sfp, - if (event != SFP_E_TIMEOUT) - break; - -- sfp_sm_probe_for_phy(sfp); -- - if (sfp->state & SFP_F_TX_FAULT) { - /* Wait t_init before indicating that the link is up, - * provided the current state indicates no TX_FAULT. If -@@ -1683,10 +1683,29 @@ static void sfp_sm_main(struct sfp *sfp, - break; - - case SFP_S_INIT: -- if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) -- sfp_sm_fault(sfp, SFP_S_TX_FAULT, true); -- else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) -- init_done: sfp_sm_link_check_los(sfp); -+ if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) { -+ /* TX_FAULT is still asserted after t_init, so assume -+ * there is a fault. -+ */ -+ sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT, -+ sfp->sm_retries == 5); -+ } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { -+ init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT -+ * clear. Probe for the PHY and check the LOS state. 
-+ */ -+ sfp_sm_probe_for_phy(sfp); -+ sfp_sm_link_check_los(sfp); -+ -+ /* Reset the fault retry count */ -+ sfp->sm_retries = 5; -+ } -+ break; -+ -+ case SFP_S_INIT_TX_FAULT: -+ if (event == SFP_E_TIMEOUT) { -+ sfp_module_tx_fault_reset(sfp); -+ sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES); -+ } - break; - - case SFP_S_WAIT_LOS: diff --git a/target/linux/generic/backport-5.4/732-v5.5-net-sfp-track-upstream-s-attachment-state-in-state-m.patch b/target/linux/generic/backport-5.4/732-v5.5-net-sfp-track-upstream-s-attachment-state-in-state-m.patch deleted file mode 100644 index 714d783c4e..0000000000 --- a/target/linux/generic/backport-5.4/732-v5.5-net-sfp-track-upstream-s-attachment-state-in-state-m.patch +++ /dev/null @@ -1,153 +0,0 @@ -From ec6036a58f979c66bbd5cd9d0d1c783a98c2c644 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Tue, 5 Nov 2019 12:57:40 +0000 -Subject: [PATCH 630/660] net: sfp: track upstream's attachment state in state - machine - -Track the upstream's attachment state in the state machine rather than -maintaining a boolean, which ensures that we have a strict order of -ATTACH followed by an UP event - we can never believe that a newly -attached upstream will be anything but down. - -Rearrange the order of state machines so we run the module state -machine after the upstream device's state machine, so the module state -machine can check the current state of the device and take action to -e.g. reset back to empty state when the upstream is detached. - -This is to allow the module detection to run independently of the -network device becoming available. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 42 +++++++++++++++++++++++++++++------------- - 1 file changed, 29 insertions(+), 13 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -36,6 +36,8 @@ enum { - - SFP_E_INSERT = 0, - SFP_E_REMOVE, -+ SFP_E_DEV_ATTACH, -+ SFP_E_DEV_DETACH, - SFP_E_DEV_DOWN, - SFP_E_DEV_UP, - SFP_E_TX_FAULT, -@@ -50,7 +52,8 @@ enum { - SFP_MOD_PRESENT, - SFP_MOD_ERROR, - -- SFP_DEV_DOWN = 0, -+ SFP_DEV_DETACHED = 0, -+ SFP_DEV_DOWN, - SFP_DEV_UP, - - SFP_S_DOWN = 0, -@@ -80,6 +83,7 @@ static const char *mod_state_to_str(unsi - } - - static const char * const dev_state_strings[] = { -+ [SFP_DEV_DETACHED] = "detached", - [SFP_DEV_DOWN] = "down", - [SFP_DEV_UP] = "up", - }; -@@ -94,6 +98,8 @@ static const char *dev_state_to_str(unsi - static const char * const event_strings[] = { - [SFP_E_INSERT] = "insert", - [SFP_E_REMOVE] = "remove", -+ [SFP_E_DEV_ATTACH] = "dev_attach", -+ [SFP_E_DEV_DETACH] = "dev_detach", - [SFP_E_DEV_DOWN] = "dev_down", - [SFP_E_DEV_UP] = "dev_up", - [SFP_E_TX_FAULT] = "tx_fault", -@@ -188,7 +194,6 @@ struct sfp { - struct gpio_desc *gpio[GPIO_MAX]; - int gpio_irq[GPIO_MAX]; - -- bool attached; - struct mutex st_mutex; /* Protects state */ - unsigned int state; - struct delayed_work poll; -@@ -1559,17 +1564,26 @@ static void sfp_sm_mod_remove(struct sfp - dev_info(sfp->dev, "module removed\n"); - } - --/* This state machine tracks the netdev up/down state */ -+/* This state machine tracks the upstream's state */ - static void sfp_sm_device(struct sfp *sfp, unsigned int event) - { - switch (sfp->sm_dev_state) { - default: -- if (event == SFP_E_DEV_UP) -+ if (event == SFP_E_DEV_ATTACH) -+ sfp->sm_dev_state = SFP_DEV_DOWN; -+ break; -+ -+ case SFP_DEV_DOWN: -+ if (event == SFP_E_DEV_DETACH) -+ sfp->sm_dev_state = SFP_DEV_DETACHED; -+ else if (event == SFP_E_DEV_UP) - sfp->sm_dev_state = SFP_DEV_UP; - break; - - case SFP_DEV_UP: -- if (event == 
SFP_E_DEV_DOWN) -+ if (event == SFP_E_DEV_DETACH) -+ sfp->sm_dev_state = SFP_DEV_DETACHED; -+ else if (event == SFP_E_DEV_DOWN) - sfp->sm_dev_state = SFP_DEV_DOWN; - break; - } -@@ -1580,17 +1594,20 @@ static void sfp_sm_device(struct sfp *sf - */ - static void sfp_sm_module(struct sfp *sfp, unsigned int event) - { -- /* Handle remove event globally, it resets this state machine */ -- if (event == SFP_E_REMOVE) { -+ /* Handle remove event globally, it resets this state machine. -+ * Also deal with upstream detachment. -+ */ -+ if (event == SFP_E_REMOVE || sfp->sm_dev_state < SFP_DEV_DOWN) { - if (sfp->sm_mod_state > SFP_MOD_PROBE) - sfp_sm_mod_remove(sfp); -- sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); -+ if (sfp->sm_mod_state != SFP_MOD_EMPTY) -+ sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); - return; - } - - switch (sfp->sm_mod_state) { - default: -- if (event == SFP_E_INSERT && sfp->attached) -+ if (event == SFP_E_INSERT) - sfp_sm_mod_next(sfp, SFP_MOD_PROBE, T_SERIAL); - break; - -@@ -1756,8 +1773,8 @@ static void sfp_sm_event(struct sfp *sfp - sm_state_to_str(sfp->sm_state), - event_to_str(event)); - -- sfp_sm_module(sfp, event); - sfp_sm_device(sfp, event); -+ sfp_sm_module(sfp, event); - sfp_sm_main(sfp, event); - - dev_dbg(sfp->dev, "SM: exit %s:%s:%s\n", -@@ -1770,15 +1787,14 @@ static void sfp_sm_event(struct sfp *sfp - - static void sfp_attach(struct sfp *sfp) - { -- sfp->attached = true; -+ sfp_sm_event(sfp, SFP_E_DEV_ATTACH); - if (sfp->state & SFP_F_PRESENT) - sfp_sm_event(sfp, SFP_E_INSERT); - } - - static void sfp_detach(struct sfp *sfp) - { -- sfp->attached = false; -- sfp_sm_event(sfp, SFP_E_REMOVE); -+ sfp_sm_event(sfp, SFP_E_DEV_DETACH); - } - - static void sfp_start(struct sfp *sfp) diff --git a/target/linux/generic/backport-5.4/733-v5.5-net-sfp-split-power-mode-switching-from-probe.patch b/target/linux/generic/backport-5.4/733-v5.5-net-sfp-split-power-mode-switching-from-probe.patch deleted file mode 100644 index f645e44191..0000000000 --- 
a/target/linux/generic/backport-5.4/733-v5.5-net-sfp-split-power-mode-switching-from-probe.patch +++ /dev/null @@ -1,184 +0,0 @@ -From fdff863a4ce3677907f64396e34c45025abb6600 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Tue, 5 Nov 2019 12:59:36 +0000 -Subject: [PATCH 631/660] net: sfp: split power mode switching from probe - -Switch the power mode switching from the probe, so that we don't -repeatedly re-probe the SFP device if there is a problem accessing -the registers at I2C address 0x51. - -In splitting this out, we can also fix a bug where we leave the module -in high-power mode when the upstream device is detached but the module -is still inserted. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 101 ++++++++++++++++++++++++++---------------- - 1 file changed, 64 insertions(+), 37 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -49,6 +49,7 @@ enum { - SFP_MOD_EMPTY = 0, - SFP_MOD_PROBE, - SFP_MOD_HPOWER, -+ SFP_MOD_WAITPWR, - SFP_MOD_PRESENT, - SFP_MOD_ERROR, - -@@ -71,6 +72,7 @@ static const char * const mod_state_str - [SFP_MOD_EMPTY] = "empty", - [SFP_MOD_PROBE] = "probe", - [SFP_MOD_HPOWER] = "hpower", -+ [SFP_MOD_WAITPWR] = "waitpwr", - [SFP_MOD_PRESENT] = "present", - [SFP_MOD_ERROR] = "error", - }; -@@ -1423,37 +1425,34 @@ static int sfp_module_parse_power(struct - return 0; - } - --static int sfp_sm_mod_hpower(struct sfp *sfp) -+static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable) - { - u8 val; - int err; - -- if (sfp->module_power_mW <= 1000) -- return 0; -- - err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); - if (err != sizeof(val)) { - dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err); -- err = -EAGAIN; -- goto err; -+ return -EAGAIN; - } - -- val |= BIT(0); -+ if (enable) -+ val |= BIT(0); -+ else -+ val &= ~BIT(0); - - err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); - if (err != sizeof(val)) { - 
dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err); -- err = -EAGAIN; -- goto err; -+ return -EAGAIN; - } - -- dev_info(sfp->dev, "Module switched to %u.%uW power level\n", -- sfp->module_power_mW / 1000, -- (sfp->module_power_mW / 100) % 10); -- return T_HPOWER_LEVEL; -+ if (enable) -+ dev_info(sfp->dev, "Module switched to %u.%uW power level\n", -+ sfp->module_power_mW / 1000, -+ (sfp->module_power_mW / 100) % 10); - --err: -- return err; -+ return 0; - } - - static int sfp_sm_mod_probe(struct sfp *sfp) -@@ -1549,7 +1548,7 @@ static int sfp_sm_mod_probe(struct sfp * - if (ret < 0) - return ret; - -- return sfp_sm_mod_hpower(sfp); -+ return 0; - } - - static void sfp_sm_mod_remove(struct sfp *sfp) -@@ -1594,13 +1593,22 @@ static void sfp_sm_device(struct sfp *sf - */ - static void sfp_sm_module(struct sfp *sfp, unsigned int event) - { -- /* Handle remove event globally, it resets this state machine. -- * Also deal with upstream detachment. -- */ -- if (event == SFP_E_REMOVE || sfp->sm_dev_state < SFP_DEV_DOWN) { -+ int err; -+ -+ /* Handle remove event globally, it resets this state machine */ -+ if (event == SFP_E_REMOVE) { - if (sfp->sm_mod_state > SFP_MOD_PROBE) - sfp_sm_mod_remove(sfp); -- if (sfp->sm_mod_state != SFP_MOD_EMPTY) -+ sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); -+ return; -+ } -+ -+ /* Handle device detach globally */ -+ if (sfp->sm_dev_state < SFP_DEV_DOWN) { -+ if (sfp->module_power_mW > 1000 && -+ sfp->sm_mod_state > SFP_MOD_HPOWER) -+ sfp_sm_mod_hpower(sfp, false); -+ if (sfp->sm_mod_state > SFP_MOD_EMPTY) - sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); - return; - } -@@ -1612,26 +1620,45 @@ static void sfp_sm_module(struct sfp *sf - break; - - case SFP_MOD_PROBE: -- if (event == SFP_E_TIMEOUT) { -- int val = sfp_sm_mod_probe(sfp); -+ if (event != SFP_E_TIMEOUT) -+ break; - -- if (val == 0) -- sfp_sm_mod_next(sfp, SFP_MOD_PRESENT, 0); -- else if (val > 0) -- sfp_sm_mod_next(sfp, SFP_MOD_HPOWER, val); -- else if (val != -EAGAIN) -- 
sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); -- else -- sfp_sm_set_timer(sfp, T_PROBE_RETRY); -+ err = sfp_sm_mod_probe(sfp); -+ if (err == -EAGAIN) { -+ sfp_sm_set_timer(sfp, T_PROBE_RETRY); -+ break; - } -- break; -+ if (err < 0) { -+ sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); -+ break; -+ } -+ -+ /* If this is a power level 1 module, we are done */ -+ if (sfp->module_power_mW <= 1000) -+ goto insert; - -+ sfp_sm_mod_next(sfp, SFP_MOD_HPOWER, 0); -+ /* fall through */ - case SFP_MOD_HPOWER: -- if (event == SFP_E_TIMEOUT) { -- sfp_sm_mod_next(sfp, SFP_MOD_PRESENT, 0); -+ /* Enable high power mode */ -+ err = sfp_sm_mod_hpower(sfp, true); -+ if (err == 0) -+ sfp_sm_mod_next(sfp, SFP_MOD_WAITPWR, T_HPOWER_LEVEL); -+ else if (err != -EAGAIN) -+ sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); -+ else -+ sfp_sm_set_timer(sfp, T_PROBE_RETRY); -+ break; -+ -+ case SFP_MOD_WAITPWR: -+ /* Wait for T_HPOWER_LEVEL to time out */ -+ if (event != SFP_E_TIMEOUT) - break; -- } -- /* fallthrough */ -+ -+ insert: -+ sfp_sm_mod_next(sfp, SFP_MOD_PRESENT, 0); -+ break; -+ - case SFP_MOD_PRESENT: - case SFP_MOD_ERROR: - break; diff --git a/target/linux/generic/backport-5.4/734-v5.5-net-sfp-move-module-insert-reporting-out-of-probe.patch b/target/linux/generic/backport-5.4/734-v5.5-net-sfp-move-module-insert-reporting-out-of-probe.patch deleted file mode 100644 index e49bde27e3..0000000000 --- a/target/linux/generic/backport-5.4/734-v5.5-net-sfp-move-module-insert-reporting-out-of-probe.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 57cbf7453551db1df619b79410d79fc418d862d5 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Tue, 5 Nov 2019 13:00:45 +0000 -Subject: [PATCH 632/660] net: sfp: move module insert reporting out of probe - -Move the module insertion reporting out of the probe handling, but -after we have detected that the upstream has attached (since that is -whom we are reporting insertion to.) 
- -Only report module removal if we had previously reported a module -insertion. - -This gives cleaner semantics, and means we can probe the module before -we have an upstream attached. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 58 +++++++++++++++++++++++++++++-------------- - 1 file changed, 40 insertions(+), 18 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -47,11 +47,12 @@ enum { - SFP_E_TIMEOUT, - - SFP_MOD_EMPTY = 0, -+ SFP_MOD_ERROR, - SFP_MOD_PROBE, -+ SFP_MOD_WAITDEV, - SFP_MOD_HPOWER, - SFP_MOD_WAITPWR, - SFP_MOD_PRESENT, -- SFP_MOD_ERROR, - - SFP_DEV_DETACHED = 0, - SFP_DEV_DOWN, -@@ -70,11 +71,12 @@ enum { - - static const char * const mod_state_strings[] = { - [SFP_MOD_EMPTY] = "empty", -+ [SFP_MOD_ERROR] = "error", - [SFP_MOD_PROBE] = "probe", -+ [SFP_MOD_WAITDEV] = "waitdev", - [SFP_MOD_HPOWER] = "hpower", - [SFP_MOD_WAITPWR] = "waitpwr", - [SFP_MOD_PRESENT] = "present", -- [SFP_MOD_ERROR] = "error", - }; - - static const char *mod_state_to_str(unsigned short mod_state) -@@ -1544,16 +1546,13 @@ static int sfp_sm_mod_probe(struct sfp * - if (ret < 0) - return ret; - -- ret = sfp_module_insert(sfp->sfp_bus, &sfp->id); -- if (ret < 0) -- return ret; -- - return 0; - } - - static void sfp_sm_mod_remove(struct sfp *sfp) - { -- sfp_module_remove(sfp->sfp_bus); -+ if (sfp->sm_mod_state > SFP_MOD_WAITDEV) -+ sfp_module_remove(sfp->sfp_bus); - - sfp_hwmon_remove(sfp); - -@@ -1604,12 +1603,12 @@ static void sfp_sm_module(struct sfp *sf - } - - /* Handle device detach globally */ -- if (sfp->sm_dev_state < SFP_DEV_DOWN) { -+ if (sfp->sm_dev_state < SFP_DEV_DOWN && -+ sfp->sm_mod_state > SFP_MOD_WAITDEV) { - if (sfp->module_power_mW > 1000 && - sfp->sm_mod_state > SFP_MOD_HPOWER) - sfp_sm_mod_hpower(sfp, false); -- if (sfp->sm_mod_state > SFP_MOD_EMPTY) -- sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); -+ sfp_sm_mod_next(sfp, SFP_MOD_WAITDEV, 0); - return; - } - -@@ -1620,6 +1619,7 @@ static 
void sfp_sm_module(struct sfp *sf - break; - - case SFP_MOD_PROBE: -+ /* Wait for T_PROBE_INIT to time out */ - if (event != SFP_E_TIMEOUT) - break; - -@@ -1633,6 +1633,20 @@ static void sfp_sm_module(struct sfp *sf - break; - } - -+ sfp_sm_mod_next(sfp, SFP_MOD_WAITDEV, 0); -+ /* fall through */ -+ case SFP_MOD_WAITDEV: -+ /* Ensure that the device is attached before proceeding */ -+ if (sfp->sm_dev_state < SFP_DEV_DOWN) -+ break; -+ -+ /* Report the module insertion to the upstream device */ -+ err = sfp_module_insert(sfp->sfp_bus, &sfp->id); -+ if (err < 0) { -+ sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); -+ break; -+ } -+ - /* If this is a power level 1 module, we are done */ - if (sfp->module_power_mW <= 1000) - goto insert; -@@ -1642,12 +1656,17 @@ static void sfp_sm_module(struct sfp *sf - case SFP_MOD_HPOWER: - /* Enable high power mode */ - err = sfp_sm_mod_hpower(sfp, true); -- if (err == 0) -- sfp_sm_mod_next(sfp, SFP_MOD_WAITPWR, T_HPOWER_LEVEL); -- else if (err != -EAGAIN) -- sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); -- else -- sfp_sm_set_timer(sfp, T_PROBE_RETRY); -+ if (err < 0) { -+ if (err != -EAGAIN) { -+ sfp_module_remove(sfp->sfp_bus); -+ sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); -+ } else { -+ sfp_sm_set_timer(sfp, T_PROBE_RETRY); -+ } -+ break; -+ } -+ -+ sfp_sm_mod_next(sfp, SFP_MOD_WAITPWR, T_HPOWER_LEVEL); - break; - - case SFP_MOD_WAITPWR: -@@ -1815,8 +1834,6 @@ static void sfp_sm_event(struct sfp *sfp - static void sfp_attach(struct sfp *sfp) - { - sfp_sm_event(sfp, SFP_E_DEV_ATTACH); -- if (sfp->state & SFP_F_PRESENT) -- sfp_sm_event(sfp, SFP_E_INSERT); - } - - static void sfp_detach(struct sfp *sfp) -@@ -2084,6 +2101,11 @@ static int sfp_probe(struct platform_dev - sfp->state |= SFP_F_RATE_SELECT; - sfp_set_state(sfp, sfp->state); - sfp_module_tx_disable(sfp); -+ if (sfp->state & SFP_F_PRESENT) { -+ rtnl_lock(); -+ sfp_sm_event(sfp, SFP_E_INSERT); -+ rtnl_unlock(); -+ } - - for (i = 0; i < GPIO_MAX; i++) { - if (gpio_flags[i] != GPIOD_IN || 
!sfp->gpio[i]) diff --git a/target/linux/generic/backport-5.4/735-v5.5-net-sfp-allow-sfp-to-probe-slow-to-initialise-GPON-m.patch b/target/linux/generic/backport-5.4/735-v5.5-net-sfp-allow-sfp-to-probe-slow-to-initialise-GPON-m.patch deleted file mode 100644 index ab1ae753d7..0000000000 --- a/target/linux/generic/backport-5.4/735-v5.5-net-sfp-allow-sfp-to-probe-slow-to-initialise-GPON-m.patch +++ /dev/null @@ -1,110 +0,0 @@ -From fb56cd08880aff8fb030e684fa4311bef712a499 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Tue, 5 Nov 2019 13:02:30 +0000 -Subject: [PATCH 633/660] net: sfp: allow sfp to probe slow to initialise GPON - modules - -Some GPON modules (e.g. Huawei MA5671A) take a significant amount of -time to start responding on the I2C bus, contary to the SFF -specifications. - -Work around this by implementing a two-level timeout strategy, where -we initially quickly retry for the module, and then use a slower retry -after we exceed a maximum number of quick attempts. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 38 ++++++++++++++++++++++++++++---------- - 1 file changed, 28 insertions(+), 10 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -167,9 +167,12 @@ static const enum gpiod_flags gpio_flags - * The SFF-8472 specifies t_serial ("Time from power on until module is - * ready for data transmission over the two wire serial bus.") as 300ms. 
- */ --#define T_SERIAL msecs_to_jiffies(300) --#define T_HPOWER_LEVEL msecs_to_jiffies(300) --#define T_PROBE_RETRY msecs_to_jiffies(100) -+#define T_SERIAL msecs_to_jiffies(300) -+#define T_HPOWER_LEVEL msecs_to_jiffies(300) -+#define T_PROBE_RETRY_INIT msecs_to_jiffies(100) -+#define R_PROBE_RETRY_INIT 10 -+#define T_PROBE_RETRY_SLOW msecs_to_jiffies(5000) -+#define R_PROBE_RETRY_SLOW 12 - - /* SFP modules appear to always have their PHY configured for bus address - * 0x56 (which with mdio-i2c, translates to a PHY address of 22). -@@ -204,6 +207,8 @@ struct sfp { - struct delayed_work timeout; - struct mutex sm_mutex; /* Protects state machine */ - unsigned char sm_mod_state; -+ unsigned char sm_mod_tries_init; -+ unsigned char sm_mod_tries; - unsigned char sm_dev_state; - unsigned short sm_state; - unsigned int sm_retries; -@@ -1457,7 +1462,7 @@ static int sfp_sm_mod_hpower(struct sfp - return 0; - } - --static int sfp_sm_mod_probe(struct sfp *sfp) -+static int sfp_sm_mod_probe(struct sfp *sfp, bool report) - { - /* SFP module inserted - read I2C data */ - struct sfp_eeprom_id id; -@@ -1467,7 +1472,8 @@ static int sfp_sm_mod_probe(struct sfp * - - ret = sfp_read(sfp, false, 0, &id, sizeof(id)); - if (ret < 0) { -- dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); -+ if (report) -+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); - return -EAGAIN; - } - -@@ -1614,8 +1620,11 @@ static void sfp_sm_module(struct sfp *sf - - switch (sfp->sm_mod_state) { - default: -- if (event == SFP_E_INSERT) -+ if (event == SFP_E_INSERT) { - sfp_sm_mod_next(sfp, SFP_MOD_PROBE, T_SERIAL); -+ sfp->sm_mod_tries_init = R_PROBE_RETRY_INIT; -+ sfp->sm_mod_tries = R_PROBE_RETRY_SLOW; -+ } - break; - - case SFP_MOD_PROBE: -@@ -1623,10 +1632,19 @@ static void sfp_sm_module(struct sfp *sf - if (event != SFP_E_TIMEOUT) - break; - -- err = sfp_sm_mod_probe(sfp); -+ err = sfp_sm_mod_probe(sfp, sfp->sm_mod_tries == 1); - if (err == -EAGAIN) { -- sfp_sm_set_timer(sfp, T_PROBE_RETRY); 
-- break; -+ if (sfp->sm_mod_tries_init && -+ --sfp->sm_mod_tries_init) { -+ sfp_sm_set_timer(sfp, T_PROBE_RETRY_INIT); -+ break; -+ } else if (sfp->sm_mod_tries && --sfp->sm_mod_tries) { -+ if (sfp->sm_mod_tries == R_PROBE_RETRY_SLOW - 1) -+ dev_warn(sfp->dev, -+ "please wait, module slow to respond\n"); -+ sfp_sm_set_timer(sfp, T_PROBE_RETRY_SLOW); -+ break; -+ } - } - if (err < 0) { - sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); -@@ -1661,7 +1679,7 @@ static void sfp_sm_module(struct sfp *sf - sfp_module_remove(sfp->sfp_bus); - sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0); - } else { -- sfp_sm_set_timer(sfp, T_PROBE_RETRY); -+ sfp_sm_set_timer(sfp, T_PROBE_RETRY_INIT); - } - break; - } diff --git a/target/linux/generic/backport-5.4/736-v5.5-net-sfp-allow-modules-with-slow-diagnostics-to-probe.patch b/target/linux/generic/backport-5.4/736-v5.5-net-sfp-allow-modules-with-slow-diagnostics-to-probe.patch deleted file mode 100644 index e6c1fd71d8..0000000000 --- a/target/linux/generic/backport-5.4/736-v5.5-net-sfp-allow-modules-with-slow-diagnostics-to-probe.patch +++ /dev/null @@ -1,198 +0,0 @@ -From 559391fc20fae506adcb311b904cc544c76436c0 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Thu, 7 Nov 2019 18:52:07 +0000 -Subject: [PATCH 634/660] net: sfp: allow modules with slow diagnostics to - probe - -When a module is inserted, we attempt to read read the ID from address -0x50. Once we are able to read the ID, we immediately attempt to -initialise the hwmon support by reading from address 0x51. If this -fails, then we fall into error state, and assume that the module is -not usable. - -Modules such as the ALCATELLUCENT 3FE46541AA use a real EEPROM for -I2C address 0x50, which responds immediately. However, address 0x51 -is an emulated, which only becomes available once the on-board firmware -has booted. This prompts us to fall into the error state. 
- -Since the module may be usable without diagnostics, arrange for the -hwmon probe independent of the rest of the SFP itself, retrying every -5s for up to about 60s for the monitoring to become available, and -print an error message if it doesn't become available. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 96 +++++++++++++++++++++++++++++++++---------- - 1 file changed, 74 insertions(+), 22 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -218,6 +218,8 @@ struct sfp { - - #if IS_ENABLED(CONFIG_HWMON) - struct sfp_diag diag; -+ struct delayed_work hwmon_probe; -+ unsigned int hwmon_tries; - struct device *hwmon_dev; - char *hwmon_name; - #endif -@@ -1159,29 +1161,27 @@ static const struct hwmon_chip_info sfp_ - .info = sfp_hwmon_info, - }; - --static int sfp_hwmon_insert(struct sfp *sfp) -+static void sfp_hwmon_probe(struct work_struct *work) - { -+ struct sfp *sfp = container_of(work, struct sfp, hwmon_probe.work); - int err, i; - -- if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE) -- return 0; -- -- if (!(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) -- return 0; -- -- if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) -- /* This driver in general does not support address -- * change. 
-- */ -- return 0; -- - err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag)); -- if (err < 0) -- return err; -+ if (err < 0) { -+ if (sfp->hwmon_tries--) { -+ mod_delayed_work(system_wq, &sfp->hwmon_probe, -+ T_PROBE_RETRY_SLOW); -+ } else { -+ dev_warn(sfp->dev, "hwmon probe failed: %d\n", err); -+ } -+ return; -+ } - - sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL); -- if (!sfp->hwmon_name) -- return -ENODEV; -+ if (!sfp->hwmon_name) { -+ dev_err(sfp->dev, "out of memory for hwmon name\n"); -+ return; -+ } - - for (i = 0; sfp->hwmon_name[i]; i++) - if (hwmon_is_bad_char(sfp->hwmon_name[i])) -@@ -1191,18 +1191,52 @@ static int sfp_hwmon_insert(struct sfp * - sfp->hwmon_name, sfp, - &sfp_hwmon_chip_info, - NULL); -+ if (IS_ERR(sfp->hwmon_dev)) -+ dev_err(sfp->dev, "failed to register hwmon device: %ld\n", -+ PTR_ERR(sfp->hwmon_dev)); -+} -+ -+static int sfp_hwmon_insert(struct sfp *sfp) -+{ -+ if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE) -+ return 0; -+ -+ if (!(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) -+ return 0; -+ -+ if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) -+ /* This driver in general does not support address -+ * change. 
-+ */ -+ return 0; -+ -+ mod_delayed_work(system_wq, &sfp->hwmon_probe, 1); -+ sfp->hwmon_tries = R_PROBE_RETRY_SLOW; - -- return PTR_ERR_OR_ZERO(sfp->hwmon_dev); -+ return 0; - } - - static void sfp_hwmon_remove(struct sfp *sfp) - { -+ cancel_delayed_work_sync(&sfp->hwmon_probe); - if (!IS_ERR_OR_NULL(sfp->hwmon_dev)) { - hwmon_device_unregister(sfp->hwmon_dev); - sfp->hwmon_dev = NULL; - kfree(sfp->hwmon_name); - } - } -+ -+static int sfp_hwmon_init(struct sfp *sfp) -+{ -+ INIT_DELAYED_WORK(&sfp->hwmon_probe, sfp_hwmon_probe); -+ -+ return 0; -+} -+ -+static void sfp_hwmon_exit(struct sfp *sfp) -+{ -+ cancel_delayed_work_sync(&sfp->hwmon_probe); -+} - #else - static int sfp_hwmon_insert(struct sfp *sfp) - { -@@ -1212,6 +1246,15 @@ static int sfp_hwmon_insert(struct sfp * - static void sfp_hwmon_remove(struct sfp *sfp) - { - } -+ -+static int sfp_hwmon_init(struct sfp *sfp) -+{ -+ return 0; -+} -+ -+static void sfp_hwmon_exit(struct sfp *sfp) -+{ -+} - #endif - - /* Helpers */ -@@ -1548,10 +1591,6 @@ static int sfp_sm_mod_probe(struct sfp * - if (ret < 0) - return ret; - -- ret = sfp_hwmon_insert(sfp); -- if (ret < 0) -- return ret; -- - return 0; - } - -@@ -1700,6 +1739,15 @@ static void sfp_sm_module(struct sfp *sf - case SFP_MOD_ERROR: - break; - } -+ -+#if IS_ENABLED(CONFIG_HWMON) -+ if (sfp->sm_mod_state >= SFP_MOD_WAITDEV && -+ IS_ERR_OR_NULL(sfp->hwmon_dev)) { -+ err = sfp_hwmon_insert(sfp); -+ if (err) -+ dev_warn(sfp->dev, "hwmon probe failed: %d\n", err); -+ } -+#endif - } - - static void sfp_sm_main(struct sfp *sfp, unsigned int event) -@@ -2001,6 +2049,8 @@ static struct sfp *sfp_alloc(struct devi - INIT_DELAYED_WORK(&sfp->poll, sfp_poll); - INIT_DELAYED_WORK(&sfp->timeout, sfp_timeout); - -+ sfp_hwmon_init(sfp); -+ - return sfp; - } - -@@ -2008,6 +2058,8 @@ static void sfp_cleanup(void *data) - { - struct sfp *sfp = data; - -+ sfp_hwmon_exit(sfp); -+ - cancel_delayed_work_sync(&sfp->poll); - cancel_delayed_work_sync(&sfp->timeout); - if (sfp->i2c_mii) 
{ diff --git a/target/linux/generic/backport-5.4/737-v5.5-net-phy-add-core-phylib-sfp-support.patch b/target/linux/generic/backport-5.4/737-v5.5-net-phy-add-core-phylib-sfp-support.patch deleted file mode 100644 index edfe151725..0000000000 --- a/target/linux/generic/backport-5.4/737-v5.5-net-phy-add-core-phylib-sfp-support.patch +++ /dev/null @@ -1,183 +0,0 @@ -From eb156db588ac583cdae7b91eaac9c0ad3a358e63 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Sun, 15 Sep 2019 20:05:34 +0100 -Subject: [PATCH 635/660] net: phy: add core phylib sfp support - -Add core phylib help for supporting SFP sockets on PHYs. This provides -a mechanism to inform the SFP layer about PHY up/down events, and also -unregister the SFP bus when the PHY is going away. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/phy.c | 7 ++++ - drivers/net/phy/phy_device.c | 66 ++++++++++++++++++++++++++++++++++++ - include/linux/phy.h | 11 ++++++ - 3 files changed, 84 insertions(+) - ---- a/drivers/net/phy/phy.c -+++ b/drivers/net/phy/phy.c -@@ -23,6 +23,7 @@ - #include <linux/ethtool.h> - #include <linux/phy.h> - #include <linux/phy_led_triggers.h> -+#include <linux/sfp.h> - #include <linux/workqueue.h> - #include <linux/mdio.h> - #include <linux/io.h> -@@ -863,6 +864,9 @@ void phy_stop(struct phy_device *phydev) - - mutex_lock(&phydev->lock); - -+ if (phydev->sfp_bus) -+ sfp_upstream_stop(phydev->sfp_bus); -+ - phydev->state = PHY_HALTED; - - mutex_unlock(&phydev->lock); -@@ -925,6 +929,9 @@ void phy_state_machine(struct work_struc - - old_state = phydev->state; - -+ if (phydev->sfp_bus) -+ sfp_upstream_start(phydev->sfp_bus); -+ - switch (phydev->state) { - case PHY_DOWN: - case PHY_READY: ---- a/drivers/net/phy/phy_device.c -+++ b/drivers/net/phy/phy_device.c -@@ -27,6 +27,7 @@ - #include <linux/bitmap.h> - #include <linux/phy.h> - #include <linux/phy_led_triggers.h> -+#include <linux/sfp.h> - #include <linux/mdio.h> - #include 
<linux/io.h> - #include <linux/uaccess.h> -@@ -1185,6 +1186,65 @@ phy_standalone_show(struct device *dev, - static DEVICE_ATTR_RO(phy_standalone); - - /** -+ * phy_sfp_attach - attach the SFP bus to the PHY upstream network device -+ * @upstream: pointer to the phy device -+ * @bus: sfp bus representing cage being attached -+ * -+ * This is used to fill in the sfp_upstream_ops .attach member. -+ */ -+void phy_sfp_attach(void *upstream, struct sfp_bus *bus) -+{ -+ struct phy_device *phydev = upstream; -+ -+ if (phydev->attached_dev) -+ phydev->attached_dev->sfp_bus = bus; -+ phydev->sfp_bus_attached = true; -+} -+EXPORT_SYMBOL(phy_sfp_attach); -+ -+/** -+ * phy_sfp_detach - detach the SFP bus from the PHY upstream network device -+ * @upstream: pointer to the phy device -+ * @bus: sfp bus representing cage being attached -+ * -+ * This is used to fill in the sfp_upstream_ops .detach member. -+ */ -+void phy_sfp_detach(void *upstream, struct sfp_bus *bus) -+{ -+ struct phy_device *phydev = upstream; -+ -+ if (phydev->attached_dev) -+ phydev->attached_dev->sfp_bus = NULL; -+ phydev->sfp_bus_attached = false; -+} -+EXPORT_SYMBOL(phy_sfp_detach); -+ -+/** -+ * phy_sfp_probe - probe for a SFP cage attached to this PHY device -+ * @phydev: Pointer to phy_device -+ * @ops: SFP's upstream operations -+ */ -+int phy_sfp_probe(struct phy_device *phydev, -+ const struct sfp_upstream_ops *ops) -+{ -+ struct sfp_bus *bus; -+ int ret; -+ -+ if (phydev->mdio.dev.fwnode) { -+ bus = sfp_bus_find_fwnode(phydev->mdio.dev.fwnode); -+ if (IS_ERR(bus)) -+ return PTR_ERR(bus); -+ -+ phydev->sfp_bus = bus; -+ -+ ret = sfp_bus_add_upstream(bus, phydev, ops); -+ sfp_bus_put(bus); -+ } -+ return 0; -+} -+EXPORT_SYMBOL(phy_sfp_probe); -+ -+/** - * phy_attach_direct - attach a network device to a given PHY device pointer - * @dev: network device to attach - * @phydev: Pointer to phy_device to attach -@@ -1261,6 +1321,9 @@ int phy_attach_direct(struct net_device - dev->phydev = phydev; - } - -+ 
if (phydev->sfp_bus_attached) -+ dev->sfp_bus = phydev->sfp_bus; -+ - /* Some Ethernet drivers try to connect to a PHY device before - * calling register_netdevice() -> netdev_register_kobject() and - * does the dev->dev.kobj initialization. Here we only check for -@@ -2291,6 +2354,9 @@ static int phy_remove(struct device *dev - phydev->state = PHY_DOWN; - mutex_unlock(&phydev->lock); - -+ sfp_bus_del_upstream(phydev->sfp_bus); -+ phydev->sfp_bus = NULL; -+ - if (phydev->drv && phydev->drv->remove) { - phydev->drv->remove(phydev); - ---- a/include/linux/phy.h -+++ b/include/linux/phy.h -@@ -203,6 +203,8 @@ static inline const char *phy_modes(phy_ - - struct device; - struct phylink; -+struct sfp_bus; -+struct sfp_upstream_ops; - struct sk_buff; - - /* -@@ -343,6 +345,8 @@ struct phy_c45_device_ids { - * dev_flags: Device-specific flags used by the PHY driver. - * irq: IRQ number of the PHY's interrupt (-1 if none) - * phy_timer: The timer for handling the state machine -+ * sfp_bus_attached: flag indicating whether the SFP bus has been attached -+ * sfp_bus: SFP bus attached to this PHY's fiber port - * attached_dev: The attached enet driver's device instance ptr - * adjust_link: Callback for the enet controller to respond to - * changes in the link state. 
-@@ -434,6 +438,9 @@ struct phy_device { - - struct mutex lock; - -+ /* This may be modified under the rtnl lock */ -+ bool sfp_bus_attached; -+ struct sfp_bus *sfp_bus; - struct phylink *phylink; - struct net_device *attached_dev; - -@@ -1023,6 +1030,10 @@ int phy_suspend(struct phy_device *phyde - int phy_resume(struct phy_device *phydev); - int __phy_resume(struct phy_device *phydev); - int phy_loopback(struct phy_device *phydev, bool enable); -+void phy_sfp_attach(void *upstream, struct sfp_bus *bus); -+void phy_sfp_detach(void *upstream, struct sfp_bus *bus); -+int phy_sfp_probe(struct phy_device *phydev, -+ const struct sfp_upstream_ops *ops); - struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, - phy_interface_t interface); - struct phy_device *phy_find_first(struct mii_bus *bus); diff --git a/target/linux/generic/backport-5.4/738-v5.5-net-phy-marvell10g-add-SFP-support.patch b/target/linux/generic/backport-5.4/738-v5.5-net-phy-marvell10g-add-SFP-support.patch deleted file mode 100644 index 40a666a3f2..0000000000 --- a/target/linux/generic/backport-5.4/738-v5.5-net-phy-marvell10g-add-SFP-support.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 0836d9fb41ed90090ef4af0d7abe784ee7706f80 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 14 Apr 2017 14:21:25 +0100 -Subject: [PATCH 636/660] net: phy: marvell10g: add SFP+ support - -Add support for SFP+ cages to the Marvell 10G PHY driver. This is -slightly complicated by the way phylib works in that we need to use -a multi-step process to attach the SFP bus, and we also need to track -the phylink state machine to know when the module's transmit disable -signal should change state. - -With appropriate DT changes, this allows the SFP+ canges on the -Macchiatobin platform to be functional. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/marvell10g.c | 25 ++++++++++++++++++++++++- - 1 file changed, 24 insertions(+), 1 deletion(-) - ---- a/drivers/net/phy/marvell10g.c -+++ b/drivers/net/phy/marvell10g.c -@@ -26,6 +26,7 @@ - #include <linux/hwmon.h> - #include <linux/marvell_phy.h> - #include <linux/phy.h> -+#include <linux/sfp.h> - - #define MV_PHY_ALASKA_NBT_QUIRK_MASK 0xfffffffe - #define MV_PHY_ALASKA_NBT_QUIRK_REV (MARVELL_PHY_ID_88X3310 | 0xa) -@@ -206,6 +207,28 @@ static int mv3310_hwmon_probe(struct phy - } - #endif - -+static int mv3310_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) -+{ -+ struct phy_device *phydev = upstream; -+ __ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, }; -+ phy_interface_t iface; -+ -+ sfp_parse_support(phydev->sfp_bus, id, support); -+ iface = sfp_select_interface(phydev->sfp_bus, id, support); -+ -+ if (iface != PHY_INTERFACE_MODE_10GKR) { -+ dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n"); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+static const struct sfp_upstream_ops mv3310_sfp_ops = { -+ .attach = phy_sfp_attach, -+ .detach = phy_sfp_detach, -+ .module_insert = mv3310_sfp_insert, -+}; -+ - static int mv3310_probe(struct phy_device *phydev) - { - struct mv3310_priv *priv; -@@ -236,7 +259,7 @@ static int mv3310_probe(struct phy_devic - if (ret) - return ret; - -- return 0; -+ return phy_sfp_probe(phydev, &mv3310_sfp_ops); - } - - static int mv3310_suspend(struct phy_device *phydev) diff --git a/target/linux/generic/backport-5.4/739-v5.5-net-phylink-update-to-use-phy_support_asym_pause.patch b/target/linux/generic/backport-5.4/739-v5.5-net-phylink-update-to-use-phy_support_asym_pause.patch deleted file mode 100644 index 84a8214ca5..0000000000 --- a/target/linux/generic/backport-5.4/739-v5.5-net-phylink-update-to-use-phy_support_asym_pause.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 09d7d8395ec61fba4392b35baa6f71c4e36489df Mon Sep 17 00:00:00 2001 
-From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 8 Nov 2019 15:18:02 +0000 -Subject: [PATCH 637/660] net: phylink: update to use phy_support_asym_pause() - -Use phy_support_asym_pause() rather than open-coding it. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/phylink.c | 17 +++++++---------- - 1 file changed, 7 insertions(+), 10 deletions(-) - ---- a/drivers/net/phy/phylink.c -+++ b/drivers/net/phy/phylink.c -@@ -718,11 +718,6 @@ static int phylink_bringup_phy(struct ph - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); - int ret; - -- memset(&config, 0, sizeof(config)); -- linkmode_copy(supported, phy->supported); -- linkmode_copy(config.advertising, phy->advertising); -- config.interface = pl->link_config.interface; -- - /* - * This is the new way of dealing with flow control for PHYs, - * as described by Timur Tabi in commit 529ed1275263 ("net: phy: -@@ -730,10 +725,12 @@ static int phylink_bringup_phy(struct ph - * using our validate call to the MAC, we rely upon the MAC - * clearing the bits from both supported and advertising fields. 
- */ -- if (phylink_test(supported, Pause)) -- phylink_set(config.advertising, Pause); -- if (phylink_test(supported, Asym_Pause)) -- phylink_set(config.advertising, Asym_Pause); -+ phy_support_asym_pause(phy); -+ -+ memset(&config, 0, sizeof(config)); -+ linkmode_copy(supported, phy->supported); -+ linkmode_copy(config.advertising, phy->advertising); -+ config.interface = pl->link_config.interface; - - ret = phylink_validate(pl, supported, &config); - if (ret) diff --git a/target/linux/generic/backport-5.4/744-v5.5-net-sfp-soft-status-and-control-support.patch b/target/linux/generic/backport-5.4/744-v5.5-net-sfp-soft-status-and-control-support.patch deleted file mode 100644 index abc9f65f09..0000000000 --- a/target/linux/generic/backport-5.4/744-v5.5-net-sfp-soft-status-and-control-support.patch +++ /dev/null @@ -1,225 +0,0 @@ -From 40e0b3b15f7da92e6b065292b14af7b9bfb1c6e0 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Fri, 13 Sep 2019 23:00:35 +0100 -Subject: [PATCH 642/660] net: sfp: soft status and control support - -Add support for the soft status and control register, which allows -TX_FAULT and RX_LOS to be monitored and TX_DISABLE to be set. We -make use of this when the board does not support GPIOs for these -signals. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/net/phy/sfp.c | 110 ++++++++++++++++++++++++++++++++++-------- - include/linux/sfp.h | 4 ++ - 2 files changed, 94 insertions(+), 20 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -201,7 +201,10 @@ struct sfp { - struct gpio_desc *gpio[GPIO_MAX]; - int gpio_irq[GPIO_MAX]; - -+ bool need_poll; -+ - struct mutex st_mutex; /* Protects state */ -+ unsigned int state_soft_mask; - unsigned int state; - struct delayed_work poll; - struct delayed_work timeout; -@@ -395,24 +398,90 @@ static int sfp_i2c_configure(struct sfp - } - - /* Interface */ --static unsigned int sfp_get_state(struct sfp *sfp) -+static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) - { -- return sfp->get_state(sfp); -+ return sfp->read(sfp, a2, addr, buf, len); - } - --static void sfp_set_state(struct sfp *sfp, unsigned int state) -+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) - { -- sfp->set_state(sfp, state); -+ return sfp->write(sfp, a2, addr, buf, len); - } - --static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) -+static unsigned int sfp_soft_get_state(struct sfp *sfp) - { -- return sfp->read(sfp, a2, addr, buf, len); -+ unsigned int state = 0; -+ u8 status; -+ -+ if (sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)) == -+ sizeof(status)) { -+ if (status & SFP_STATUS_RX_LOS) -+ state |= SFP_F_LOS; -+ if (status & SFP_STATUS_TX_FAULT) -+ state |= SFP_F_TX_FAULT; -+ } -+ -+ return state & sfp->state_soft_mask; - } - --static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) -+static void sfp_soft_set_state(struct sfp *sfp, unsigned int state) - { -- return sfp->write(sfp, a2, addr, buf, len); -+ u8 status; -+ -+ if (sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)) == -+ sizeof(status)) { -+ if (state & SFP_F_TX_DISABLE) -+ status |= SFP_STATUS_TX_DISABLE_FORCE; -+ else -+ status 
&= ~SFP_STATUS_TX_DISABLE_FORCE; -+ -+ sfp_write(sfp, true, SFP_STATUS, &status, sizeof(status)); -+ } -+} -+ -+static void sfp_soft_start_poll(struct sfp *sfp) -+{ -+ const struct sfp_eeprom_id *id = &sfp->id; -+ -+ sfp->state_soft_mask = 0; -+ if (id->ext.enhopts & SFP_ENHOPTS_SOFT_TX_DISABLE && -+ !sfp->gpio[GPIO_TX_DISABLE]) -+ sfp->state_soft_mask |= SFP_F_TX_DISABLE; -+ if (id->ext.enhopts & SFP_ENHOPTS_SOFT_TX_FAULT && -+ !sfp->gpio[GPIO_TX_FAULT]) -+ sfp->state_soft_mask |= SFP_F_TX_FAULT; -+ if (id->ext.enhopts & SFP_ENHOPTS_SOFT_RX_LOS && -+ !sfp->gpio[GPIO_LOS]) -+ sfp->state_soft_mask |= SFP_F_LOS; -+ -+ if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) && -+ !sfp->need_poll) -+ mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); -+} -+ -+static void sfp_soft_stop_poll(struct sfp *sfp) -+{ -+ sfp->state_soft_mask = 0; -+} -+ -+static unsigned int sfp_get_state(struct sfp *sfp) -+{ -+ unsigned int state = sfp->get_state(sfp); -+ -+ if (state & SFP_F_PRESENT && -+ sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT)) -+ state |= sfp_soft_get_state(sfp); -+ -+ return state; -+} -+ -+static void sfp_set_state(struct sfp *sfp, unsigned int state) -+{ -+ sfp->set_state(sfp, state); -+ -+ if (state & SFP_F_PRESENT && -+ sfp->state_soft_mask & SFP_F_TX_DISABLE) -+ sfp_soft_set_state(sfp, state); - } - - static unsigned int sfp_check(void *buf, size_t len) -@@ -1407,11 +1476,6 @@ static void sfp_sm_fault(struct sfp *sfp - } - } - --static void sfp_sm_mod_init(struct sfp *sfp) --{ -- sfp_module_tx_enable(sfp); --} -- - static void sfp_sm_probe_for_phy(struct sfp *sfp) - { - /* Setting the serdes link mode is guesswork: there's no -@@ -1574,7 +1638,7 @@ static int sfp_sm_mod_probe(struct sfp * - (int)sizeof(id.ext.datecode), id.ext.datecode); - - /* Check whether we support this module */ -- if (!sfp->type->module_supported(&sfp->id)) { -+ if (!sfp->type->module_supported(&id)) { - dev_err(sfp->dev, - "module is not supported - phys id 0x%02x 0x%02x\n", - 
sfp->id.base.phys_id, sfp->id.base.phys_ext_id); -@@ -1764,6 +1828,7 @@ static void sfp_sm_main(struct sfp *sfp, - if (sfp->mod_phy) - sfp_sm_phy_detach(sfp); - sfp_module_tx_disable(sfp); -+ sfp_soft_stop_poll(sfp); - sfp_sm_next(sfp, SFP_S_DOWN, 0); - return; - } -@@ -1775,7 +1840,10 @@ static void sfp_sm_main(struct sfp *sfp, - sfp->sm_dev_state != SFP_DEV_UP) - break; - -- sfp_sm_mod_init(sfp); -+ if (!(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)) -+ sfp_soft_start_poll(sfp); -+ -+ sfp_module_tx_enable(sfp); - - /* Initialise the fault clearance retries */ - sfp->sm_retries = 5; -@@ -2031,7 +2099,10 @@ static void sfp_poll(struct work_struct - struct sfp *sfp = container_of(work, struct sfp, poll.work); - - sfp_check_state(sfp); -- mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); -+ -+ if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) || -+ sfp->need_poll) -+ mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); - } - - static struct sfp *sfp_alloc(struct device *dev) -@@ -2076,7 +2147,6 @@ static int sfp_probe(struct platform_dev - const struct sff_data *sff; - struct i2c_adapter *i2c; - struct sfp *sfp; -- bool poll = false; - int err, i; - - sfp = sfp_alloc(&pdev->dev); -@@ -2184,7 +2254,7 @@ static int sfp_probe(struct platform_dev - sfp->gpio_irq[i] = gpiod_to_irq(sfp->gpio[i]); - if (sfp->gpio_irq[i] < 0) { - sfp->gpio_irq[i] = 0; -- poll = true; -+ sfp->need_poll = true; - continue; - } - -@@ -2196,11 +2266,11 @@ static int sfp_probe(struct platform_dev - dev_name(sfp->dev), sfp); - if (err) { - sfp->gpio_irq[i] = 0; -- poll = true; -+ sfp->need_poll = true; - } - } - -- if (poll) -+ if (sfp->need_poll) - mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); - - /* We could have an issue in cases no Tx disable pin is available or ---- a/include/linux/sfp.h -+++ b/include/linux/sfp.h -@@ -428,6 +428,10 @@ enum { - SFP_TEC_CUR = 0x6c, - - SFP_STATUS = 0x6e, -+ SFP_STATUS_TX_DISABLE = BIT(7), -+ SFP_STATUS_TX_DISABLE_FORCE = BIT(6), -+ 
SFP_STATUS_TX_FAULT = BIT(2), -+ SFP_STATUS_RX_LOS = BIT(1), - SFP_ALARM0 = 0x70, - SFP_ALARM0_TEMP_HIGH = BIT(7), - SFP_ALARM0_TEMP_LOW = BIT(6), diff --git a/target/linux/generic/backport-5.4/745-v5.7-net-dsa-mt7530-add-support-for-port-mirroring.patch b/target/linux/generic/backport-5.4/745-v5.7-net-dsa-mt7530-add-support-for-port-mirroring.patch deleted file mode 100644 index 71a06997c3..0000000000 --- a/target/linux/generic/backport-5.4/745-v5.7-net-dsa-mt7530-add-support-for-port-mirroring.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 37feab6076aa816ed72fe836759a485353241916 Mon Sep 17 00:00:00 2001 -From: DENG Qingfang <dqfext@gmail.com> -Date: Fri, 6 Mar 2020 20:35:35 +0800 -Subject: net: dsa: mt7530: add support for port mirroring - -Add support for configuring port mirroring through the cls_matchall -classifier. We do a full ingress and/or egress capture towards a -capture port. -MT7530 supports one monitor port and multiple mirrored ports. - -Signed-off-by: DENG Qingfang <dqfext@gmail.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - drivers/net/dsa/mt7530.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ - drivers/net/dsa/mt7530.h | 7 ++++++ - 2 files changed, 67 insertions(+) - ---- a/drivers/net/dsa/mt7530.c -+++ b/drivers/net/dsa/mt7530.c -@@ -1143,6 +1143,64 @@ mt7530_port_vlan_del(struct dsa_switch * - return 0; - } - -+static int mt7530_port_mirror_add(struct dsa_switch *ds, int port, -+ struct dsa_mall_mirror_tc_entry *mirror, -+ bool ingress) -+{ -+ struct mt7530_priv *priv = ds->priv; -+ u32 val; -+ -+ /* Check for existent entry */ -+ if ((ingress ? 
priv->mirror_rx : priv->mirror_tx) & BIT(port)) -+ return -EEXIST; -+ -+ val = mt7530_read(priv, MT7530_MFC); -+ -+ /* MT7530 only supports one monitor port */ -+ if (val & MIRROR_EN && MIRROR_PORT(val) != mirror->to_local_port) -+ return -EEXIST; -+ -+ val |= MIRROR_EN; -+ val &= ~MIRROR_MASK; -+ val |= mirror->to_local_port; -+ mt7530_write(priv, MT7530_MFC, val); -+ -+ val = mt7530_read(priv, MT7530_PCR_P(port)); -+ if (ingress) { -+ val |= PORT_RX_MIR; -+ priv->mirror_rx |= BIT(port); -+ } else { -+ val |= PORT_TX_MIR; -+ priv->mirror_tx |= BIT(port); -+ } -+ mt7530_write(priv, MT7530_PCR_P(port), val); -+ -+ return 0; -+} -+ -+static void mt7530_port_mirror_del(struct dsa_switch *ds, int port, -+ struct dsa_mall_mirror_tc_entry *mirror) -+{ -+ struct mt7530_priv *priv = ds->priv; -+ u32 val; -+ -+ val = mt7530_read(priv, MT7530_PCR_P(port)); -+ if (mirror->ingress) { -+ val &= ~PORT_RX_MIR; -+ priv->mirror_rx &= ~BIT(port); -+ } else { -+ val &= ~PORT_TX_MIR; -+ priv->mirror_tx &= ~BIT(port); -+ } -+ mt7530_write(priv, MT7530_PCR_P(port), val); -+ -+ if (!priv->mirror_rx && !priv->mirror_tx) { -+ val = mt7530_read(priv, MT7530_MFC); -+ val &= ~MIRROR_EN; -+ mt7530_write(priv, MT7530_MFC, val); -+ } -+} -+ - static enum dsa_tag_protocol - mtk_get_tag_protocol(struct dsa_switch *ds, int port) - { -@@ -1520,6 +1578,8 @@ static const struct dsa_switch_ops mt753 - .port_vlan_prepare = mt7530_port_vlan_prepare, - .port_vlan_add = mt7530_port_vlan_add, - .port_vlan_del = mt7530_port_vlan_del, -+ .port_mirror_add = mt7530_port_mirror_add, -+ .port_mirror_del = mt7530_port_mirror_del, - .phylink_validate = mt7530_phylink_validate, - .phylink_mac_link_state = mt7530_phylink_mac_link_state, - .phylink_mac_config = mt7530_phylink_mac_config, ---- a/drivers/net/dsa/mt7530.h -+++ b/drivers/net/dsa/mt7530.h -@@ -37,6 +37,9 @@ enum { - #define CPU_EN BIT(7) - #define CPU_PORT(x) ((x) << 4) - #define CPU_MASK (0xf << 4) -+#define MIRROR_EN BIT(3) -+#define MIRROR_PORT(x) ((x) 
& 0x7) -+#define MIRROR_MASK 0x7 - - /* Registers for address table access */ - #define MT7530_ATA1 0x74 -@@ -142,6 +145,8 @@ enum mt7530_stp_state { - - /* Register for port control */ - #define MT7530_PCR_P(x) (0x2004 + ((x) * 0x100)) -+#define PORT_TX_MIR BIT(9) -+#define PORT_RX_MIR BIT(8) - #define PORT_VLAN(x) ((x) & 0x3) - - enum mt7530_port_mode { -@@ -464,6 +469,8 @@ struct mt7530_priv { - phy_interface_t p6_interface; - phy_interface_t p5_interface; - unsigned int p5_intf_sel; -+ u8 mirror_rx; -+ u8 mirror_tx; - - struct mt7530_port ports[MT7530_NUM_PORTS]; - /* protect among processes for registers access*/ diff --git a/target/linux/generic/backport-5.4/746-v5.5-net-dsa-mv88e6xxx-Split-monitor-port-configuration.patch b/target/linux/generic/backport-5.4/746-v5.5-net-dsa-mv88e6xxx-Split-monitor-port-configuration.patch deleted file mode 100644 index 683178727c..0000000000 --- a/target/linux/generic/backport-5.4/746-v5.5-net-dsa-mv88e6xxx-Split-monitor-port-configuration.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 5c74c54ce6fff719999ff48f128cf4150ee4ff59 Mon Sep 17 00:00:00 2001 -From: Iwan R Timmer <irtimmer@gmail.com> -Date: Thu, 7 Nov 2019 22:11:13 +0100 -Subject: [PATCH] net: dsa: mv88e6xxx: Split monitor port configuration - -Separate the configuration of the egress and ingress monitor port. -This allows the port mirror functionality to do ingress and egress -port mirroring to separate ports. - -Signed-off-by: Iwan R Timmer <irtimmer@gmail.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/dsa/mv88e6xxx/chip.c | 9 ++++++- - drivers/net/dsa/mv88e6xxx/chip.h | 9 ++++++- - drivers/net/dsa/mv88e6xxx/global1.c | 42 ++++++++++++++++++++--------- - drivers/net/dsa/mv88e6xxx/global1.h | 8 ++++-- - 4 files changed, 52 insertions(+), 16 deletions(-) - ---- a/drivers/net/dsa/mv88e6xxx/chip.c -+++ b/drivers/net/dsa/mv88e6xxx/chip.c -@@ -2384,7 +2384,14 @@ static int mv88e6xxx_setup_upstream_port - - if (chip->info->ops->set_egress_port) { - err = chip->info->ops->set_egress_port(chip, -- upstream_port); -+ MV88E6XXX_EGRESS_DIR_INGRESS, -+ upstream_port); -+ if (err) -+ return err; -+ -+ err = chip->info->ops->set_egress_port(chip, -+ MV88E6XXX_EGRESS_DIR_EGRESS, -+ upstream_port); - if (err) - return err; - } ---- a/drivers/net/dsa/mv88e6xxx/chip.h -+++ b/drivers/net/dsa/mv88e6xxx/chip.h -@@ -33,6 +33,11 @@ enum mv88e6xxx_egress_mode { - MV88E6XXX_EGRESS_MODE_ETHERTYPE, - }; - -+enum mv88e6xxx_egress_direction { -+ MV88E6XXX_EGRESS_DIR_INGRESS, -+ MV88E6XXX_EGRESS_DIR_EGRESS, -+}; -+ - enum mv88e6xxx_frame_mode { - MV88E6XXX_FRAME_MODE_NORMAL, - MV88E6XXX_FRAME_MODE_DSA, -@@ -464,7 +469,9 @@ struct mv88e6xxx_ops { - int (*stats_get_stats)(struct mv88e6xxx_chip *chip, int port, - uint64_t *data); - int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port); -- int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port); -+ int (*set_egress_port)(struct mv88e6xxx_chip *chip, -+ enum mv88e6xxx_egress_direction direction, -+ int port); - - #define MV88E6XXX_CASCADE_PORT_NONE 0xe - #define MV88E6XXX_CASCADE_PORT_MULTIPLE 0xf ---- a/drivers/net/dsa/mv88e6xxx/global1.c -+++ b/drivers/net/dsa/mv88e6xxx/global1.c -@@ -294,7 +294,9 @@ int mv88e6250_g1_ieee_pri_map(struct mv8 - /* Offset 0x1a: Monitor Control */ - /* Offset 0x1a: Monitor & MGMT Control on some devices */ - --int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, int port) -+int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, -+ enum 
mv88e6xxx_egress_direction direction, -+ int port) - { - u16 reg; - int err; -@@ -303,11 +305,20 @@ int mv88e6095_g1_set_egress_port(struct - if (err) - return err; - -- reg &= ~(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK | -- MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); -- -- reg |= port << __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK) | -- port << __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); -+ switch (direction) { -+ case MV88E6XXX_EGRESS_DIR_INGRESS: -+ reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; -+ reg |= port << -+ __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK); -+ break; -+ case MV88E6XXX_EGRESS_DIR_EGRESS: -+ reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; -+ reg |= port << -+ __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); -+ break; -+ default: -+ return -EINVAL; -+ } - - return mv88e6xxx_g1_write(chip, MV88E6185_G1_MONITOR_CTL, reg); - } -@@ -341,17 +352,24 @@ static int mv88e6390_g1_monitor_write(st - return mv88e6xxx_g1_write(chip, MV88E6390_G1_MONITOR_MGMT_CTL, reg); - } - --int mv88e6390_g1_set_egress_port(struct mv88e6xxx_chip *chip, int port) -+int mv88e6390_g1_set_egress_port(struct mv88e6xxx_chip *chip, -+ enum mv88e6xxx_egress_direction direction, -+ int port) - { - u16 ptr; - int err; - -- ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST; -- err = mv88e6390_g1_monitor_write(chip, ptr, port); -- if (err) -- return err; -+ switch (direction) { -+ case MV88E6XXX_EGRESS_DIR_INGRESS: -+ ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST; -+ break; -+ case MV88E6XXX_EGRESS_DIR_EGRESS: -+ ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST; -+ break; -+ default: -+ return -EINVAL; -+ } - -- ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST; - err = mv88e6390_g1_monitor_write(chip, ptr, port); - if (err) - return err; ---- a/drivers/net/dsa/mv88e6xxx/global1.h -+++ b/drivers/net/dsa/mv88e6xxx/global1.h -@@ -289,8 +289,12 @@ int mv88e6095_g1_stats_set_histogram(str - int mv88e6390_g1_stats_set_histogram(struct 
mv88e6xxx_chip *chip); - void mv88e6xxx_g1_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val); - int mv88e6xxx_g1_stats_clear(struct mv88e6xxx_chip *chip); --int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, int port); --int mv88e6390_g1_set_egress_port(struct mv88e6xxx_chip *chip, int port); -+int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, -+ enum mv88e6xxx_egress_direction direction, -+ int port); -+int mv88e6390_g1_set_egress_port(struct mv88e6xxx_chip *chip, -+ enum mv88e6xxx_egress_direction direction, -+ int port); - int mv88e6095_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port); - int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port); - int mv88e6390_g1_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); diff --git a/target/linux/generic/backport-5.4/747-v5.5-net-dsa-mv88e6xxx-Add-support-for-port-mirroring.patch b/target/linux/generic/backport-5.4/747-v5.5-net-dsa-mv88e6xxx-Add-support-for-port-mirroring.patch deleted file mode 100644 index a23f45075f..0000000000 --- a/target/linux/generic/backport-5.4/747-v5.5-net-dsa-mv88e6xxx-Add-support-for-port-mirroring.patch +++ /dev/null @@ -1,266 +0,0 @@ -From f0942e00a1abb6404ca4302c66497fc623676c11 Mon Sep 17 00:00:00 2001 -From: Iwan R Timmer <irtimmer@gmail.com> -Date: Thu, 7 Nov 2019 22:11:14 +0100 -Subject: [PATCH] net: dsa: mv88e6xxx: Add support for port mirroring - -Add support for configuring port mirroring through the cls_matchall -classifier. We do a full ingress and/or egress capture towards a -capture port. It allows setting a different capture port for ingress -and egress traffic. - -It keeps track of the mirrored ports and the destination ports to -prevent changes to the capture port while other ports are being -mirrored. - -Signed-off-by: Iwan R Timmer <irtimmer@gmail.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/dsa/mv88e6xxx/chip.c | 76 +++++++++++++++++++++++++++++ - drivers/net/dsa/mv88e6xxx/chip.h | 6 +++ - drivers/net/dsa/mv88e6xxx/global1.c | 18 +++++-- - drivers/net/dsa/mv88e6xxx/port.c | 37 ++++++++++++++ - drivers/net/dsa/mv88e6xxx/port.h | 3 ++ - 5 files changed, 136 insertions(+), 4 deletions(-) - ---- a/drivers/net/dsa/mv88e6xxx/chip.c -+++ b/drivers/net/dsa/mv88e6xxx/chip.c -@@ -4926,6 +4926,80 @@ static int mv88e6xxx_port_mdb_del(struct - return err; - } - -+static int mv88e6xxx_port_mirror_add(struct dsa_switch *ds, int port, -+ struct dsa_mall_mirror_tc_entry *mirror, -+ bool ingress) -+{ -+ enum mv88e6xxx_egress_direction direction = ingress ? -+ MV88E6XXX_EGRESS_DIR_INGRESS : -+ MV88E6XXX_EGRESS_DIR_EGRESS; -+ struct mv88e6xxx_chip *chip = ds->priv; -+ bool other_mirrors = false; -+ int i; -+ int err; -+ -+ if (!chip->info->ops->set_egress_port) -+ return -EOPNOTSUPP; -+ -+ mutex_lock(&chip->reg_lock); -+ if ((ingress ? chip->ingress_dest_port : chip->egress_dest_port) != -+ mirror->to_local_port) { -+ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) -+ other_mirrors |= ingress ? -+ chip->ports[i].mirror_ingress : -+ chip->ports[i].mirror_egress; -+ -+ /* Can't change egress port when other mirror is active */ -+ if (other_mirrors) { -+ err = -EBUSY; -+ goto out; -+ } -+ -+ err = chip->info->ops->set_egress_port(chip, -+ direction, -+ mirror->to_local_port); -+ if (err) -+ goto out; -+ } -+ -+ err = mv88e6xxx_port_set_mirror(chip, port, direction, true); -+out: -+ mutex_unlock(&chip->reg_lock); -+ -+ return err; -+} -+ -+static void mv88e6xxx_port_mirror_del(struct dsa_switch *ds, int port, -+ struct dsa_mall_mirror_tc_entry *mirror) -+{ -+ enum mv88e6xxx_egress_direction direction = mirror->ingress ? 
-+ MV88E6XXX_EGRESS_DIR_INGRESS : -+ MV88E6XXX_EGRESS_DIR_EGRESS; -+ struct mv88e6xxx_chip *chip = ds->priv; -+ bool other_mirrors = false; -+ int i; -+ -+ mutex_lock(&chip->reg_lock); -+ if (mv88e6xxx_port_set_mirror(chip, port, direction, false)) -+ dev_err(ds->dev, "p%d: failed to disable mirroring\n", port); -+ -+ for (i = 0; i < mv88e6xxx_num_ports(chip); i++) -+ other_mirrors |= mirror->ingress ? -+ chip->ports[i].mirror_ingress : -+ chip->ports[i].mirror_egress; -+ -+ /* Reset egress port when no other mirror is active */ -+ if (!other_mirrors) { -+ if (chip->info->ops->set_egress_port(chip, -+ direction, -+ dsa_upstream_port(ds, -+ port))); -+ dev_err(ds->dev, "failed to set egress port\n"); -+ } -+ -+ mutex_unlock(&chip->reg_lock); -+} -+ - static int mv88e6xxx_port_egress_floods(struct dsa_switch *ds, int port, - bool unicast, bool multicast) - { -@@ -4980,6 +5054,8 @@ static const struct dsa_switch_ops mv88e - .port_mdb_prepare = mv88e6xxx_port_mdb_prepare, - .port_mdb_add = mv88e6xxx_port_mdb_add, - .port_mdb_del = mv88e6xxx_port_mdb_del, -+ .port_mirror_add = mv88e6xxx_port_mirror_add, -+ .port_mirror_del = mv88e6xxx_port_mirror_del, - .crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join, - .crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave, - .port_hwtstamp_set = mv88e6xxx_port_hwtstamp_set, ---- a/drivers/net/dsa/mv88e6xxx/chip.h -+++ b/drivers/net/dsa/mv88e6xxx/chip.h -@@ -232,6 +232,8 @@ struct mv88e6xxx_port { - u64 vtu_member_violation; - u64 vtu_miss_violation; - u8 cmode; -+ bool mirror_ingress; -+ bool mirror_egress; - unsigned int serdes_irq; - }; - -@@ -315,6 +317,10 @@ struct mv88e6xxx_chip { - u16 evcap_config; - u16 enable_count; - -+ /* Current ingress and egress monitor ports */ -+ int egress_dest_port; -+ int ingress_dest_port; -+ - /* Per-port timestamping resources. 
*/ - struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS]; - ---- a/drivers/net/dsa/mv88e6xxx/global1.c -+++ b/drivers/net/dsa/mv88e6xxx/global1.c -@@ -298,6 +298,7 @@ int mv88e6095_g1_set_egress_port(struct - enum mv88e6xxx_egress_direction direction, - int port) - { -+ int *dest_port_chip; - u16 reg; - int err; - -@@ -307,11 +308,13 @@ int mv88e6095_g1_set_egress_port(struct - - switch (direction) { - case MV88E6XXX_EGRESS_DIR_INGRESS: -+ dest_port_chip = &chip->ingress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; - reg |= port << - __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK); - break; - case MV88E6XXX_EGRESS_DIR_EGRESS: -+ dest_port_chip = &chip->egress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; - reg |= port << - __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); -@@ -320,7 +323,11 @@ int mv88e6095_g1_set_egress_port(struct - return -EINVAL; - } - -- return mv88e6xxx_g1_write(chip, MV88E6185_G1_MONITOR_CTL, reg); -+ err = mv88e6xxx_g1_write(chip, MV88E6185_G1_MONITOR_CTL, reg); -+ if (!err) -+ *dest_port_chip = port; -+ -+ return err; - } - - /* Older generations also call this the ARP destination. 
It has been -@@ -356,14 +363,17 @@ int mv88e6390_g1_set_egress_port(struct - enum mv88e6xxx_egress_direction direction, - int port) - { -+ int *dest_port_chip; - u16 ptr; - int err; - - switch (direction) { - case MV88E6XXX_EGRESS_DIR_INGRESS: -+ dest_port_chip = &chip->ingress_dest_port; - ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST; - break; - case MV88E6XXX_EGRESS_DIR_EGRESS: -+ dest_port_chip = &chip->egress_dest_port; - ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST; - break; - default: -@@ -371,10 +381,10 @@ int mv88e6390_g1_set_egress_port(struct - } - - err = mv88e6390_g1_monitor_write(chip, ptr, port); -- if (err) -- return err; -+ if (!err) -+ *dest_port_chip = port; - -- return 0; -+ return err; - } - - int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port) ---- a/drivers/net/dsa/mv88e6xxx/port.c -+++ b/drivers/net/dsa/mv88e6xxx/port.c -@@ -1181,6 +1181,43 @@ int mv88e6095_port_set_upstream_port(str - return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, reg); - } - -+int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port, -+ enum mv88e6xxx_egress_direction direction, -+ bool mirror) -+{ -+ bool *mirror_port; -+ u16 reg; -+ u16 bit; -+ int err; -+ -+ err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL2, &reg); -+ if (err) -+ return err; -+ -+ switch (direction) { -+ case MV88E6XXX_EGRESS_DIR_INGRESS: -+ bit = MV88E6XXX_PORT_CTL2_INGRESS_MONITOR; -+ mirror_port = &chip->ports[port].mirror_ingress; -+ break; -+ case MV88E6XXX_EGRESS_DIR_EGRESS: -+ bit = MV88E6XXX_PORT_CTL2_EGRESS_MONITOR; -+ mirror_port = &chip->ports[port].mirror_egress; -+ break; -+ default: -+ return -EINVAL; -+ } -+ -+ reg &= ~bit; -+ if (mirror) -+ reg |= bit; -+ -+ err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL2, reg); -+ if (!err) -+ *mirror_port = mirror; -+ -+ return err; -+} -+ - int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port, - u16 mode) - { ---- a/drivers/net/dsa/mv88e6xxx/port.h -+++ 
b/drivers/net/dsa/mv88e6xxx/port.h -@@ -368,6 +368,9 @@ int mv88e6352_port_link_state(struct mv8 - int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port); - int mv88e6095_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port, - int upstream_port); -+int mv88e6xxx_port_set_mirror(struct mv88e6xxx_chip *chip, int port, -+ enum mv88e6xxx_egress_direction direction, -+ bool mirror); - - int mv88e6xxx_port_disable_learn_limit(struct mv88e6xxx_chip *chip, int port); - int mv88e6xxx_port_disable_pri_override(struct mv88e6xxx_chip *chip, int port); diff --git a/target/linux/generic/backport-5.4/748-v5.5-net-dsa-mv88e6xxx-fix-broken-if-statement-because-of.patch b/target/linux/generic/backport-5.4/748-v5.5-net-dsa-mv88e6xxx-fix-broken-if-statement-because-of.patch deleted file mode 100644 index 37e7a7f2a9..0000000000 --- a/target/linux/generic/backport-5.4/748-v5.5-net-dsa-mv88e6xxx-fix-broken-if-statement-because-of.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 4e4637b10374ede3cd33d7e1b389e6cea6343ea3 Mon Sep 17 00:00:00 2001 -From: Colin Ian King <colin.king@canonical.com> -Date: Tue, 12 Nov 2019 13:05:23 +0000 -Subject: [PATCH] net: dsa: mv88e6xxx: fix broken if statement because of a - stray semicolon - -There is a stray semicolon in an if statement that will cause a dev_err -message to be printed unconditionally. Fix this by removing the stray -semicolon. - -Addresses-Coverity: ("Stay semicolon") -Fixes: f0942e00a1ab ("net: dsa: mv88e6xxx: Add support for port mirroring") -Signed-off-by: Colin Ian King <colin.king@canonical.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/dsa/mv88e6xxx/chip.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/net/dsa/mv88e6xxx/chip.c -+++ b/drivers/net/dsa/mv88e6xxx/chip.c -@@ -4993,7 +4993,7 @@ static void mv88e6xxx_port_mirror_del(st - if (chip->info->ops->set_egress_port(chip, - direction, - dsa_upstream_port(ds, -- port))); -+ port))) - dev_err(ds->dev, "failed to set egress port\n"); - } - diff --git a/target/linux/generic/backport-5.4/749-v5.5-net-dsa-mv88e6xxx-Fix-masking-of-egress-port.patch b/target/linux/generic/backport-5.4/749-v5.5-net-dsa-mv88e6xxx-Fix-masking-of-egress-port.patch deleted file mode 100644 index 497a808511..0000000000 --- a/target/linux/generic/backport-5.4/749-v5.5-net-dsa-mv88e6xxx-Fix-masking-of-egress-port.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 3ee339eb28959629db33aaa2b8cde4c63c6289eb Mon Sep 17 00:00:00 2001 -From: Andrew Lunn <andrew@lunn.ch> -Date: Thu, 27 Feb 2020 21:20:49 +0100 -Subject: [PATCH] net: dsa: mv88e6xxx: Fix masking of egress port - -Add missing ~ to the usage of the mask. - -Reported-by: Kevin Benson <Kevin.Benson@zii.aero> -Reported-by: Chris Healy <Chris.Healy@zii.aero> -Fixes: 5c74c54ce6ff ("net: dsa: mv88e6xxx: Split monitor port configuration") -Signed-off-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/dsa/mv88e6xxx/global1.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/drivers/net/dsa/mv88e6xxx/global1.c -+++ b/drivers/net/dsa/mv88e6xxx/global1.c -@@ -309,13 +309,13 @@ int mv88e6095_g1_set_egress_port(struct - switch (direction) { - case MV88E6XXX_EGRESS_DIR_INGRESS: - dest_port_chip = &chip->ingress_dest_port; -- reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; -+ reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; - reg |= port << - __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK); - break; - case MV88E6XXX_EGRESS_DIR_EGRESS: - dest_port_chip = &chip->egress_dest_port; -- reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; -+ reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; - reg |= port << - __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); - break; diff --git a/target/linux/generic/backport-5.4/750-v5.5-net-phy-add-support-for-clause-37-auto-negotiation.patch b/target/linux/generic/backport-5.4/750-v5.5-net-phy-add-support-for-clause-37-auto-negotiation.patch deleted file mode 100644 index 69c56eca7a..0000000000 --- a/target/linux/generic/backport-5.4/750-v5.5-net-phy-add-support-for-clause-37-auto-negotiation.patch +++ /dev/null @@ -1,195 +0,0 @@ -From fa6e98cee558622565c97924e922b97340aeabd8 Mon Sep 17 00:00:00 2001 -From: Heiner Kallweit <hkallweit1@gmail.com> -Date: Tue, 22 Oct 2019 11:31:07 -0700 -Subject: [PATCH] net: phy: add support for clause 37 auto-negotiation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch adds support for clause 37 1000Base-X auto-negotiation. - -Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com> -Signed-off-by: Tao Ren <taoren@fb.com> -Tested-by: René van Dorst <opensource@vdorst.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/phy/phy_device.c | 139 +++++++++++++++++++++++++++++++++++ - include/linux/phy.h | 4 + - 2 files changed, 143 insertions(+) - ---- a/drivers/net/phy/phy_device.c -+++ b/drivers/net/phy/phy_device.c -@@ -1682,6 +1682,40 @@ static int genphy_config_advert(struct p - } - - /** -+ * genphy_c37_config_advert - sanitize and advertise auto-negotiation parameters -+ * @phydev: target phy_device struct -+ * -+ * Description: Writes MII_ADVERTISE with the appropriate values, -+ * after sanitizing the values to make sure we only advertise -+ * what is supported. Returns < 0 on error, 0 if the PHY's advertisement -+ * hasn't changed, and > 0 if it has changed. This function is intended -+ * for Clause 37 1000Base-X mode. -+ */ -+static int genphy_c37_config_advert(struct phy_device *phydev) -+{ -+ u16 adv = 0; -+ -+ /* Only allow advertising what this PHY supports */ -+ linkmode_and(phydev->advertising, phydev->advertising, -+ phydev->supported); -+ -+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, -+ phydev->advertising)) -+ adv |= ADVERTISE_1000XFULL; -+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, -+ phydev->advertising)) -+ adv |= ADVERTISE_1000XPAUSE; -+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, -+ phydev->advertising)) -+ adv |= ADVERTISE_1000XPSE_ASYM; -+ -+ return phy_modify_changed(phydev, MII_ADVERTISE, -+ ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE | -+ ADVERTISE_1000XHALF | ADVERTISE_1000XPSE_ASYM, -+ adv); -+} -+ -+/** - * genphy_config_eee_advert - disable unwanted eee mode advertisement - * @phydev: target phy_device struct - * -@@ -1790,6 +1824,54 @@ int __genphy_config_aneg(struct phy_devi - EXPORT_SYMBOL(__genphy_config_aneg); - - /** -+ * genphy_c37_config_aneg - restart auto-negotiation or write BMCR -+ * @phydev: target phy_device struct -+ * -+ * Description: If auto-negotiation is enabled, we configure the -+ * advertising, and then restart auto-negotiation. 
If it is not -+ * enabled, then we write the BMCR. This function is intended -+ * for use with Clause 37 1000Base-X mode. -+ */ -+int genphy_c37_config_aneg(struct phy_device *phydev) -+{ -+ int err, changed; -+ -+ if (phydev->autoneg != AUTONEG_ENABLE) -+ return genphy_setup_forced(phydev); -+ -+ err = phy_modify(phydev, MII_BMCR, BMCR_SPEED1000 | BMCR_SPEED100, -+ BMCR_SPEED1000); -+ if (err) -+ return err; -+ -+ changed = genphy_c37_config_advert(phydev); -+ if (changed < 0) /* error */ -+ return changed; -+ -+ if (!changed) { -+ /* Advertisement hasn't changed, but maybe aneg was never on to -+ * begin with? Or maybe phy was isolated? -+ */ -+ int ctl = phy_read(phydev, MII_BMCR); -+ -+ if (ctl < 0) -+ return ctl; -+ -+ if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) -+ changed = 1; /* do restart aneg */ -+ } -+ -+ /* Only restart aneg if we are advertising something different -+ * than we were before. -+ */ -+ if (changed > 0) -+ return genphy_restart_aneg(phydev); -+ -+ return 0; -+} -+EXPORT_SYMBOL(genphy_c37_config_aneg); -+ -+/** - * genphy_aneg_done - return auto-negotiation status - * @phydev: target phy_device struct - * -@@ -1962,6 +2044,63 @@ int genphy_read_status(struct phy_device - EXPORT_SYMBOL(genphy_read_status); - - /** -+ * genphy_c37_read_status - check the link status and update current link state -+ * @phydev: target phy_device struct -+ * -+ * Description: Check the link, then figure out the current state -+ * by comparing what we advertise with what the link partner -+ * advertises. This function is for Clause 37 1000Base-X mode. 
-+ */ -+int genphy_c37_read_status(struct phy_device *phydev) -+{ -+ int lpa, err, old_link = phydev->link; -+ -+ /* Update the link, but return if there was an error */ -+ err = genphy_update_link(phydev); -+ if (err) -+ return err; -+ -+ /* why bother the PHY if nothing can have changed */ -+ if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) -+ return 0; -+ -+ phydev->duplex = DUPLEX_UNKNOWN; -+ phydev->pause = 0; -+ phydev->asym_pause = 0; -+ -+ if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { -+ lpa = phy_read(phydev, MII_LPA); -+ if (lpa < 0) -+ return lpa; -+ -+ linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, -+ phydev->lp_advertising, lpa & LPA_LPACK); -+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, -+ phydev->lp_advertising, lpa & LPA_1000XFULL); -+ linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, -+ phydev->lp_advertising, lpa & LPA_1000XPAUSE); -+ linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, -+ phydev->lp_advertising, -+ lpa & LPA_1000XPAUSE_ASYM); -+ -+ phy_resolve_aneg_linkmode(phydev); -+ } else if (phydev->autoneg == AUTONEG_DISABLE) { -+ int bmcr = phy_read(phydev, MII_BMCR); -+ -+ if (bmcr < 0) -+ return bmcr; -+ -+ if (bmcr & BMCR_FULLDPLX) -+ phydev->duplex = DUPLEX_FULL; -+ else -+ phydev->duplex = DUPLEX_HALF; -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL(genphy_c37_read_status); -+ -+/** - * genphy_soft_reset - software reset the PHY via BMCR_RESET bit - * @phydev: target phy_device struct - * ---- a/include/linux/phy.h -+++ b/include/linux/phy.h -@@ -1120,6 +1120,10 @@ int genphy_read_mmd_unsupported(struct p - int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, - u16 regnum, u16 val); - -+/* Clause 37 */ -+int genphy_c37_config_aneg(struct phy_device *phydev); -+int genphy_c37_read_status(struct phy_device *phydev); -+ - /* Clause 45 PHY */ - int genphy_c45_restart_aneg(struct phy_device *phydev); - int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart); 
diff --git a/target/linux/generic/backport-5.4/751-v5.6-net-mvmdio-avoid-error-message-for-optional-IRQ.patch b/target/linux/generic/backport-5.4/751-v5.6-net-mvmdio-avoid-error-message-for-optional-IRQ.patch deleted file mode 100644 index 6d51de8372..0000000000 --- a/target/linux/generic/backport-5.4/751-v5.6-net-mvmdio-avoid-error-message-for-optional-IRQ.patch +++ /dev/null @@ -1,33 +0,0 @@ -From fa2632f74e57bbc869c8ad37751a11b6147a3acc Mon Sep 17 00:00:00 2001 -From: Chris Packham <chris.packham@alliedtelesis.co.nz> -Date: Mon, 16 Mar 2020 20:49:07 +1300 -Subject: [PATCH] net: mvmdio: avoid error message for optional IRQ - -Per the dt-binding the interrupt is optional so use -platform_get_irq_optional() instead of platform_get_irq(). Since -commit 7723f4c5ecdb ("driver core: platform: Add an error message to -platform_get_irq*()") platform_get_irq() produces an error message - - orion-mdio f1072004.mdio: IRQ index 0 not found - -which is perfectly normal if one hasn't specified the optional property -in the device tree. - -Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/ethernet/marvell/mvmdio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/net/ethernet/marvell/mvmdio.c -+++ b/drivers/net/ethernet/marvell/mvmdio.c -@@ -347,7 +347,7 @@ static int orion_mdio_probe(struct platf - } - - -- dev->err_interrupt = platform_get_irq(pdev, 0); -+ dev->err_interrupt = platform_get_irq_optional(pdev, 0); - if (dev->err_interrupt > 0 && - resource_size(r) < MVMDIO_ERR_INT_MASK + 4) { - dev_err(&pdev->dev, diff --git a/target/linux/generic/backport-5.4/752-v5.8-net-dsa-provide-an-option-for-drivers-to-always-rece.patch b/target/linux/generic/backport-5.4/752-v5.8-net-dsa-provide-an-option-for-drivers-to-always-rece.patch deleted file mode 100644 index 52d9351b70..0000000000 --- a/target/linux/generic/backport-5.4/752-v5.8-net-dsa-provide-an-option-for-drivers-to-always-rece.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 54a0ed0df49609f4e3f098f8943e38e389dc2e15 Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Tue, 12 May 2020 20:20:25 +0300 -Subject: net: dsa: provide an option for drivers to always receive bridge - VLANs - -DSA assumes that a bridge which has vlan filtering disabled is not -vlan aware, and ignores all vlan configuration. However, the kernel -software bridge code allows configuration in this state. - -This causes the kernel's idea of the bridge vlan state and the -hardware state to disagree, so "bridge vlan show" indicates a correct -configuration but the hardware lacks all configuration. Even worse, -enabling vlan filtering on a DSA bridge immediately blocks all traffic -which, given the output of "bridge vlan show", is very confusing. - -Provide an option that drivers can set to indicate they want to receive -vlan configuration even when vlan filtering is disabled. At the very -least, this is safe for Marvell DSA bridges, which do not look up -ingress traffic in the VTU if the port is in 8021Q disabled state. 
It is -also safe for the Ocelot switch family. Whether this change is suitable -for all DSA bridges is not known. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - include/net/dsa.h | 7 +++++++ - net/dsa/dsa_priv.h | 1 + - net/dsa/port.c | 14 ++++++++++++++ - net/dsa/slave.c | 8 ++++---- - 4 files changed, 26 insertions(+), 4 deletions(-) - ---- a/include/net/dsa.h -+++ b/include/net/dsa.h -@@ -270,6 +270,13 @@ struct dsa_switch { - */ - bool vlan_filtering_is_global; - -+ /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges -+ * that have vlan_filtering=0. All drivers should ideally set this (and -+ * then the option would get removed), but it is unknown whether this -+ * would break things or not. -+ */ -+ bool configure_vlan_while_not_filtering; -+ - /* In case vlan_filtering_is_global is set, the VLAN awareness state - * should be retrieved from here and not from the per-port settings. 
- */ ---- a/net/dsa/dsa_priv.h -+++ b/net/dsa/dsa_priv.h -@@ -139,6 +139,7 @@ int dsa_port_bridge_join(struct dsa_port - void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); - int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct switchdev_trans *trans); -+bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); - int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans); - int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, ---- a/net/dsa/port.c -+++ b/net/dsa/port.c -@@ -238,6 +238,20 @@ int dsa_port_vlan_filtering(struct dsa_p - return 0; - } - -+/* This enforces legacy behavior for switch drivers which assume they can't -+ * receive VLAN configuration when enslaved to a bridge with vlan_filtering=0 -+ */ -+bool dsa_port_skip_vlan_configuration(struct dsa_port *dp) -+{ -+ struct dsa_switch *ds = dp->ds; -+ -+ if (!dp->bridge_dev) -+ return false; -+ -+ return (!ds->configure_vlan_while_not_filtering && -+ !br_vlan_enabled(dp->bridge_dev)); -+} -+ - int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans) - { ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -319,7 +319,7 @@ static int dsa_slave_vlan_add(struct net - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - -- if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) -+ if (dsa_port_skip_vlan_configuration(dp)) - return 0; - - vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); -@@ -386,7 +386,7 @@ static int dsa_slave_vlan_del(struct net - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - -- if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) -+ if (dsa_port_skip_vlan_configuration(dp)) - return 0; - - /* Do not deprogram the CPU port as it may be shared with other user -@@ -1120,7 +1120,7 @@ static int dsa_slave_vlan_rx_add_vid(str - * need to emulate the switchdev prepare + commit phase. 
- */ - if (dp->bridge_dev) { -- if (!br_vlan_enabled(dp->bridge_dev)) -+ if (dsa_port_skip_vlan_configuration(dp)) - return 0; - - /* br_vlan_get_info() returns -EINVAL or -ENOENT if the -@@ -1154,7 +1154,7 @@ static int dsa_slave_vlan_rx_kill_vid(st - * need to emulate the switchdev prepare + commit phase. - */ - if (dp->bridge_dev) { -- if (!br_vlan_enabled(dp->bridge_dev)) -+ if (dsa_port_skip_vlan_configuration(dp)) - return 0; - - /* br_vlan_get_info() returns -EINVAL or -ENOENT if the diff --git a/target/linux/generic/backport-5.4/753-v5.8-net-dsa-mt7530-fix-VLAN-setup.patch b/target/linux/generic/backport-5.4/753-v5.8-net-dsa-mt7530-fix-VLAN-setup.patch deleted file mode 100644 index 0804cea9f7..0000000000 --- a/target/linux/generic/backport-5.4/753-v5.8-net-dsa-mt7530-fix-VLAN-setup.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 0141792f8b7300006b874dda1c35acd0abd90d9d Mon Sep 17 00:00:00 2001 -From: DENG Qingfang <dqfext@gmail.com> -Date: Fri, 15 May 2020 23:25:55 +0800 -Subject: net: dsa: mt7530: fix VLAN setup - -Allow DSA to add VLAN entries even if VLAN filtering is disabled, so -enabling it will not block the traffic of existent ports in the bridge - -Signed-off-by: DENG Qingfang <dqfext@gmail.com> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - drivers/net/dsa/mt7530.c | 13 +------------ - 1 file changed, 1 insertion(+), 12 deletions(-) - ---- a/drivers/net/dsa/mt7530.c -+++ b/drivers/net/dsa/mt7530.c -@@ -1083,12 +1083,6 @@ mt7530_port_vlan_add(struct dsa_switch * - struct mt7530_priv *priv = ds->priv; - u16 vid; - -- /* The port is kept as VLAN-unaware if bridge with vlan_filtering not -- * being set. 
-- */ -- if (!dsa_port_is_vlan_filtering(&ds->ports[port])) -- return; -- - mutex_lock(&priv->reg_mutex); - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { -@@ -1114,12 +1108,6 @@ mt7530_port_vlan_del(struct dsa_switch * - struct mt7530_priv *priv = ds->priv; - u16 vid, pvid; - -- /* The port is kept as VLAN-unaware if bridge with vlan_filtering not -- * being set. -- */ -- if (!dsa_port_is_vlan_filtering(&ds->ports[port])) -- return 0; -- - mutex_lock(&priv->reg_mutex); - - pvid = priv->ports[port].pvid; -@@ -1232,6 +1220,7 @@ mt7530_setup(struct dsa_switch *ds) - * as two netdev instances. - */ - dn = ds->ports[MT7530_CPU_PORT].master->dev.of_node->parent; -+ ds->configure_vlan_while_not_filtering = true; - - if (priv->id == ID_MT7530) { - regulator_set_voltage(priv->core_pwr, 1000000, 1000000); diff --git a/target/linux/generic/backport-5.4/756-v5.8-net-dsa-rtl8366-Pass-GENMASK-signed-bits.patch b/target/linux/generic/backport-5.4/756-v5.8-net-dsa-rtl8366-Pass-GENMASK-signed-bits.patch deleted file mode 100644 index b0ab598365..0000000000 --- a/target/linux/generic/backport-5.4/756-v5.8-net-dsa-rtl8366-Pass-GENMASK-signed-bits.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 733993f502f254912b1415e13f73651d9f2e74ef Mon Sep 17 00:00:00 2001 -From: Andrew Lunn <andrew@lunn.ch> -Date: Sun, 5 Jul 2020 22:42:27 +0200 -Subject: [PATCH 1/5] net: dsa: rtl8366: Pass GENMASK() signed bits - -Oddly, GENMASK() requires signed bit numbers, so that it can compare -them for < 0. If passed an unsigned type, we get warnings about the -test never being true. - -Signed-off-by: Andrew Lunn <andrew@lunn.ch> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/dsa/rtl8366.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/net/dsa/rtl8366.c -+++ b/drivers/net/dsa/rtl8366.c -@@ -311,7 +311,7 @@ int rtl8366_init_vlan(struct realtek_smi - /* For the CPU port, make all ports members of this - * VLAN. - */ -- mask = GENMASK(smi->num_ports - 1, 0); -+ mask = GENMASK((int)smi->num_ports - 1, 0); - else - /* For all other ports, enable itself plus the - * CPU port. diff --git a/target/linux/generic/backport-5.4/757-v5.8-net-dsa-tag_rtl4_a-Implement-Realtek-4-byte-A-tag.patch b/target/linux/generic/backport-5.4/757-v5.8-net-dsa-tag_rtl4_a-Implement-Realtek-4-byte-A-tag.patch deleted file mode 100644 index 70d7000ba1..0000000000 --- a/target/linux/generic/backport-5.4/757-v5.8-net-dsa-tag_rtl4_a-Implement-Realtek-4-byte-A-tag.patch +++ /dev/null @@ -1,232 +0,0 @@ -From 078ced30af696b52a450a016a16eb47499d68117 Mon Sep 17 00:00:00 2001 -From: Linus Walleij <linus.walleij@linaro.org> -Date: Wed, 8 Jul 2020 14:25:36 +0200 -Subject: [PATCH 2/5] net: dsa: tag_rtl4_a: Implement Realtek 4 byte A tag - -This implements the known parts of the Realtek 4 byte -tag protocol version 0xA, as found in the RTL8366RB -DSA switch. - -It is designated as protocol version 0xA as a -different Realtek 4 byte tag format with protocol -version 0x9 is known to exist in the Realtek RTL8306 -chips. - -The tag and switch chip lacks public documentation, so -the tag format has been reverse-engineered from -packet dumps. As only ingress traffic has been available -for analysis an egress tag has not been possible to -develop (even using educated guesses about bit fields) -so this is as far as it gets. It is not known if the -switch even supports egress tagging. - -Excessive attempts to figure out the egress tag format -was made. When nothing else worked, I just tried all bit -combinations with 0xannp where a is protocol and p is -port. 
I looped through all values several times trying -to get a response from ping, without any positive -result. - -Using just these ingress tags however, the switch -functionality is vastly improved and the packets find -their way into the destination port without any -tricky VLAN configuration. On the D-Link DIR-685 the -LAN ports now come up and respond to ping without -any command line configuration so this is a real -improvement for users. - -Egress packets need to be restricted to the proper -target ports using VLAN, which the RTL8366RB DSA -switch driver already sets up. - -Cc: DENG Qingfang <dqfext@gmail.com> -Cc: Mauri Sandberg <sandberg@mailfence.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: Linus Walleij <linus.walleij@linaro.org> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - include/net/dsa.h | 2 + - net/dsa/Kconfig | 7 +++ - net/dsa/Makefile | 1 + - net/dsa/tag_rtl4_a.c | 130 +++++++++++++++++++++++++++++++++++++++++++ - 4 files changed, 140 insertions(+) - create mode 100644 net/dsa/tag_rtl4_a.c - ---- a/include/net/dsa.h -+++ b/include/net/dsa.h -@@ -42,6 +42,7 @@ struct phylink_link_state; - #define DSA_TAG_PROTO_8021Q_VALUE 12 - #define DSA_TAG_PROTO_SJA1105_VALUE 13 - #define DSA_TAG_PROTO_KSZ8795_VALUE 14 -+#define DSA_TAG_PROTO_RTL4_A_VALUE 17 - - enum dsa_tag_protocol { - DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, -@@ -59,6 +60,7 @@ enum dsa_tag_protocol { - DSA_TAG_PROTO_8021Q = DSA_TAG_PROTO_8021Q_VALUE, - DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE, - DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE, -+ DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, - }; - - struct packet_type; ---- a/net/dsa/Kconfig -+++ b/net/dsa/Kconfig -@@ -80,6 +80,13 @@ config NET_DSA_TAG_KSZ - Say Y if you want to enable support for tagging frames for the - Microchip 8795/9477/9893 families of switches. 
- -+config NET_DSA_TAG_RTL4_A -+ tristate "Tag driver for Realtek 4 byte protocol A tags" -+ help -+ Say Y or M if you want to enable support for tagging frames for the -+ Realtek switches with 4 byte protocol A tags, sich as found in -+ the Realtek RTL8366RB. -+ - config NET_DSA_TAG_QCA - tristate "Tag driver for Qualcomm Atheros QCA8K switches" - help ---- a/net/dsa/Makefile -+++ b/net/dsa/Makefile -@@ -10,6 +10,7 @@ obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa - obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o - obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o - obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o -+obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o - obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o - obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o - obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o ---- /dev/null -+++ b/net/dsa/tag_rtl4_a.c -@@ -0,0 +1,130 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Handler for Realtek 4 byte DSA switch tags -+ * Currently only supports protocol "A" found in RTL8366RB -+ * Copyright (c) 2020 Linus Walleij <linus.walleij@linaro.org> -+ * -+ * This "proprietary tag" header looks like so: -+ * -+ * ------------------------------------------------- -+ * | MAC DA | MAC SA | 0x8899 | 2 bytes tag | Type | -+ * ------------------------------------------------- -+ * -+ * The 2 bytes tag form a 16 bit big endian word. The exact -+ * meaning has been guessed from packet dumps from ingress -+ * frames, as no working egress traffic has been available -+ * we do not know the format of the egress tags or if they -+ * are even supported. 
-+ */ -+ -+#include <linux/etherdevice.h> -+#include <linux/bits.h> -+ -+#include "dsa_priv.h" -+ -+#define RTL4_A_HDR_LEN 4 -+#define RTL4_A_ETHERTYPE 0x8899 -+#define RTL4_A_PROTOCOL_SHIFT 12 -+/* -+ * 0x1 = Realtek Remote Control protocol (RRCP) -+ * 0x2/0x3 seems to be used for loopback testing -+ * 0x9 = RTL8306 DSA protocol -+ * 0xa = RTL8366RB DSA protocol -+ */ -+#define RTL4_A_PROTOCOL_RTL8366RB 0xa -+ -+static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, -+ struct net_device *dev) -+{ -+ /* -+ * Just let it pass thru, we don't know if it is possible -+ * to tag a frame with the 0x8899 ethertype and direct it -+ * to a specific port, all attempts at reverse-engineering have -+ * ended up with the frames getting dropped. -+ * -+ * The VLAN set-up needs to restrict the frames to the right port. -+ * -+ * If you have documentation on the tagging format for RTL8366RB -+ * (tag type A) then please contribute. -+ */ -+ return skb; -+} -+ -+static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, -+ struct net_device *dev, -+ struct packet_type *pt) -+{ -+ u16 protport; -+ __be16 *p; -+ u16 etype; -+ u8 *tag; -+ u8 prot; -+ u8 port; -+ -+ if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN))) -+ return NULL; -+ -+ /* The RTL4 header has its own custom Ethertype 0x8899 and that -+ * starts right at the beginning of the packet, after the src -+ * ethernet addr. Apparantly skb->data always points 2 bytes in, -+ * behind the Ethertype. 
-+ */ -+ tag = skb->data - 2; -+ p = (__be16 *)tag; -+ etype = ntohs(*p); -+ if (etype != RTL4_A_ETHERTYPE) { -+ /* Not custom, just pass through */ -+ netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype); -+ return skb; -+ } -+ p = (__be16 *)(tag + 2); -+ protport = ntohs(*p); -+ /* The 4 upper bits are the protocol */ -+ prot = (protport >> RTL4_A_PROTOCOL_SHIFT) & 0x0f; -+ if (prot != RTL4_A_PROTOCOL_RTL8366RB) { -+ netdev_err(dev, "unknown realtek protocol 0x%01x\n", prot); -+ return NULL; -+ } -+ port = protport & 0xff; -+ -+ skb->dev = dsa_master_find_slave(dev, 0, port); -+ if (!skb->dev) { -+ netdev_dbg(dev, "could not find slave for port %d\n", port); -+ return NULL; -+ } -+ -+ /* Remove RTL4 tag and recalculate checksum */ -+ skb_pull_rcsum(skb, RTL4_A_HDR_LEN); -+ -+ /* Move ethernet DA and SA in front of the data */ -+ memmove(skb->data - ETH_HLEN, -+ skb->data - ETH_HLEN - RTL4_A_HDR_LEN, -+ 2 * ETH_ALEN); -+ -+ skb->offload_fwd_mark = 1; -+ -+ return skb; -+} -+ -+static int rtl4a_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, -+ int *offset) -+{ -+ *offset = RTL4_A_HDR_LEN; -+ /* Skip past the tag and fetch the encapsulated Ethertype */ -+ *proto = ((__be16 *)skb->data)[1]; -+ -+ return 0; -+} -+ -+static const struct dsa_device_ops rtl4a_netdev_ops = { -+ .name = "rtl4a", -+ .proto = DSA_TAG_PROTO_RTL4_A, -+ .xmit = rtl4a_tag_xmit, -+ .rcv = rtl4a_tag_rcv, -+ .flow_dissect = rtl4a_tag_flow_dissect, -+ .overhead = RTL4_A_HDR_LEN, -+}; -+module_dsa_tag_driver(rtl4a_netdev_ops); -+ -+MODULE_LICENSE("GPL"); -+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL4_A); diff --git a/target/linux/generic/backport-5.4/758-v5.8-net-dsa-rtl8366rb-Support-the-CPU-DSA-tag.patch b/target/linux/generic/backport-5.4/758-v5.8-net-dsa-rtl8366rb-Support-the-CPU-DSA-tag.patch deleted file mode 100644 index b68c033bbe..0000000000 --- a/target/linux/generic/backport-5.4/758-v5.8-net-dsa-rtl8366rb-Support-the-CPU-DSA-tag.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 
c633ba43b7a9c2bfdb992ffd198d4c661520466f Mon Sep 17 00:00:00 2001 -From: Linus Walleij <linus.walleij@linaro.org> -Date: Wed, 8 Jul 2020 14:25:37 +0200 -Subject: [PATCH 3/5] net: dsa: rtl8366rb: Support the CPU DSA tag - -This activates the support to use the CPU tag to properly -direct ingress traffic to the right port. - -Bit 15 in register RTL8368RB_CPU_CTRL_REG can be set to -1 to disable the insertion of the CPU tag which is what -the code currently does. The bit 15 define calls this -setting RTL8368RB_CPU_INSTAG which is confusing since the -inverse meaning is implied: programmers may think that -setting this bit to 1 will *enable* inserting the tag -rather than disabling it, so rename this setting in -bit 15 to RTL8368RB_CPU_NO_TAG which is more to the -point. - -After this e.g. ping works out-of-the-box with the -RTL8366RB. - -Cc: DENG Qingfang <dqfext@gmail.com> -Cc: Mauri Sandberg <sandberg@mailfence.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: Linus Walleij <linus.walleij@linaro.org> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/dsa/Kconfig | 1 + - drivers/net/dsa/rtl8366rb.c | 31 ++++++++----------------------- - 2 files changed, 9 insertions(+), 23 deletions(-) - ---- a/drivers/net/dsa/Kconfig -+++ b/drivers/net/dsa/Kconfig -@@ -66,6 +66,7 @@ config NET_DSA_QCA8K - config NET_DSA_REALTEK_SMI - tristate "Realtek SMI Ethernet switch family support" - depends on NET_DSA -+ select NET_DSA_TAG_RTL4_A - select FIXED_PHY - select IRQ_DOMAIN - select REALTEK_PHY ---- a/drivers/net/dsa/rtl8366rb.c -+++ b/drivers/net/dsa/rtl8366rb.c -@@ -109,8 +109,8 @@ - /* CPU port control reg */ - #define RTL8368RB_CPU_CTRL_REG 0x0061 - #define RTL8368RB_CPU_PORTS_MSK 0x00FF --/* Enables inserting custom tag length/type 0x8899 */ --#define RTL8368RB_CPU_INSTAG BIT(15) -+/* Disables inserting custom tag length/type 0x8899 */ -+#define RTL8368RB_CPU_NO_TAG BIT(15) - - #define RTL8366RB_SMAR0 0x0070 /* bits 0..15 */ - #define RTL8366RB_SMAR1 0x0071 /* bits 16..31 */ -@@ -844,16 +844,14 @@ static int rtl8366rb_setup(struct dsa_sw - if (ret) - return ret; - -- /* Enable CPU port and enable inserting CPU tag -+ /* Enable CPU port with custom DSA tag 8899. - * -- * Disabling RTL8368RB_CPU_INSTAG here will change the behaviour -- * of the switch totally and it will start talking Realtek RRCP -- * internally. It is probably possible to experiment with this, -- * but then the kernel needs to understand and handle RRCP first. -+ * If you set RTL8368RB_CPU_NO_TAG (bit 15) in this registers -+ * the custom tag is turned off. - */ - ret = regmap_update_bits(smi->map, RTL8368RB_CPU_CTRL_REG, - 0xFFFF, -- RTL8368RB_CPU_INSTAG | BIT(smi->cpu_port)); -+ BIT(smi->cpu_port)); - if (ret) - return ret; - -@@ -966,21 +964,8 @@ static int rtl8366rb_setup(struct dsa_sw - static enum dsa_tag_protocol rtl8366_get_tag_protocol(struct dsa_switch *ds, - int port) - { -- /* For now, the RTL switches are handled without any custom tags. 
-- * -- * It is possible to turn on "custom tags" by removing the -- * RTL8368RB_CPU_INSTAG flag when enabling the port but what it -- * does is unfamiliar to DSA: ethernet frames of type 8899, the Realtek -- * Remote Control Protocol (RRCP) start to appear on the CPU port of -- * the device. So this is not the ordinary few extra bytes in the -- * frame. Instead it appears that the switch starts to talk Realtek -- * RRCP internally which means a pretty complex RRCP implementation -- * decoding and responding the RRCP protocol is needed to exploit this. -- * -- * The OpenRRCP project (dormant since 2009) have reverse-egineered -- * parts of the protocol. -- */ -- return DSA_TAG_PROTO_NONE; -+ /* This switch uses the 4 byte protocol A Realtek DSA tag */ -+ return DSA_TAG_PROTO_RTL4_A; - } - - static void rtl8366rb_adjust_link(struct dsa_switch *ds, int port, diff --git a/target/linux/generic/backport-5.4/760-net-ethernet-mediatek-Integrate-GDM-PSE-setup-operat.patch b/target/linux/generic/backport-5.4/760-net-ethernet-mediatek-Integrate-GDM-PSE-setup-operat.patch deleted file mode 100644 index e352b0380e..0000000000 --- a/target/linux/generic/backport-5.4/760-net-ethernet-mediatek-Integrate-GDM-PSE-setup-operat.patch +++ /dev/null @@ -1,80 +0,0 @@ -From: MarkLee <Mark-MC.Lee@mediatek.com> -Date: Wed, 13 Nov 2019 10:38:42 +0800 -Subject: [PATCH] net: ethernet: mediatek: Integrate GDM/PSE setup operations - -Integrate GDM/PSE setup operations into single function "mtk_gdm_config" - -Signed-off-by: MarkLee <Mark-MC.Lee@mediatek.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - ---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c -+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c -@@ -2232,6 +2232,28 @@ static int mtk_start_dma(struct mtk_eth - return 0; - } - -+static void mtk_gdm_config(struct mtk_eth *eth, u32 config) -+{ -+ int i; -+ -+ for (i = 0; i < MTK_MAC_COUNT; i++) { -+ u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i)); -+ -+ /* default setup the forward port to send frame to PDMA */ -+ val &= ~0xffff; -+ -+ /* Enable RX checksum */ -+ val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN; -+ -+ val |= config; -+ -+ mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i)); -+ } -+ /* Reset and enable PSE */ -+ mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); -+ mtk_w32(eth, 0, MTK_RST_GL); -+} -+ - static int mtk_open(struct net_device *dev) - { - struct mtk_mac *mac = netdev_priv(dev); -@@ -2427,8 +2449,6 @@ static int mtk_hw_init(struct mtk_eth *e - mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); - mtk_tx_irq_disable(eth, ~0); - mtk_rx_irq_disable(eth, ~0); -- mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); -- mtk_w32(eth, 0, MTK_RST_GL); - - /* FE int grouping */ - mtk_w32(eth, MTK_TX_DONE_INT, MTK_PDMA_INT_GRP1); -@@ -2437,18 +2457,7 @@ static int mtk_hw_init(struct mtk_eth *e - mtk_w32(eth, MTK_RX_DONE_INT, MTK_QDMA_INT_GRP2); - mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); - -- for (i = 0; i < MTK_MAC_COUNT; i++) { -- u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i)); -- -- /* setup the forward port to send frame to PDMA */ -- val &= ~0xffff; -- -- /* Enable RX checksum */ -- val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN; -- -- /* setup the mac dma */ -- mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i)); -- } -+ mtk_gdm_config(eth, MTK_GDMA_TO_PDMA); - - return 0; - ---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h -+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h -@@ -84,6 +84,7 @@ - #define MTK_GDMA_ICS_EN BIT(22) - #define MTK_GDMA_TCS_EN BIT(21) - #define MTK_GDMA_UCS_EN BIT(20) -+#define MTK_GDMA_TO_PDMA 0x0 - - /* Unicast Filter MAC 
Address Register - Low */ - #define MTK_GDMA_MAC_ADRL(x) (0x508 + (x * 0x1000)) diff --git a/target/linux/generic/backport-5.4/761-net-ethernet-mediatek-Refine-the-timing-of-GDM-PSE-s.patch b/target/linux/generic/backport-5.4/761-net-ethernet-mediatek-Refine-the-timing-of-GDM-PSE-s.patch deleted file mode 100644 index d18d9f93eb..0000000000 --- a/target/linux/generic/backport-5.4/761-net-ethernet-mediatek-Refine-the-timing-of-GDM-PSE-s.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: MarkLee <Mark-MC.Lee@mediatek.com> -Date: Wed, 13 Nov 2019 10:38:43 +0800 -Subject: [PATCH] net: ethernet: mediatek: Refine the timing of GDM/PSE setup - -Refine the timing of GDM/PSE setup, move it from mtk_hw_init -to mtk_open. This is recommended by the mt762x HW design to -do GDM/PSE setup only after PDMA has been started. - -We exclude mt7628 in mtk_gdm_config function since it is a old IP -and there is no GDM/PSE block on it. - -Signed-off-by: MarkLee <Mark-MC.Lee@mediatek.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - ---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c -+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c -@@ -2236,6 +2236,9 @@ static void mtk_gdm_config(struct mtk_et - { - int i; - -+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) -+ return; -+ - for (i = 0; i < MTK_MAC_COUNT; i++) { - u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i)); - -@@ -2274,6 +2277,8 @@ static int mtk_open(struct net_device *d - if (err) - return err; - -+ mtk_gdm_config(eth, MTK_GDMA_TO_PDMA); -+ - napi_enable(ð->tx_napi); - napi_enable(ð->rx_napi); - mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); -@@ -2457,8 +2462,6 @@ static int mtk_hw_init(struct mtk_eth *e - mtk_w32(eth, MTK_RX_DONE_INT, MTK_QDMA_INT_GRP2); - mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); - -- mtk_gdm_config(eth, MTK_GDMA_TO_PDMA); -- - return 0; - - err_disable_pm: diff --git a/target/linux/generic/backport-5.4/762-net-ethernet-mediatek-Enable-GDM-GDMA_DROP_ALL-mode.patch 
b/target/linux/generic/backport-5.4/762-net-ethernet-mediatek-Enable-GDM-GDMA_DROP_ALL-mode.patch deleted file mode 100644 index e25f1211eb..0000000000 --- a/target/linux/generic/backport-5.4/762-net-ethernet-mediatek-Enable-GDM-GDMA_DROP_ALL-mode.patch +++ /dev/null @@ -1,33 +0,0 @@ -From: MarkLee <Mark-MC.Lee@mediatek.com> -Date: Wed, 13 Nov 2019 10:38:44 +0800 -Subject: [PATCH] net: ethernet: mediatek: Enable GDM GDMA_DROP_ALL mode - -Enable GDM GDMA_DROP_ALL mode to drop all packet during the -stop operation. This is recommended by the mt762x HW design -to drop all packet from GMAC before stopping PDMA. - -Signed-off-by: MarkLee <Mark-MC.Lee@mediatek.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - ---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c -+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c -@@ -2331,6 +2331,8 @@ static int mtk_stop(struct net_device *d - if (!refcount_dec_and_test(ð->dma_refcnt)) - return 0; - -+ mtk_gdm_config(eth, MTK_GDMA_DROP_ALL); -+ - mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); - mtk_rx_irq_disable(eth, MTK_RX_DONE_INT); - napi_disable(ð->tx_napi); ---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h -+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h -@@ -85,6 +85,7 @@ - #define MTK_GDMA_TCS_EN BIT(21) - #define MTK_GDMA_UCS_EN BIT(20) - #define MTK_GDMA_TO_PDMA 0x0 -+#define MTK_GDMA_DROP_ALL 0x7777 - - /* Unicast Filter MAC Address Register - Low */ - #define MTK_GDMA_MAC_ADRL(x) (0x508 + (x * 0x1000)) diff --git a/target/linux/generic/backport-5.4/765-v5.12-net-dsa-automatically-bring-up-DSA-master-when-openi.patch b/target/linux/generic/backport-5.4/765-v5.12-net-dsa-automatically-bring-up-DSA-master-when-openi.patch deleted file mode 100644 index 7ec26899f9..0000000000 --- a/target/linux/generic/backport-5.4/765-v5.12-net-dsa-automatically-bring-up-DSA-master-when-openi.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 9d5ef190e5615a7b63af89f88c4106a5bc127974 Mon Sep 17 00:00:00 2001 -From: Vladimir Oltean 
<vladimir.oltean@nxp.com> -Date: Fri, 5 Feb 2021 15:37:10 +0200 -Subject: [PATCH] net: dsa: automatically bring up DSA master when opening user - port - -DSA wants the master interface to be open before the user port is due to -historical reasons. The promiscuity of interfaces that are down used to -have issues, as referenced Lennert Buytenhek in commit df02c6ff2e39 -("dsa: fix master interface allmulti/promisc handling"). - -The bugfix mentioned there, commit b6c40d68ff64 ("net: only invoke -dev->change_rx_flags when device is UP"), was basically a "don't do -that" approach to working around the promiscuity while down issue. - -Further work done by Vlad Yasevich in commit d2615bf45069 ("net: core: -Always propagate flag changes to interfaces") has resolved the -underlying issue, and it is strictly up to the DSA and 8021q drivers -now, it is no longer mandated by the networking core that the master -interface must be up when changing its promiscuity. - -From DSA's point of view, deciding to error out in dsa_slave_open -because the master isn't up is -(a) a bad user experience and -(b) knocking at an open door. -Even if there still was an issue with promiscuity while down, DSA could -still just open the master and avoid it. - -Doing it this way has the additional benefit that user space can now -remove DSA-specific workarounds, like systemd-networkd with BindCarrier: -https://github.com/systemd/systemd/issues/7478 - -And we can finally remove one of the 2 bullets in the "Common pitfalls -using DSA setups" chapter. 
- -Tested with two cascaded DSA switches: - -$ ip link set sw0p2 up -fsl_enetc 0000:00:00.2 eno2: configuring for fixed/internal link mode -fsl_enetc 0000:00:00.2 eno2: Link is Up - 1Gbps/Full - flow control rx/tx -mscc_felix 0000:00:00.5 swp0: configuring for fixed/sgmii link mode -mscc_felix 0000:00:00.5 swp0: Link is Up - 1Gbps/Full - flow control off -8021q: adding VLAN 0 to HW filter on device swp0 -sja1105 spi2.0 sw0p2: configuring for phy/rgmii-id link mode -IPv6: ADDRCONF(NETDEV_CHANGE): eno2: link becomes ready -IPv6: ADDRCONF(NETDEV_CHANGE): swp0: link becomes ready - -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - Documentation/networking/dsa/dsa.rst | 4 ---- - net/dsa/slave.c | 7 +++++-- - 2 files changed, 5 insertions(+), 6 deletions(-) - ---- a/Documentation/networking/dsa/dsa.rst -+++ b/Documentation/networking/dsa/dsa.rst -@@ -273,10 +273,6 @@ will not make us go through the switch t - the Ethernet switch on the other end, expecting a tag will typically drop this - frame. - --Slave network devices check that the master network device is UP before allowing --you to administratively bring UP these slave network devices. A common --configuration mistake is forgetting to bring UP the master network device first. 
-- - Interactions with other subsystems - ================================== - ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -70,8 +70,11 @@ static int dsa_slave_open(struct net_dev - struct dsa_port *dp = dsa_slave_to_port(dev); - int err; - -- if (!(master->flags & IFF_UP)) -- return -ENETDOWN; -+ err = dev_open(master, NULL); -+ if (err < 0) { -+ netdev_err(dev, "failed to open master %s\n", master->name); -+ goto out; -+ } - - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { - err = dev_uc_add(master, dev->dev_addr); diff --git a/target/linux/generic/backport-5.4/770-v5.12-net-bridge-notify-switchdev-of-disappearance-of-old-.patch b/target/linux/generic/backport-5.4/770-v5.12-net-bridge-notify-switchdev-of-disappearance-of-old-.patch deleted file mode 100644 index df4e74cd96..0000000000 --- a/target/linux/generic/backport-5.4/770-v5.12-net-bridge-notify-switchdev-of-disappearance-of-old-.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 90dc8fd36078a536671adae884d0b929cce6480a Mon Sep 17 00:00:00 2001 -From: Vladimir Oltean <vladimir.oltean@nxp.com> -Date: Wed, 6 Jan 2021 11:51:30 +0200 -Subject: [PATCH] net: bridge: notify switchdev of disappearance of old FDB - entry upon migration - -Currently the bridge emits atomic switchdev notifications for -dynamically learnt FDB entries. Monitoring these notifications works -wonders for switchdev drivers that want to keep their hardware FDB in -sync with the bridge's FDB. 
- -For example station A wants to talk to station B in the diagram below, -and we are concerned with the behavior of the bridge on the DUT device: - - DUT - +-------------------------------------+ - | br0 | - | +------+ +------+ +------+ +------+ | - | | | | | | | | | | - | | swp0 | | swp1 | | swp2 | | eth0 | | - +-------------------------------------+ - | | | - Station A | | - | | - +--+------+--+ +--+------+--+ - | | | | | | | | - | | swp0 | | | | swp0 | | - Another | +------+ | | +------+ | Another - switch | br0 | | br0 | switch - | +------+ | | +------+ | - | | | | | | | | - | | swp1 | | | | swp1 | | - +--+------+--+ +--+------+--+ - | - Station B - -Interfaces swp0, swp1, swp2 are handled by a switchdev driver that has -the following property: frames injected from its control interface bypass -the internal address analyzer logic, and therefore, this hardware does -not learn from the source address of packets transmitted by the network -stack through it. So, since bridging between eth0 (where Station B is -attached) and swp0 (where Station A is attached) is done in software, -the switchdev hardware will never learn the source address of Station B. -So the traffic towards that destination will be treated as unknown, i.e. -flooded. - -This is where the bridge notifications come in handy. When br0 on the -DUT sees frames with Station B's MAC address on eth0, the switchdev -driver gets these notifications and can install a rule to send frames -towards Station B's address that are incoming from swp0, swp1, swp2, -only towards the control interface. This is all switchdev driver private -business, which the notification makes possible. 
- -All is fine until someone unplugs Station B's cable and moves it to the -other switch: - - DUT - +-------------------------------------+ - | br0 | - | +------+ +------+ +------+ +------+ | - | | | | | | | | | | - | | swp0 | | swp1 | | swp2 | | eth0 | | - +-------------------------------------+ - | | | - Station A | | - | | - +--+------+--+ +--+------+--+ - | | | | | | | | - | | swp0 | | | | swp0 | | - Another | +------+ | | +------+ | Another - switch | br0 | | br0 | switch - | +------+ | | +------+ | - | | | | | | | | - | | swp1 | | | | swp1 | | - +--+------+--+ +--+------+--+ - | - Station B - -Luckily for the use cases we care about, Station B is noisy enough that -the DUT hears it (on swp1 this time). swp1 receives the frames and -delivers them to the bridge, who enters the unlikely path in br_fdb_update -of updating an existing entry. It moves the entry in the software bridge -to swp1 and emits an addition notification towards that. - -As far as the switchdev driver is concerned, all that it needs to ensure -is that traffic between Station A and Station B is not forever broken. -If it does nothing, then the stale rule to send frames for Station B -towards the control interface remains in place. But Station B is no -longer reachable via the control interface, but via a port that can -offload the bridge port learning attribute. It's just that the port is -prevented from learning this address, since the rule overrides FDB -updates. So the rule needs to go. The question is via what mechanism. - -It sure would be possible for this switchdev driver to keep track of all -addresses which are sent to the control interface, and then also listen -for bridge notifier events on its own ports, searching for the ones that -have a MAC address which was previously sent to the control interface. -But this is cumbersome and inefficient. 
Instead, with one small change, -the bridge could notify of the address deletion from the old port, in a -symmetrical manner with how it did for the insertion. Then the switchdev -driver would not be required to monitor learn/forget events for its own -ports. It could just delete the rule towards the control interface upon -bridge entry migration. This would make hardware address learning be -possible again. Then it would take a few more packets until the hardware -and software FDB would be in sync again. - -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com> -Reviewed-by: Ido Schimmel <idosch@nvidia.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - net/bridge/br_fdb.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/net/bridge/br_fdb.c -+++ b/net/bridge/br_fdb.c -@@ -581,6 +581,7 @@ void br_fdb_update(struct net_bridge *br - - /* fastpath: update of existing entry */ - if (unlikely(source != fdb->dst && !fdb->is_sticky)) { -+ br_switchdev_fdb_notify(fdb, RTM_DELNEIGH); - fdb->dst = source; - fdb_modified = true; - /* Take over HW learned entry */ diff --git a/target/linux/generic/backport-5.4/771-mdio-bus-add-generic-find-bus.patch b/target/linux/generic/backport-5.4/771-mdio-bus-add-generic-find-bus.patch deleted file mode 100644 index b79fa0f18a..0000000000 --- a/target/linux/generic/backport-5.4/771-mdio-bus-add-generic-find-bus.patch +++ /dev/null @@ -1,67 +0,0 @@ -From ce69e2162f158d9d4a0e513971d02dabc7d14cb7 Mon Sep 17 00:00:00 2001 -From: Jeremy Linton <jeremy.linton@arm.com> -Date: Mon, 24 Feb 2020 16:53:58 -0600 -Subject: [PATCH] mdio_bus: Add generic mdio_find_bus() - -It appears most ethernet drivers follow one of two main strategies -for mdio bus/phy management. 
A monolithic model where the net driver -itself creates, probes and uses the phy, and one where an external -mdio/phy driver instantiates the mdio bus/phy and the net driver -only attaches to a known phy. Usually in this latter model the phys -are discovered via DT relationships or simply phy name/address -hardcoding. - -This is a shame because modern well behaved mdio buses are self -describing and can be probed. The mdio layer itself is fully capable -of this, yet there isn't a clean way for a standalone net driver -to attach and enumerate the discovered devices. This is because -outside of of_mdio_find_bus() there isn't a straightforward way -to acquire the mii_bus pointer. - -So, lets add a mdio_find_bus which can return the mii_bus based -only on its name. - -Signed-off-by: Jeremy Linton <jeremy.linton@arm.com> -Acked-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - drivers/net/phy/mdio_bus.c | 17 +++++++++++++++++ - include/linux/phy.h | 1 + - 2 files changed, 18 insertions(+) - ---- a/drivers/net/phy/mdio_bus.c -+++ b/drivers/net/phy/mdio_bus.c -@@ -260,6 +260,23 @@ static struct class mdio_bus_class = { - .dev_release = mdiobus_release, - }; - -+/** -+ * mdio_find_bus - Given the name of a mdiobus, find the mii_bus. -+ * @mdio_bus_np: Pointer to the mii_bus. -+ * -+ * Returns a reference to the mii_bus, or NULL if none found. The -+ * embedded struct device will have its reference count incremented, -+ * and this must be put_deviced'ed once the bus is finished with. -+ */ -+struct mii_bus *mdio_find_bus(const char *mdio_name) -+{ -+ struct device *d; -+ -+ d = class_find_device_by_name(&mdio_bus_class, mdio_name); -+ return d ? to_mii_bus(d) : NULL; -+} -+EXPORT_SYMBOL(mdio_find_bus); -+ - #if IS_ENABLED(CONFIG_OF_MDIO) - /** - * of_mdio_find_bus - Given an mii_bus node, find the mii_bus. 
---- a/include/linux/phy.h -+++ b/include/linux/phy.h -@@ -273,6 +273,7 @@ static inline struct mii_bus *devm_mdiob - return devm_mdiobus_alloc_size(dev, 0); - } - -+struct mii_bus *mdio_find_bus(const char *mdio_name); - void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); - struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); - diff --git a/target/linux/generic/backport-5.4/771-v5.12-net-dsa-be-louder-when-a-non-legacy-FDB-operation-fa.patch b/target/linux/generic/backport-5.4/771-v5.12-net-dsa-be-louder-when-a-non-legacy-FDB-operation-fa.patch deleted file mode 100644 index 893eb719ca..0000000000 --- a/target/linux/generic/backport-5.4/771-v5.12-net-dsa-be-louder-when-a-non-legacy-FDB-operation-fa.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 2fd186501b1cff155cc4a755c210793cfc0dffb5 Mon Sep 17 00:00:00 2001 -From: Vladimir Oltean <vladimir.oltean@nxp.com> -Date: Wed, 6 Jan 2021 11:51:31 +0200 -Subject: [PATCH] net: dsa: be louder when a non-legacy FDB operation fails - -The dev_close() call was added in commit c9eb3e0f8701 ("net: dsa: Add -support for learning FDB through notification") "to indicate inconsistent -situation" when we could not delete an FDB entry from the port. - -bridge fdb del d8:58:d7:00:ca:6d dev swp0 self master - -It is a bit drastic and at the same time not helpful if the above fails -to only print with netdev_dbg log level, but on the other hand to bring -the interface down. - -So increase the verbosity of the error message, and drop dev_close(). 
- -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - net/dsa/slave.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -1593,7 +1593,9 @@ static void dsa_slave_switchdev_event_wo - - err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid); - if (err) { -- netdev_dbg(dev, "fdb add failed err=%d\n", err); -+ netdev_err(dev, -+ "failed to add %pM vid %d to fdb: %d\n", -+ fdb_info->addr, fdb_info->vid, err); - break; - } - fdb_info->offloaded = true; -@@ -1608,9 +1610,11 @@ static void dsa_slave_switchdev_event_wo - - err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid); - if (err) { -- netdev_dbg(dev, "fdb del failed err=%d\n", err); -- dev_close(dev); -+ netdev_err(dev, -+ "failed to delete %pM vid %d from fdb: %d\n", -+ fdb_info->addr, fdb_info->vid, err); - } -+ - break; - } - rtnl_unlock(); diff --git a/target/linux/generic/backport-5.4/772-v5.12-net-dsa-don-t-use-switchdev_notifier_fdb_info-in-dsa.patch b/target/linux/generic/backport-5.4/772-v5.12-net-dsa-don-t-use-switchdev_notifier_fdb_info-in-dsa.patch deleted file mode 100644 index 275870d19f..0000000000 --- a/target/linux/generic/backport-5.4/772-v5.12-net-dsa-don-t-use-switchdev_notifier_fdb_info-in-dsa.patch +++ /dev/null @@ -1,226 +0,0 @@ -From c4bb76a9a0ef87c4cc1f636defed5f12deb9f5a7 Mon Sep 17 00:00:00 2001 -From: Vladimir Oltean <vladimir.oltean@nxp.com> -Date: Wed, 6 Jan 2021 11:51:32 +0200 -Subject: [PATCH] net: dsa: don't use switchdev_notifier_fdb_info in - dsa_switchdev_event_work - -Currently DSA doesn't add FDB entries on the CPU port, because it only -does so through switchdev, which is associated with a net_device, and -there are none of those for the CPU port. 
- -But actually FDB addresses on the CPU port have some use cases of their -own, if the switchdev operations are initiated from within the DSA -layer. There is just one problem with the existing code: it passes a -structure in dsa_switchdev_event_work which was retrieved directly from -switchdev, so it contains a net_device. We need to generalize the -contents to something that covers the CPU port as well: the "ds, port" -tuple is fine for that. - -Note that the new procedure for notifying the successful FDB offload is -inspired from the rocker model. - -Also, nothing was being done if added_by_user was false. Let's check for -that a lot earlier, and don't actually bother to schedule the worker -for nothing. - -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - net/dsa/dsa_priv.h | 12 +++++ - net/dsa/slave.c | 106 ++++++++++++++++++++++----------------------- - 2 files changed, 65 insertions(+), 53 deletions(-) - ---- a/net/dsa/dsa_priv.h -+++ b/net/dsa/dsa_priv.h -@@ -62,6 +62,18 @@ struct dsa_notifier_vlan_info { - int port; - }; - -+struct dsa_switchdev_event_work { -+ struct dsa_switch *ds; -+ int port; -+ struct work_struct work; -+ unsigned long event; -+ /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and -+ * SWITCHDEV_FDB_DEL_TO_DEVICE -+ */ -+ unsigned char addr[ETH_ALEN]; -+ u16 vid; -+}; -+ - struct dsa_slave_priv { - /* Copy of CPU port xmit for faster access in slave transmit hot path */ - struct sk_buff * (*xmit)(struct sk_buff *skb, ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -1568,76 +1568,66 @@ static int dsa_slave_netdevice_event(str - return NOTIFY_DONE; - } - --struct dsa_switchdev_event_work { -- struct work_struct work; -- struct switchdev_notifier_fdb_info fdb_info; -- struct net_device *dev; -- unsigned long event; --}; -+static void -+dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) -+{ -+ struct 
dsa_switch *ds = switchdev_work->ds; -+ struct switchdev_notifier_fdb_info info; -+ struct dsa_port *dp; -+ -+ if (!dsa_is_user_port(ds, switchdev_work->port)) -+ return; -+ -+ info.addr = switchdev_work->addr; -+ info.vid = switchdev_work->vid; -+ info.offloaded = true; -+ dp = dsa_to_port(ds, switchdev_work->port); -+ call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, -+ dp->slave, &info.info, NULL); -+} - - static void dsa_slave_switchdev_event_work(struct work_struct *work) - { - struct dsa_switchdev_event_work *switchdev_work = - container_of(work, struct dsa_switchdev_event_work, work); -- struct net_device *dev = switchdev_work->dev; -- struct switchdev_notifier_fdb_info *fdb_info; -- struct dsa_port *dp = dsa_slave_to_port(dev); -+ struct dsa_switch *ds = switchdev_work->ds; -+ struct dsa_port *dp; - int err; - -+ dp = dsa_to_port(ds, switchdev_work->port); -+ - rtnl_lock(); - switch (switchdev_work->event) { - case SWITCHDEV_FDB_ADD_TO_DEVICE: -- fdb_info = &switchdev_work->fdb_info; -- if (!fdb_info->added_by_user) -- break; -- -- err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid); -+ err = dsa_port_fdb_add(dp, switchdev_work->addr, -+ switchdev_work->vid); - if (err) { -- netdev_err(dev, -- "failed to add %pM vid %d to fdb: %d\n", -- fdb_info->addr, fdb_info->vid, err); -+ dev_err(ds->dev, -+ "port %d failed to add %pM vid %d to fdb: %d\n", -+ dp->index, switchdev_work->addr, -+ switchdev_work->vid, err); - break; - } -- fdb_info->offloaded = true; -- call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, -- &fdb_info->info, NULL); -+ dsa_fdb_offload_notify(switchdev_work); - break; - - case SWITCHDEV_FDB_DEL_TO_DEVICE: -- fdb_info = &switchdev_work->fdb_info; -- if (!fdb_info->added_by_user) -- break; -- -- err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid); -+ err = dsa_port_fdb_del(dp, switchdev_work->addr, -+ switchdev_work->vid); - if (err) { -- netdev_err(dev, -- "failed to delete %pM vid %d from fdb: %d\n", -- fdb_info->addr, 
fdb_info->vid, err); -+ dev_err(ds->dev, -+ "port %d failed to delete %pM vid %d from fdb: %d\n", -+ dp->index, switchdev_work->addr, -+ switchdev_work->vid, err); - } - - break; - } - rtnl_unlock(); - -- kfree(switchdev_work->fdb_info.addr); - kfree(switchdev_work); -- dev_put(dev); --} -- --static int --dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work * -- switchdev_work, -- const struct switchdev_notifier_fdb_info * -- fdb_info) --{ -- memcpy(&switchdev_work->fdb_info, fdb_info, -- sizeof(switchdev_work->fdb_info)); -- switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); -- if (!switchdev_work->fdb_info.addr) -- return -ENOMEM; -- ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, -- fdb_info->addr); -- return 0; -+ if (dsa_is_user_port(ds, dp->index)) -+ dev_put(dp->slave); - } - - /* Called under rcu_read_lock() */ -@@ -1645,7 +1635,9 @@ static int dsa_slave_switchdev_event(str - unsigned long event, void *ptr) - { - struct net_device *dev = switchdev_notifier_info_to_dev(ptr); -+ const struct switchdev_notifier_fdb_info *fdb_info; - struct dsa_switchdev_event_work *switchdev_work; -+ struct dsa_port *dp; - int err; - - if (event == SWITCHDEV_PORT_ATTR_SET) { -@@ -1658,20 +1650,32 @@ static int dsa_slave_switchdev_event(str - if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; - -+ dp = dsa_slave_to_port(dev); -+ - switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); - if (!switchdev_work) - return NOTIFY_BAD; - - INIT_WORK(&switchdev_work->work, - dsa_slave_switchdev_event_work); -- switchdev_work->dev = dev; -+ switchdev_work->ds = dp->ds; -+ switchdev_work->port = dp->index; - switchdev_work->event = event; - - switch (event) { - case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ - case SWITCHDEV_FDB_DEL_TO_DEVICE: -- if (dsa_slave_switchdev_fdb_work_init(switchdev_work, ptr)) -- goto err_fdb_work_init; -+ fdb_info = ptr; -+ -+ if (!fdb_info->added_by_user) { -+ kfree(switchdev_work); -+ return NOTIFY_OK; -+ } -+ -+ 
ether_addr_copy(switchdev_work->addr, -+ fdb_info->addr); -+ switchdev_work->vid = fdb_info->vid; -+ - dev_hold(dev); - break; - default: -@@ -1681,10 +1685,6 @@ static int dsa_slave_switchdev_event(str - - dsa_schedule_work(&switchdev_work->work); - return NOTIFY_OK; -- --err_fdb_work_init: -- kfree(switchdev_work); -- return NOTIFY_BAD; - } - - static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, diff --git a/target/linux/generic/backport-5.4/773-v5.12-net-dsa-move-switchdev-event-implementation-under-th.patch b/target/linux/generic/backport-5.4/773-v5.12-net-dsa-move-switchdev-event-implementation-under-th.patch deleted file mode 100644 index b70986fcc1..0000000000 --- a/target/linux/generic/backport-5.4/773-v5.12-net-dsa-move-switchdev-event-implementation-under-th.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 447d290a58bd335d68f665713842365d3d6447df Mon Sep 17 00:00:00 2001 -From: Vladimir Oltean <vladimir.oltean@nxp.com> -Date: Wed, 6 Jan 2021 11:51:33 +0200 -Subject: [PATCH] net: dsa: move switchdev event implementation under the same - switch/case statement - -We'll need to start listening to SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE -events even for interfaces where dsa_slave_dev_check returns false, so -we need that check inside the switch-case statement for SWITCHDEV_FDB_*. - -This movement also avoids a useless allocation / free of switchdev_work -on the untreated "default event" case. 
- -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - net/dsa/slave.c | 35 ++++++++++++++++------------------- - 1 file changed, 16 insertions(+), 19 deletions(-) - ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -1640,31 +1640,29 @@ static int dsa_slave_switchdev_event(str - struct dsa_port *dp; - int err; - -- if (event == SWITCHDEV_PORT_ATTR_SET) { -+ switch (event) { -+ case SWITCHDEV_PORT_ATTR_SET: - err = switchdev_handle_port_attr_set(dev, ptr, - dsa_slave_dev_check, - dsa_slave_port_attr_set); - return notifier_from_errno(err); -- } -- -- if (!dsa_slave_dev_check(dev)) -- return NOTIFY_DONE; -+ case SWITCHDEV_FDB_ADD_TO_DEVICE: -+ case SWITCHDEV_FDB_DEL_TO_DEVICE: -+ if (!dsa_slave_dev_check(dev)) -+ return NOTIFY_DONE; - -- dp = dsa_slave_to_port(dev); -+ dp = dsa_slave_to_port(dev); - -- switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); -- if (!switchdev_work) -- return NOTIFY_BAD; -- -- INIT_WORK(&switchdev_work->work, -- dsa_slave_switchdev_event_work); -- switchdev_work->ds = dp->ds; -- switchdev_work->port = dp->index; -- switchdev_work->event = event; -+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); -+ if (!switchdev_work) -+ return NOTIFY_BAD; -+ -+ INIT_WORK(&switchdev_work->work, -+ dsa_slave_switchdev_event_work); -+ switchdev_work->ds = dp->ds; -+ switchdev_work->port = dp->index; -+ switchdev_work->event = event; - -- switch (event) { -- case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ -- case SWITCHDEV_FDB_DEL_TO_DEVICE: - fdb_info = ptr; - - if (!fdb_info->added_by_user) { -@@ -1677,13 +1675,12 @@ static int dsa_slave_switchdev_event(str - switchdev_work->vid = fdb_info->vid; - - dev_hold(dev); -+ dsa_schedule_work(&switchdev_work->work); - break; - default: -- kfree(switchdev_work); - return NOTIFY_DONE; - } - -- dsa_schedule_work(&switchdev_work->work); - return NOTIFY_OK; - } - diff --git 
a/target/linux/generic/backport-5.4/774-v5.12-net-dsa-exit-early-in-dsa_slave_switchdev_event-if-w.patch b/target/linux/generic/backport-5.4/774-v5.12-net-dsa-exit-early-in-dsa_slave_switchdev_event-if-w.patch deleted file mode 100644 index c7ed4064e8..0000000000 --- a/target/linux/generic/backport-5.4/774-v5.12-net-dsa-exit-early-in-dsa_slave_switchdev_event-if-w.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 5fb4a451a87d8ed3363d28b63a3295399373d6c4 Mon Sep 17 00:00:00 2001 -From: Vladimir Oltean <vladimir.oltean@nxp.com> -Date: Wed, 6 Jan 2021 11:51:34 +0200 -Subject: [PATCH] net: dsa: exit early in dsa_slave_switchdev_event if we can't - program the FDB - -Right now, the following would happen for a switch driver that does not -implement .port_fdb_add or .port_fdb_del. - -dsa_slave_switchdev_event returns NOTIFY_OK and schedules: --> dsa_slave_switchdev_event_work - -> dsa_port_fdb_add - -> dsa_port_notify(DSA_NOTIFIER_FDB_ADD) - -> dsa_switch_fdb_add - -> if (!ds->ops->port_fdb_add) return -EOPNOTSUPP; - -> an error is printed with dev_dbg, and - dsa_fdb_offload_notify(switchdev_work) is not called. - -We can avoid scheduling the worker for nothing and say NOTIFY_DONE. -Because we don't call dsa_fdb_offload_notify, the static FDB entry will -remain just in the software bridge. 
- -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - net/dsa/slave.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -1653,6 +1653,9 @@ static int dsa_slave_switchdev_event(str - - dp = dsa_slave_to_port(dev); - -+ if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del) -+ return NOTIFY_DONE; -+ - switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); - if (!switchdev_work) - return NOTIFY_BAD; diff --git a/target/linux/generic/backport-5.4/775-v5.12-net-dsa-listen-for-SWITCHDEV_-FDB-DEL-_ADD_TO_DEVICE.patch b/target/linux/generic/backport-5.4/775-v5.12-net-dsa-listen-for-SWITCHDEV_-FDB-DEL-_ADD_TO_DEVICE.patch deleted file mode 100644 index e4ed6e808f..0000000000 --- a/target/linux/generic/backport-5.4/775-v5.12-net-dsa-listen-for-SWITCHDEV_-FDB-DEL-_ADD_TO_DEVICE.patch +++ /dev/null @@ -1,263 +0,0 @@ -From d5f19486cee79d04c054427577ac96ed123706db Mon Sep 17 00:00:00 2001 -From: Vladimir Oltean <vladimir.oltean@nxp.com> -Date: Wed, 6 Jan 2021 11:51:35 +0200 -Subject: [PATCH] net: dsa: listen for SWITCHDEV_{FDB,DEL}_ADD_TO_DEVICE on - foreign bridge neighbors - -Some DSA switches (and not only) cannot learn source MAC addresses from -packets injected from the CPU. They only perform hardware address -learning from inbound traffic. - -This can be problematic when we have a bridge spanning some DSA switch -ports and some non-DSA ports (which we'll call "foreign interfaces" from -DSA's perspective). 
- -There are 2 classes of problems created by the lack of learning on -CPU-injected traffic: -- excessive flooding, due to the fact that DSA treats those addresses as - unknown -- the risk of stale routes, which can lead to temporary packet loss - -To illustrate the second class, consider the following situation, which -is common in production equipment (wireless access points, where there -is a WLAN interface and an Ethernet switch, and these form a single -bridging domain). - - AP 1: - +------------------------------------------------------------------------+ - | br0 | - +------------------------------------------------------------------------+ - +------------+ +------------+ +------------+ +------------+ +------------+ - | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 | - +------------+ +------------+ +------------+ +------------+ +------------+ - | ^ ^ - | | | - | | | - | Client A Client B - | - | - | - +------------+ +------------+ +------------+ +------------+ +------------+ - | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 | - +------------+ +------------+ +------------+ +------------+ +------------+ - +------------------------------------------------------------------------+ - | br0 | - +------------------------------------------------------------------------+ - AP 2 - -- br0 of AP 1 will know that Clients A and B are reachable via wlan0 -- the hardware fdb of a DSA switch driver today is not kept in sync with - the software entries on other bridge ports, so it will not know that - clients A and B are reachable via the CPU port UNLESS the hardware - switch itself performs SA learning from traffic injected from the CPU. - Nonetheless, a substantial number of switches don't. -- the hardware fdb of the DSA switch on AP 2 may autonomously learn that - Client A and B are reachable through swp0. Therefore, the software br0 - of AP 2 also may or may not learn this. 
In the example we're - illustrating, some Ethernet traffic has been going on, and br0 from AP - 2 has indeed learnt that it can reach Client B through swp0. - -One of the wireless clients, say Client B, disconnects from AP 1 and -roams to AP 2. The topology now looks like this: - - AP 1: - +------------------------------------------------------------------------+ - | br0 | - +------------------------------------------------------------------------+ - +------------+ +------------+ +------------+ +------------+ +------------+ - | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 | - +------------+ +------------+ +------------+ +------------+ +------------+ - | ^ - | | - | Client A - | - | - | Client B - | | - | v - +------------+ +------------+ +------------+ +------------+ +------------+ - | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 | - +------------+ +------------+ +------------+ +------------+ +------------+ - +------------------------------------------------------------------------+ - | br0 | - +------------------------------------------------------------------------+ - AP 2 - -- br0 of AP 1 still knows that Client A is reachable via wlan0 (no change) -- br0 of AP 1 will (possibly) know that Client B has left wlan0. There - are cases where it might never find out though. Either way, DSA today - does not process that notification in any way. -- the hardware FDB of the DSA switch on AP 1 may learn autonomously that - Client B can be reached via swp0, if it receives any packet with - Client B's source MAC address over Ethernet. -- the hardware FDB of the DSA switch on AP 2 still thinks that Client B - can be reached via swp0. It does not know that it has roamed to wlan0, - because it doesn't perform SA learning from the CPU port. - -Now Client A contacts Client B. -AP 1 routes the packet fine towards swp0 and delivers it on the Ethernet -segment. -AP 2 sees a frame on swp0 and its fdb says that the destination is swp0. -Hairpinning is disabled => drop.
- -This problem comes from the fact that these switches have a 'blind spot' -for addresses coming from software bridging. The generic solution is not -to assume that hardware learning can be enabled somehow, but to listen -to more bridge learning events. It turns out that the bridge driver does -learn in software from all inbound frames, in __br_handle_local_finish. -A proper SWITCHDEV_FDB_ADD_TO_DEVICE notification is emitted for the -addresses serviced by the bridge on 'foreign' interfaces. The software -bridge also does the right thing on migration, by notifying that the old -entry is deleted, so that does not need to be special-cased in DSA. When -it is deleted, we just need to delete our static FDB entry towards the -CPU too, and wait. - -The problem is that DSA currently only cares about SWITCHDEV_FDB_ADD_TO_DEVICE -events received on its own interfaces, such as static FDB entries. - -Luckily we can change that, and DSA can listen to all switchdev FDB -add/del events in the system and figure out if those events were emitted -by a bridge that spans at least one of DSA's own ports. In case that is -true, DSA will also offload that address towards its own CPU port, in -the eventuality that there might be bridge clients attached to the DSA -switch who want to talk to the station connected to the foreign -interface. - -In terms of implementation, we need to keep the fdb_info->added_by_user -check for the case where the switchdev event was targeted directly at a -DSA switch port. But we don't need to look at that flag for snooped -events. So the check is currently too late, we need to move it earlier. -This also simplifies the code a bit, since we avoid uselessly allocating -and freeing switchdev_work. - -We could probably do some improvements in the future. For example, -multi-bridge support is rudimentary at the moment. 
If there are two -bridges spanning a DSA switch's ports, and both of them need to service -the same MAC address, then what will happen is that the migration of one -of those stations will trigger the deletion of the FDB entry from the -CPU port while it is still used by the other bridge. That could be improved -with reference counting but is left for another time. - -This behavior needs to be enabled at driver level by setting -ds->assisted_learning_on_cpu_port = true. This is because we don't want -to inflict a potential performance penalty (accesses through -MDIO/I2C/SPI are expensive) on hardware that really doesn't need it -because address learning on the CPU port works there. - -Reported-by: DENG Qingfang <dqfext@gmail.com> -Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> -Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> -[Backported to linux-5.4.y] -Signed-off-by: DENG Qingfang <dqfext@gmail.com> ---- - include/net/dsa.h | 5 ++++ - net/dsa/slave.c | 63 ++++++++++++++++++++++++++++++++++++++--------- - 2 files changed, 57 insertions(+), 11 deletions(-) - ---- a/include/net/dsa.h -+++ b/include/net/dsa.h -@@ -279,6 +279,11 @@ struct dsa_switch { - */ - bool configure_vlan_while_not_filtering; - -+ /* Let DSA manage the FDB entries towards the CPU, based on the -+ * software bridge database. -+ */ -+ bool assisted_learning_on_cpu_port; -+ - /* In case vlan_filtering_is_global is set, the VLAN awareness state - * should be retrieved from here and not from the per-port settings.
- */ ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -1630,6 +1630,25 @@ static void dsa_slave_switchdev_event_wo - dev_put(dp->slave); - } - -+static int dsa_lower_dev_walk(struct net_device *lower_dev, void *data) -+{ -+ if (dsa_slave_dev_check(lower_dev)) { -+ *((void **)data) = (void *)netdev_priv(lower_dev); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev) -+{ -+ struct dsa_slave_priv *data = NULL; -+ -+ netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, (void **) &data); -+ -+ return data; -+} -+ - /* Called under rcu_read_lock() */ - static int dsa_slave_switchdev_event(struct notifier_block *unused, - unsigned long event, void *ptr) -@@ -1648,10 +1667,37 @@ static int dsa_slave_switchdev_event(str - return notifier_from_errno(err); - case SWITCHDEV_FDB_ADD_TO_DEVICE: - case SWITCHDEV_FDB_DEL_TO_DEVICE: -- if (!dsa_slave_dev_check(dev)) -- return NOTIFY_DONE; -+ fdb_info = ptr; -+ -+ if (dsa_slave_dev_check(dev)) { -+ if (!fdb_info->added_by_user) -+ return NOTIFY_OK; -+ -+ dp = dsa_slave_to_port(dev); -+ } else { -+ /* Snoop addresses learnt on foreign interfaces -+ * bridged with us, for switches that don't -+ * automatically learn SA from CPU-injected traffic -+ */ -+ struct net_device *br_dev; -+ struct dsa_slave_priv *p; -+ -+ br_dev = netdev_master_upper_dev_get_rcu(dev); -+ if (!br_dev) -+ return NOTIFY_DONE; -+ -+ if (!netif_is_bridge_master(br_dev)) -+ return NOTIFY_DONE; -+ -+ p = dsa_slave_dev_lower_find(br_dev); -+ if (!p) -+ return NOTIFY_DONE; - -- dp = dsa_slave_to_port(dev); -+ dp = p->dp->cpu_dp; -+ -+ if (!dp->ds->assisted_learning_on_cpu_port) -+ return NOTIFY_DONE; -+ } - - if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del) - return NOTIFY_DONE; -@@ -1666,18 +1712,13 @@ static int dsa_slave_switchdev_event(str - switchdev_work->port = dp->index; - switchdev_work->event = event; - -- fdb_info = ptr; -- -- if (!fdb_info->added_by_user) { -- 
kfree(switchdev_work); -- return NOTIFY_OK; -- } -- - ether_addr_copy(switchdev_work->addr, - fdb_info->addr); - switchdev_work->vid = fdb_info->vid; - -- dev_hold(dev); -+ /* Hold a reference on the slave for dsa_fdb_offload_notify */ -+ if (dsa_is_user_port(dp->ds, dp->index)) -+ dev_hold(dev); - dsa_schedule_work(&switchdev_work->work); - break; - default: diff --git a/target/linux/generic/backport-5.4/780-net-dsa-mt7530-setup-core-clock-even-in-TRGMII-mode.patch b/target/linux/generic/backport-5.4/780-net-dsa-mt7530-setup-core-clock-even-in-TRGMII-mode.patch deleted file mode 100644 index 7ad7cd3a01..0000000000 --- a/target/linux/generic/backport-5.4/780-net-dsa-mt7530-setup-core-clock-even-in-TRGMII-mode.patch +++ /dev/null @@ -1,84 +0,0 @@ -From c3b8e07909dbe67b0d580416c1a5257643a73be7 Mon Sep 17 00:00:00 2001 -From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> -Date: Fri, 12 Mar 2021 00:07:03 -0800 -Subject: [PATCH] net: dsa: mt7530: setup core clock even in TRGMII mode - -A recent change to MIPS ralink reset logic made it so mt7530 actually -resets the switch on platforms such as mt7621 (where bit 2 is the reset -line for the switch). That exposed an issue where the switch would not -function properly in TRGMII mode after a reset. - -Reconfigure core clock in TRGMII mode to fix the issue. - -Tested on Ubiquiti ER-X (MT7621) with TRGMII mode enabled. - -Fixes: 3f9ef7785a9c ("MIPS: ralink: manage low reset lines") -Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/net/dsa/mt7530.c | 52 +++++++++++++++++++--------------------- - 1 file changed, 25 insertions(+), 27 deletions(-) - ---- a/drivers/net/dsa/mt7530.c -+++ b/drivers/net/dsa/mt7530.c -@@ -428,34 +428,32 @@ mt7530_pad_clk_setup(struct dsa_switch * - TD_DM_DRVP(8) | TD_DM_DRVN(8)); - - /* Setup core clock for MT7530 */ -- if (!trgint) { -- /* Disable MT7530 core clock */ -- core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); -- -- /* Disable PLL, since phy_device has not yet been created -- * provided for phy_[read,write]_mmd_indirect is called, we -- * provide our own core_write_mmd_indirect to complete this -- * function. -- */ -- core_write_mmd_indirect(priv, -- CORE_GSWPLL_GRP1, -- MDIO_MMD_VEND2, -- 0); -- -- /* Set core clock into 500Mhz */ -- core_write(priv, CORE_GSWPLL_GRP2, -- RG_GSWPLL_POSDIV_500M(1) | -- RG_GSWPLL_FBKDIV_500M(25)); -- -- /* Enable PLL */ -- core_write(priv, CORE_GSWPLL_GRP1, -- RG_GSWPLL_EN_PRE | -- RG_GSWPLL_POSDIV_200M(2) | -- RG_GSWPLL_FBKDIV_200M(32)); -- -- /* Enable MT7530 core clock */ -- core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); -- } -+ /* Disable MT7530 core clock */ -+ core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); -+ -+ /* Disable PLL, since phy_device has not yet been created -+ * provided for phy_[read,write]_mmd_indirect is called, we -+ * provide our own core_write_mmd_indirect to complete this -+ * function. 
-+ */ -+ core_write_mmd_indirect(priv, -+ CORE_GSWPLL_GRP1, -+ MDIO_MMD_VEND2, -+ 0); -+ -+ /* Set core clock into 500Mhz */ -+ core_write(priv, CORE_GSWPLL_GRP2, -+ RG_GSWPLL_POSDIV_500M(1) | -+ RG_GSWPLL_FBKDIV_500M(25)); -+ -+ /* Enable PLL */ -+ core_write(priv, CORE_GSWPLL_GRP1, -+ RG_GSWPLL_EN_PRE | -+ RG_GSWPLL_POSDIV_200M(2) | -+ RG_GSWPLL_FBKDIV_200M(32)); -+ -+ /* Enable MT7530 core clock */ -+ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); - - /* Setup the MT7530 TRGMII Tx Clock */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); diff --git a/target/linux/generic/backport-5.4/782-net-next-1-of-net-pass-the-dst-buffer-to-of_get_mac_address.patch b/target/linux/generic/backport-5.4/782-net-next-1-of-net-pass-the-dst-buffer-to-of_get_mac_address.patch deleted file mode 100644 index 66d379b859..0000000000 --- a/target/linux/generic/backport-5.4/782-net-next-1-of-net-pass-the-dst-buffer-to-of_get_mac_address.patch +++ /dev/null @@ -1,1875 +0,0 @@ -From 83216e3988cd196183542937c9bd58b279f946af Mon Sep 17 00:00:00 2001 -From: Michael Walle <michael@walle.cc> -Date: Mon, 12 Apr 2021 19:47:17 +0200 -Subject: of: net: pass the dst buffer to of_get_mac_address() - -of_get_mac_address() returns a "const void*" pointer to a MAC address. -Lately, support to fetch the MAC address by an NVMEM provider was added. -But this will only work with platform devices. It will not work with -PCI devices (e.g. of an integrated root complex) and esp. not with DSA -ports. - -There is an of_* variant of the nvmem binding which works without -devices. The returned data of a nvmem_cell_read() has to be freed after -use. On the other hand the return of_get_mac_address() points to some -static data without a lifetime. The trick for now, was to allocate a -device resource managed buffer which is then returned. This will only -work if we have an actual device. - -Change it, so that the caller of of_get_mac_address() has to supply a -buffer where the MAC address is written to. 
Unfortunately, this will -touch all drivers which use the of_get_mac_address(). - -Usually the code looks like: - - const char *addr; - addr = of_get_mac_address(np); - if (!IS_ERR(addr)) - ether_addr_copy(ndev->dev_addr, addr); - -This can then be simply rewritten as: - - of_get_mac_address(np, ndev->dev_addr); - -Sometimes is_valid_ether_addr() is used to test the MAC address. -of_get_mac_address() already makes sure, it just returns a valid MAC -address. Thus we can just test its return code. But we have to be -careful if there are still other sources for the MAC address before the -of_get_mac_address(). In this case we have to keep the -is_valid_ether_addr() call. - -The following coccinelle patch was used to convert common cases to the -new style. Afterwards, I've manually gone over the drivers and fixed the -return code variable: either used a new one or if one was already -available use that. Mansour Moufid, thanks for that coccinelle patch! - -<spml> -@a@ -identifier x; -expression y, z; -@@ -- x = of_get_mac_address(y); -+ x = of_get_mac_address(y, z); - <... -- ether_addr_copy(z, x); - ...> - -@@ -identifier a.x; -@@ -- if (<+... x ...+>) {} - -@@ -identifier a.x; -@@ - if (<+... x ...+>) { - ... - } -- else {} - -@@ -identifier a.x; -expression e; -@@ -- if (<+... x ...+>@e) -- {} -- else -+ if (!(e)) - {...} - -@@ -expression x, y, z; -@@ -- x = of_get_mac_address(y, z); -+ of_get_mac_address(y, z); - ... when != x -</spml> - -All drivers, except drivers/net/ethernet/aeroflex/greth.c, were -compile-time tested. - -Suggested-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: Michael Walle <michael@walle.cc> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - arch/arm/mach-mvebu/kirkwood.c | 3 +- - arch/powerpc/sysdev/tsi108_dev.c | 5 +- - drivers/net/ethernet/aeroflex/greth.c | 6 +-- - drivers/net/ethernet/allwinner/sun4i-emac.c | 10 ++-- - drivers/net/ethernet/altera/altera_tse_main.c | 7 +-- - drivers/net/ethernet/arc/emac_main.c | 8 +-- - drivers/net/ethernet/atheros/ag71xx.c | 7 +-- - drivers/net/ethernet/broadcom/bcm4908_enet.c | 7 +-- - drivers/net/ethernet/broadcom/bcmsysport.c | 7 +-- - drivers/net/ethernet/broadcom/bgmac-bcma.c | 10 ++-- - drivers/net/ethernet/broadcom/bgmac-platform.c | 11 ++-- - drivers/net/ethernet/cadence/macb_main.c | 11 ++-- - drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 8 +-- - drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 5 +- - drivers/net/ethernet/davicom/dm9000.c | 10 ++-- - drivers/net/ethernet/ethoc.c | 6 +-- - drivers/net/ethernet/ezchip/nps_enet.c | 7 +-- - drivers/net/ethernet/freescale/fec_main.c | 7 +-- - drivers/net/ethernet/freescale/fec_mpc52xx.c | 7 +-- - drivers/net/ethernet/freescale/fman/mac.c | 9 ++-- - .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 5 +- - drivers/net/ethernet/freescale/gianfar.c | 8 +-- - drivers/net/ethernet/freescale/ucc_geth.c | 5 +- - drivers/net/ethernet/hisilicon/hisi_femac.c | 7 +-- - drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 7 +-- - drivers/net/ethernet/lantiq_xrx200.c | 7 +-- - drivers/net/ethernet/marvell/mv643xx_eth.c | 5 +- - drivers/net/ethernet/marvell/mvneta.c | 6 +-- - .../net/ethernet/marvell/prestera/prestera_main.c | 11 ++-- - drivers/net/ethernet/marvell/pxa168_eth.c | 9 +--- - drivers/net/ethernet/marvell/sky2.c | 8 ++- - drivers/net/ethernet/mediatek/mtk_eth_soc.c | 11 ++-- - drivers/net/ethernet/micrel/ks8851_common.c | 7 ++- - drivers/net/ethernet/microchip/lan743x_main.c | 5 +- - drivers/net/ethernet/nxp/lpc_eth.c | 4 +- - drivers/net/ethernet/qualcomm/qca_spi.c | 10 ++-- - drivers/net/ethernet/qualcomm/qca_uart.c | 9 +--- - drivers/net/ethernet/renesas/ravb_main.c | 
12 +++-- - drivers/net/ethernet/renesas/sh_eth.c | 5 +- - .../net/ethernet/samsung/sxgbe/sxgbe_platform.c | 13 ++--- - drivers/net/ethernet/socionext/sni_ave.c | 10 ++-- - .../net/ethernet/stmicro/stmmac/dwmac-anarion.c | 2 +- - .../ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-generic.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-intel-plat.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 2 +- - .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 2 +- - .../net/ethernet/stmicro/stmmac/dwmac-visconti.c | 2 +- - drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 +- - drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- - .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 14 ++--- - .../net/ethernet/stmicro/stmmac/stmmac_platform.h | 2 +- - drivers/net/ethernet/ti/am65-cpsw-nuss.c | 19 ++++--- - drivers/net/ethernet/ti/cpsw.c | 7 +-- - drivers/net/ethernet/ti/cpsw_new.c | 7 +-- - drivers/net/ethernet/ti/davinci_emac.c | 8 +-- - drivers/net/ethernet/ti/netcp_core.c | 7 +-- - drivers/net/ethernet/wiznet/w5100-spi.c | 8 ++- - drivers/net/ethernet/wiznet/w5100.c | 2 +- - drivers/net/ethernet/xilinx/ll_temac_main.c | 8 +-- - drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 15 +++--- - 
drivers/net/ethernet/xilinx/xilinx_emaclite.c | 8 +-- - drivers/net/wireless/ath/ath9k/init.c | 5 +- - drivers/net/wireless/mediatek/mt76/eeprom.c | 9 +--- - drivers/net/wireless/ralink/rt2x00/rt2x00dev.c | 6 +-- - drivers/of/of_net.c | 60 ++++++++++------------ - drivers/staging/octeon/ethernet.c | 10 ++-- - drivers/staging/wfx/main.c | 7 ++- - include/linux/of_net.h | 6 +-- - include/net/dsa.h | 2 +- - net/dsa/dsa2.c | 2 +- - net/dsa/slave.c | 2 +- - net/ethernet/eth.c | 11 ++-- - 85 files changed, 218 insertions(+), 364 deletions(-) - ---- a/arch/arm/mach-mvebu/kirkwood.c -+++ b/arch/arm/mach-mvebu/kirkwood.c -@@ -84,6 +84,7 @@ static void __init kirkwood_dt_eth_fixup - struct device_node *pnp = of_get_parent(np); - struct clk *clk; - struct property *pmac; -+ u8 tmpmac[ETH_ALEN]; - void __iomem *io; - u8 *macaddr; - u32 reg; -@@ -93,7 +94,7 @@ static void __init kirkwood_dt_eth_fixup - - /* skip disabled nodes or nodes with valid MAC address*/ - if (!of_device_is_available(pnp) || -- !IS_ERR(of_get_mac_address(np))) -+ !of_get_mac_address(np, tmpmac)) - goto eth_fixup_skip; - - clk = of_clk_get(pnp, 0); ---- a/arch/powerpc/sysdev/tsi108_dev.c -+++ b/arch/powerpc/sysdev/tsi108_dev.c -@@ -73,7 +73,6 @@ static int __init tsi108_eth_of_init(voi - struct device_node *phy, *mdio; - hw_info tsi_eth_data; - const unsigned int *phy_id; -- const void *mac_addr; - const phandle *ph; - - memset(r, 0, sizeof(r)); -@@ -101,9 +100,7 @@ static int __init tsi108_eth_of_init(voi - goto err; - } - -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(tsi_eth_data.mac_addr, mac_addr); -+ of_get_mac_address(np, tsi_eth_data.mac_addr); - - ph = of_get_property(np, "mdio-handle", NULL); - mdio = of_find_node_by_phandle(*ph); ---- a/drivers/net/ethernet/aeroflex/greth.c -+++ b/drivers/net/ethernet/aeroflex/greth.c -@@ -1451,10 +1451,10 @@ static int greth_of_probe(struct platfor - break; - } - if (i == 6) { -- const u8 *addr; -+ u8 addr[ETH_ALEN]; - -- 
addr = of_get_mac_address(ofdev->dev.of_node); -- if (!IS_ERR(addr)) { -+ err = of_get_mac_address(ofdev->dev.of_node, addr); -+ if (!err) { - for (i = 0; i < 6; i++) - macaddr[i] = (unsigned int) addr[i]; - } else { ---- a/drivers/net/ethernet/allwinner/sun4i-emac.c -+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c -@@ -807,7 +807,6 @@ static int emac_probe(struct platform_de - struct emac_board_info *db; - struct net_device *ndev; - int ret = 0; -- const char *mac_addr; - - ndev = alloc_etherdev(sizeof(struct emac_board_info)); - if (!ndev) { -@@ -870,12 +869,9 @@ static int emac_probe(struct platform_de - } - - /* Read MAC-address from DT */ -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -- -- /* Check if the MAC address is valid, if not get a random one */ -- if (!is_valid_ether_addr(ndev->dev_addr)) { -+ ret = of_get_mac_address(np, ndev->dev_addr); -+ if (ret) { -+ /* if the MAC address is invalid get a random one */ - eth_hw_addr_random(ndev); - dev_warn(&pdev->dev, "using random MAC address %pM\n", - ndev->dev_addr); ---- a/drivers/net/ethernet/altera/altera_tse_main.c -+++ b/drivers/net/ethernet/altera/altera_tse_main.c -@@ -1351,7 +1351,6 @@ static int altera_tse_probe(struct platf - struct resource *control_port; - struct resource *dma_res; - struct altera_tse_private *priv; -- const unsigned char *macaddr; - void __iomem *descmap; - const struct of_device_id *of_id = NULL; - -@@ -1528,10 +1527,8 @@ static int altera_tse_probe(struct platf - priv->rx_dma_buf_sz = ALTERA_RXDMABUFFER_SIZE; - - /* get default MAC address from device tree */ -- macaddr = of_get_mac_address(pdev->dev.of_node); -- if (!IS_ERR(macaddr)) -- ether_addr_copy(ndev->dev_addr, macaddr); -- else -+ ret = of_get_mac_address(pdev->dev.of_node, ndev->dev_addr); -+ if (ret) - eth_hw_addr_random(ndev); - - /* get phy addr and create mdio */ ---- a/drivers/net/ethernet/arc/emac_main.c -+++ b/drivers/net/ethernet/arc/emac_main.c 
-@@ -870,7 +870,6 @@ int arc_emac_probe(struct net_device *nd - struct device_node *phy_node; - struct phy_device *phydev = NULL; - struct arc_emac_priv *priv; -- const char *mac_addr; - unsigned int id, clock_frequency, irq; - int err; - -@@ -955,11 +954,8 @@ int arc_emac_probe(struct net_device *nd - } - - /* Get MAC address from device tree */ -- mac_addr = of_get_mac_address(dev->of_node); -- -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -- else -+ err = of_get_mac_address(dev->of_node, ndev->dev_addr); -+ if (err) - eth_hw_addr_random(ndev); - - arc_emac_set_address_internal(ndev); ---- a/drivers/net/ethernet/atheros/ag71xx.c -+++ b/drivers/net/ethernet/atheros/ag71xx.c -@@ -1634,7 +1634,6 @@ static int ag71xx_probe(struct platform_ - const struct ag71xx_dcfg *dcfg; - struct net_device *ndev; - struct resource *res; -- const void *mac_addr; - int tx_size, err, i; - struct ag71xx *ag; - -@@ -1735,10 +1734,8 @@ static int ag71xx_probe(struct platform_ - ag->stop_desc->ctrl = 0; - ag->stop_desc->next = (u32)ag->stop_desc_dma; - -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); -- if (IS_ERR(mac_addr) || !is_valid_ether_addr(ndev->dev_addr)) { -+ err = of_get_mac_address(np, ndev->dev_addr); -+ if (err) { - netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); - eth_random_addr(ndev->dev_addr); - } ---- a/drivers/net/ethernet/broadcom/bcmsysport.c -+++ b/drivers/net/ethernet/broadcom/bcmsysport.c -@@ -2423,7 +2423,6 @@ static int bcm_sysport_probe(struct plat - struct bcm_sysport_priv *priv; - struct device_node *dn; - struct net_device *dev; -- const void *macaddr; - u32 txq, rxq; - int ret; - -@@ -2505,12 +2504,10 @@ static int bcm_sysport_probe(struct plat - } - - /* Initialize netdevice members */ -- macaddr = of_get_mac_address(dn); -- if (IS_ERR(macaddr)) { -+ ret = of_get_mac_address(dn, dev->dev_addr); -+ if (ret) { - dev_warn(&pdev->dev, "using 
random Ethernet MAC\n"); - eth_hw_addr_random(dev); -- } else { -- ether_addr_copy(dev->dev_addr, macaddr); - } - - SET_NETDEV_DEV(dev, &pdev->dev); ---- a/drivers/net/ethernet/broadcom/bgmac-bcma.c -+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c -@@ -115,7 +115,7 @@ static int bgmac_probe(struct bcma_devic - struct ssb_sprom *sprom = &core->bus->sprom; - struct mii_bus *mii_bus; - struct bgmac *bgmac; -- const u8 *mac = NULL; -+ const u8 *mac; - int err; - - bgmac = bgmac_alloc(&core->dev); -@@ -128,11 +128,10 @@ static int bgmac_probe(struct bcma_devic - - bcma_set_drvdata(core, bgmac); - -- if (bgmac->dev->of_node) -- mac = of_get_mac_address(bgmac->dev->of_node); -+ err = of_get_mac_address(bgmac->dev->of_node, bgmac->net_dev->dev_addr); - - /* If no MAC address assigned via device tree, check SPROM */ -- if (IS_ERR_OR_NULL(mac)) { -+ if (err) { - switch (core->core_unit) { - case 0: - mac = sprom->et0mac; -@@ -149,10 +148,9 @@ static int bgmac_probe(struct bcma_devic - err = -ENOTSUPP; - goto err; - } -+ ether_addr_copy(bgmac->net_dev->dev_addr, mac); - } - -- ether_addr_copy(bgmac->net_dev->dev_addr, mac); -- - /* On BCM4706 we need common core to access PHY */ - if (core->id.id == BCMA_CORE_4706_MAC_GBIT && - !core->bus->drv_gmac_cmn.core) { ---- a/drivers/net/ethernet/broadcom/bgmac-platform.c -+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c -@@ -173,7 +173,7 @@ static int bgmac_probe(struct platform_d - struct device_node *np = pdev->dev.of_node; - struct bgmac *bgmac; - struct resource *regs; -- const u8 *mac_addr; -+ int ret; - - bgmac = bgmac_alloc(&pdev->dev); - if (!bgmac) -@@ -192,11 +192,10 @@ static int bgmac_probe(struct platform_d - bgmac->dev = &pdev->dev; - bgmac->dma_dev = &pdev->dev; - -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(bgmac->net_dev->dev_addr, mac_addr); -- else -- dev_warn(&pdev->dev, "MAC address not present in device tree\n"); -+ ret = of_get_mac_address(np, 
bgmac->net_dev->dev_addr); -+ if (ret) -+ dev_warn(&pdev->dev, -+ "MAC address not present in device tree\n"); - - bgmac->irq = platform_get_irq(pdev, 0); - if (bgmac->irq < 0) ---- a/drivers/net/ethernet/cadence/macb_main.c -+++ b/drivers/net/ethernet/cadence/macb_main.c -@@ -4206,7 +4206,6 @@ static int macb_probe(struct platform_de - struct net_device *dev; - struct resource *regs; - void __iomem *mem; -- const char *mac; - struct macb *bp; - int err, val; - -@@ -4319,15 +4318,11 @@ static int macb_probe(struct platform_de - if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR) - bp->rx_intr_mask |= MACB_BIT(RXUBR); - -- mac = of_get_mac_address(np); -- if (PTR_ERR(mac) == -EPROBE_DEFER) { -- err = -EPROBE_DEFER; -+ err = of_get_mac_address(np, bp->dev->dev_addr); -+ if (err == -EPROBE_DEFER) - goto err_out_free_netdev; -- } else if (!IS_ERR_OR_NULL(mac)) { -- ether_addr_copy(bp->dev->dev_addr, mac); -- } else { -+ else if (err) - macb_get_hwaddr(bp); -- } - - err = of_get_phy_mode(np); - if (err < 0) ---- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c -+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c -@@ -1391,7 +1391,6 @@ static int octeon_mgmt_probe(struct plat - struct net_device *netdev; - struct octeon_mgmt *p; - const __be32 *data; -- const u8 *mac; - struct resource *res_mix; - struct resource *res_agl; - struct resource *res_agl_prt_ctl; -@@ -1508,11 +1507,8 @@ static int octeon_mgmt_probe(struct plat - netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; - netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN; - -- mac = of_get_mac_address(pdev->dev.of_node); -- -- if (!IS_ERR(mac)) -- ether_addr_copy(netdev->dev_addr, mac); -- else -+ result = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr); -+ if (result) - eth_hw_addr_random(netdev); - - p->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); ---- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c -+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c -@@ -1474,7 +1474,6 @@ 
static int bgx_init_of_phy(struct bgx *b - device_for_each_child_node(&bgx->pdev->dev, fwn) { - struct phy_device *pd; - struct device_node *phy_np; -- const char *mac; - - /* Should always be an OF node. But if it is not, we - * cannot handle it, so exit the loop. -@@ -1483,9 +1482,7 @@ static int bgx_init_of_phy(struct bgx *b - if (!node) - break; - -- mac = of_get_mac_address(node); -- if (!IS_ERR(mac)) -- ether_addr_copy(bgx->lmac[lmac].mac, mac); -+ of_get_mac_address(node, bgx->lmac[lmac].mac); - - SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev); - bgx->lmac[lmac].lmacid = lmac; ---- a/drivers/net/ethernet/davicom/dm9000.c -+++ b/drivers/net/ethernet/davicom/dm9000.c -@@ -1390,7 +1390,7 @@ static struct dm9000_plat_data *dm9000_p - { - struct dm9000_plat_data *pdata; - struct device_node *np = dev->of_node; -- const void *mac_addr; -+ int ret; - - if (!IS_ENABLED(CONFIG_OF) || !np) - return ERR_PTR(-ENXIO); -@@ -1404,11 +1404,9 @@ static struct dm9000_plat_data *dm9000_p - if (of_find_property(np, "davicom,no-eeprom", NULL)) - pdata->flags |= DM9000_PLATF_NO_EEPROM; - -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(pdata->dev_addr, mac_addr); -- else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) -- return ERR_CAST(mac_addr); -+ ret = of_get_mac_address(np, pdata->dev_addr); -+ if (ret == -EPROBE_DEFER) -+ return ERR_PTR(ret); - - return pdata; - } ---- a/drivers/net/ethernet/ethoc.c -+++ b/drivers/net/ethernet/ethoc.c -@@ -1147,11 +1147,7 @@ static int ethoc_probe(struct platform_d - ether_addr_copy(netdev->dev_addr, pdata->hwaddr); - priv->phy_id = pdata->phy_id; - } else { -- const void *mac; -- -- mac = of_get_mac_address(pdev->dev.of_node); -- if (!IS_ERR(mac)) -- ether_addr_copy(netdev->dev_addr, mac); -+ of_get_mac_address(pdev->dev.of_node, netdev->dev_addr); - priv->phy_id = -1; - } - ---- a/drivers/net/ethernet/ezchip/nps_enet.c -+++ b/drivers/net/ethernet/ezchip/nps_enet.c -@@ -575,7 +575,6 @@ static s32 
nps_enet_probe(struct platfor - struct net_device *ndev; - struct nps_enet_priv *priv; - s32 err = 0; -- const char *mac_addr; - - if (!dev->of_node) - return -ENODEV; -@@ -602,10 +601,8 @@ static s32 nps_enet_probe(struct platfor - dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs_base); - - /* set kernel MAC address to dev */ -- mac_addr = of_get_mac_address(dev->of_node); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -- else -+ err = of_get_mac_address(dev->of_node, ndev->dev_addr); -+ if (err) - eth_hw_addr_random(ndev); - - /* Get IRQ number */ ---- a/drivers/net/ethernet/freescale/fec_main.c -+++ b/drivers/net/ethernet/freescale/fec_main.c -@@ -1695,6 +1695,7 @@ static void fec_get_mac(struct net_devic - struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_platform_data *pdata = dev_get_platdata(&fep->pdev->dev); - unsigned char *iap, tmpaddr[ETH_ALEN]; -+ int ret; - - /* - * try to get mac address in following order: -@@ -1710,9 +1711,9 @@ static void fec_get_mac(struct net_devic - if (!is_valid_ether_addr(iap)) { - struct device_node *np = fep->pdev->dev.of_node; - if (np) { -- const char *mac = of_get_mac_address(np); -- if (!IS_ERR(mac)) -- iap = (unsigned char *) mac; -+ ret = of_get_mac_address(np, tmpaddr); -+ if (!ret) -+ iap = tmpaddr; - } - } - ---- a/drivers/net/ethernet/freescale/fec_mpc52xx.c -+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c -@@ -823,7 +823,6 @@ static int mpc52xx_fec_probe(struct plat - const u32 *prop; - int prop_size; - struct device_node *np = op->dev.of_node; -- const char *mac_addr; - - phys_addr_t rx_fifo; - phys_addr_t tx_fifo; -@@ -901,10 +900,8 @@ static int mpc52xx_fec_probe(struct plat - * - * First try to read MAC address from DT - */ -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) { -- ether_addr_copy(ndev->dev_addr, mac_addr); -- } else { -+ rv = of_get_mac_address(np, ndev->dev_addr); -+ if (rv) { - struct mpc52xx_fec __iomem *fec = priv->fec; - - /* 
---- a/drivers/net/ethernet/freescale/fman/mac.c -+++ b/drivers/net/ethernet/freescale/fman/mac.c -@@ -616,7 +616,6 @@ static int mac_probe(struct platform_dev - struct platform_device *of_dev; - struct resource res; - struct mac_priv_s *priv; -- const u8 *mac_addr; - u32 val; - u8 fman_id; - int phy_if; -@@ -734,13 +733,12 @@ static int mac_probe(struct platform_dev - priv->cell_index = (u8)val; - - /* Get the MAC address */ -- mac_addr = of_get_mac_address(mac_node); -- if (IS_ERR(mac_addr)) { -+ err = of_get_mac_address(mac_node, mac_dev->addr); -+ if (err) { - dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node); - err = -EINVAL; - goto _return_of_get_parent; - } -- ether_addr_copy(mac_dev->addr, mac_addr); - - /* Get the port handles */ - nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL); ---- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c -+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c -@@ -925,7 +925,6 @@ static int fs_enet_probe(struct platform - const u32 *data; - struct clk *clk; - int err; -- const u8 *mac_addr; - const char *phy_connection_type; - int privsize, len, ret = -ENODEV; - -@@ -1013,9 +1012,7 @@ static int fs_enet_probe(struct platform - spin_lock_init(&fep->lock); - spin_lock_init(&fep->tx_lock); - -- mac_addr = of_get_mac_address(ofdev->dev.of_node); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -+ of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr); - - ret = fep->ops->allocate_bd(ndev); - if (ret) ---- a/drivers/net/ethernet/freescale/gianfar.c -+++ b/drivers/net/ethernet/freescale/gianfar.c -@@ -643,7 +643,6 @@ static phy_interface_t gfar_get_interfac - static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) - { - const char *model; -- const void *mac_addr; - int err = 0, i; - struct net_device *dev = NULL; - struct gfar_private *priv = NULL; -@@ -784,10 +783,7 @@ static int gfar_of_init(struct platform_ - if (stash_len || stash_idx) - 
priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING; - -- mac_addr = of_get_mac_address(np); -- -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(dev->dev_addr, mac_addr); -+ err = of_get_mac_address(np, dev->dev_addr); - - if (model && !strcasecmp(model, "TSEC")) - priv->device_flags |= FSL_GIANFAR_DEV_HAS_GIGABIT | ---- a/drivers/net/ethernet/freescale/ucc_geth.c -+++ b/drivers/net/ethernet/freescale/ucc_geth.c -@@ -3697,7 +3697,6 @@ static int ucc_geth_probe(struct platfor - int err, ucc_num, max_speed = 0; - const unsigned int *prop; - const char *sprop; -- const void *mac_addr; - phy_interface_t phy_interface; - static const int enet_to_speed[] = { - SPEED_10, SPEED_10, SPEED_10, -@@ -3907,9 +3906,7 @@ static int ucc_geth_probe(struct platfor - goto err_free_netdev; - } - -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(dev->dev_addr, mac_addr); -+ of_get_mac_address(np, dev->dev_addr); - - ugeth->ug_info = ug_info; - ugeth->dev = device; ---- a/drivers/net/ethernet/hisilicon/hisi_femac.c -+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c -@@ -784,7 +784,6 @@ static int hisi_femac_drv_probe(struct p - struct net_device *ndev; - struct hisi_femac_priv *priv; - struct phy_device *phy; -- const char *mac_addr; - int ret; - - ndev = alloc_etherdev(sizeof(*priv)); -@@ -854,10 +853,8 @@ static int hisi_femac_drv_probe(struct p - (unsigned long)phy->phy_id, - phy_modes(phy->interface)); - -- mac_addr = of_get_mac_address(node); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -- if (!is_valid_ether_addr(ndev->dev_addr)) { -+ ret = of_get_mac_address(node, ndev->dev_addr); -+ if (ret) { - eth_hw_addr_random(ndev); - dev_warn(dev, "using random MAC address %pM\n", - ndev->dev_addr); ---- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c -+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c -@@ -1098,7 +1098,6 @@ static int hix5hd2_dev_probe(struct plat - struct net_device *ndev; - struct hix5hd2_priv *priv; - struct 
mii_bus *bus; -- const char *mac_addr; - int ret; - - ndev = alloc_etherdev(sizeof(struct hix5hd2_priv)); -@@ -1221,10 +1220,8 @@ static int hix5hd2_dev_probe(struct plat - goto out_phy_node; - } - -- mac_addr = of_get_mac_address(node); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -- if (!is_valid_ether_addr(ndev->dev_addr)) { -+ ret = of_get_mac_address(node, ndev->dev_addr); -+ if (ret) { - eth_hw_addr_random(ndev); - netdev_warn(ndev, "using random MAC address %pM\n", - ndev->dev_addr); ---- a/drivers/net/ethernet/lantiq_xrx200.c -+++ b/drivers/net/ethernet/lantiq_xrx200.c -@@ -439,7 +439,6 @@ static int xrx200_probe(struct platform_ - struct resource *res; - struct xrx200_priv *priv; - struct net_device *net_dev; -- const u8 *mac; - int err; - - /* alloc the network device */ -@@ -483,10 +482,8 @@ static int xrx200_probe(struct platform_ - return PTR_ERR(priv->clk); - } - -- mac = of_get_mac_address(np); -- if (!IS_ERR(mac)) -- ether_addr_copy(net_dev->dev_addr, mac); -- else -+ err = of_get_mac_address(np, net_dev->dev_addr); -+ if (err) - eth_hw_addr_random(net_dev); - - /* bring up the dma engine and IP core */ ---- a/drivers/net/ethernet/marvell/mv643xx_eth.c -+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c -@@ -2705,7 +2705,6 @@ static int mv643xx_eth_shared_of_add_por - struct platform_device *ppdev; - struct mv643xx_eth_platform_data ppd; - struct resource res; -- const char *mac_addr; - int ret; - int dev_num = 0; - -@@ -2736,9 +2735,7 @@ static int mv643xx_eth_shared_of_add_por - return -EINVAL; - } - -- mac_addr = of_get_mac_address(pnp); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ppd.mac_addr, mac_addr); -+ of_get_mac_address(pnp, ppd.mac_addr); - - mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); - mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr); ---- a/drivers/net/ethernet/marvell/mvneta.c -+++ b/drivers/net/ethernet/marvell/mvneta.c -@@ -4526,7 +4526,6 @@ static int mvneta_probe(struct 
platform_ - struct net_device *dev; - struct phylink *phylink; - struct phy *comphy; -- const char *dt_mac_addr; - char hw_mac_addr[ETH_ALEN]; - const char *mac_from; - int tx_csum_limit; -@@ -4623,10 +4622,9 @@ static int mvneta_probe(struct platform_ - goto err_free_ports; - } - -- dt_mac_addr = of_get_mac_address(dn); -- if (!IS_ERR(dt_mac_addr)) { -+ err = of_get_mac_address(dn, dev->dev_addr); -+ if (!err) { - mac_from = "device tree"; -- ether_addr_copy(dev->dev_addr, dt_mac_addr); - } else { - mvneta_get_mac_addr(pp, hw_mac_addr); - if (is_valid_ether_addr(hw_mac_addr)) { ---- a/drivers/net/ethernet/marvell/pxa168_eth.c -+++ b/drivers/net/ethernet/marvell/pxa168_eth.c -@@ -1402,7 +1402,6 @@ static int pxa168_eth_probe(struct platf - struct resource *res; - struct clk *clk; - struct device_node *np; -- const unsigned char *mac_addr = NULL; - int err; - - printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); -@@ -1445,12 +1444,8 @@ static int pxa168_eth_probe(struct platf - - INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); - -- if (pdev->dev.of_node) -- mac_addr = of_get_mac_address(pdev->dev.of_node); -- -- if (!IS_ERR_OR_NULL(mac_addr)) { -- ether_addr_copy(dev->dev_addr, mac_addr); -- } else { -+ err = of_get_mac_address(pdev->dev.of_node, dev->dev_addr); -+ if (err) { - /* try reading the mac address, if set by the bootloader */ - pxa168_eth_get_mac_address(dev, dev->dev_addr); - if (!is_valid_ether_addr(dev->dev_addr)) { ---- a/drivers/net/ethernet/marvell/sky2.c -+++ b/drivers/net/ethernet/marvell/sky2.c -@@ -4721,7 +4721,7 @@ static struct net_device *sky2_init_netd - { - struct sky2_port *sky2; - struct net_device *dev = alloc_etherdev(sizeof(*sky2)); -- const void *iap; -+ int ret; - - if (!dev) - return NULL; -@@ -4791,10 +4791,8 @@ static struct net_device *sky2_init_netd - * 1) from device tree data - * 2) from internal registers set by bootloader - */ -- iap = of_get_mac_address(hw->pdev->dev.of_node); -- if (!IS_ERR(iap)) -- 
ether_addr_copy(dev->dev_addr, iap); -- else -+ ret = of_get_mac_address(hw->pdev->dev.of_node, dev->dev_addr); -+ if (ret) - memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8, - ETH_ALEN); - ---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c -+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c -@@ -2490,14 +2490,11 @@ static int __init mtk_init(struct net_de - { - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; -- const char *mac_addr; -+ int ret; - -- mac_addr = of_get_mac_address(mac->of_node); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(dev->dev_addr, mac_addr); -- -- /* If the mac address is invalid, use random mac address */ -- if (!is_valid_ether_addr(dev->dev_addr)) { -+ ret = of_get_mac_address(mac->of_node, dev->dev_addr); -+ if (ret) { -+ /* If the mac address is invalid, use random mac address */ - eth_hw_addr_random(dev); - dev_err(eth->dev, "generated random MAC address %pM\n", - dev->dev_addr); ---- a/drivers/net/ethernet/micrel/ks8851.c -+++ b/drivers/net/ethernet/micrel/ks8851.c -@@ -419,11 +419,10 @@ static void ks8851_read_mac_addr(struct - static void ks8851_init_mac(struct ks8851_net *ks) - { - struct net_device *dev = ks->netdev; -- const u8 *mac_addr; -+ int ret; - -- mac_addr = of_get_mac_address(ks->spidev->dev.of_node); -- if (!IS_ERR(mac_addr)) { -- ether_addr_copy(dev->dev_addr, mac_addr); -+ ret = of_get_mac_address(ks->spidev->dev.of_node, dev->dev_addr); -+ if (!ret) { - ks8851_write_mac_addr(dev); - return; - } ---- a/drivers/net/ethernet/micrel/ks8851_mll.c -+++ b/drivers/net/ethernet/micrel/ks8851_mll.c -@@ -1239,7 +1239,6 @@ static int ks8851_probe(struct platform_ - struct net_device *netdev; - struct ks_net *ks; - u16 id, data; -- const char *mac; - - netdev = alloc_etherdev(sizeof(struct ks_net)); - if (!netdev) -@@ -1326,9 +1325,7 @@ static int ks8851_probe(struct platform_ - - /* overwriting the default MAC address */ - if (pdev->dev.of_node) { -- mac = of_get_mac_address(pdev->dev.of_node); 
-- if (!IS_ERR(mac)) -- ether_addr_copy(ks->mac_addr, mac); -+ of_get_mac_address(pdev->dev.of_node, ks->mac_addr); - } else { - struct ks8851_mll_platform_data *pdata; - ---- a/drivers/net/ethernet/nxp/lpc_eth.c -+++ b/drivers/net/ethernet/nxp/lpc_eth.c -@@ -1349,9 +1349,7 @@ static int lpc_eth_drv_probe(struct plat - __lpc_get_mac(pldat, ndev->dev_addr); - - if (!is_valid_ether_addr(ndev->dev_addr)) { -- const char *macaddr = of_get_mac_address(np); -- if (!IS_ERR(macaddr)) -- ether_addr_copy(ndev->dev_addr, macaddr); -+ of_get_mac_address(np, ndev->dev_addr); - } - if (!is_valid_ether_addr(ndev->dev_addr)) - eth_hw_addr_random(ndev); ---- a/drivers/net/ethernet/qualcomm/qca_spi.c -+++ b/drivers/net/ethernet/qualcomm/qca_spi.c -@@ -885,7 +885,7 @@ qca_spi_probe(struct spi_device *spi) - struct net_device *qcaspi_devs = NULL; - u8 legacy_mode = 0; - u16 signature; -- const char *mac; -+ int ret; - - if (!spi->dev.of_node) { - dev_err(&spi->dev, "Missing device tree\n"); -@@ -962,12 +962,8 @@ qca_spi_probe(struct spi_device *spi) - - spi_set_drvdata(spi, qcaspi_devs); - -- mac = of_get_mac_address(spi->dev.of_node); -- -- if (!IS_ERR(mac)) -- ether_addr_copy(qca->net_dev->dev_addr, mac); -- -- if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { -+ ret = of_get_mac_address(spi->dev.of_node, qca->net_dev->dev_addr); -+ if (ret) { - eth_hw_addr_random(qca->net_dev); - dev_info(&spi->dev, "Using random MAC address: %pM\n", - qca->net_dev->dev_addr); ---- a/drivers/net/ethernet/qualcomm/qca_uart.c -+++ b/drivers/net/ethernet/qualcomm/qca_uart.c -@@ -323,7 +323,6 @@ static int qca_uart_probe(struct serdev_ - { - struct net_device *qcauart_dev = alloc_etherdev(sizeof(struct qcauart)); - struct qcauart *qca; -- const char *mac; - u32 speed = 115200; - int ret; - -@@ -348,12 +347,8 @@ static int qca_uart_probe(struct serdev_ - - of_property_read_u32(serdev->dev.of_node, "current-speed", &speed); - -- mac = of_get_mac_address(serdev->dev.of_node); -- -- if (!IS_ERR(mac)) -- 
ether_addr_copy(qca->net_dev->dev_addr, mac); -- -- if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { -+ ret = of_get_mac_address(serdev->dev.of_node, qca->net_dev->dev_addr); -+ if (ret) { - eth_hw_addr_random(qca->net_dev); - dev_info(&serdev->dev, "Using random MAC address: %pM\n", - qca->net_dev->dev_addr); ---- a/drivers/net/ethernet/renesas/ravb_main.c -+++ b/drivers/net/ethernet/renesas/ravb_main.c -@@ -109,11 +109,13 @@ static void ravb_set_buffer_align(struct - * Ethernet AVB device doesn't have ROM for MAC address. - * This function gets the MAC address that was used by a bootloader. - */ --static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac) -+static void ravb_read_mac_address(struct device_node *np, -+ struct net_device *ndev) - { -- if (!IS_ERR(mac)) { -- ether_addr_copy(ndev->dev_addr, mac); -- } else { -+ int ret; -+ -+ ret = of_get_mac_address(np, ndev->dev_addr); -+ if (ret) { - u32 mahr = ravb_read(ndev, MAHR); - u32 malr = ravb_read(ndev, MALR); - -@@ -2152,7 +2154,7 @@ static int ravb_probe(struct platform_de - priv->msg_enable = RAVB_DEF_MSG_ENABLE; - - /* Read and set MAC address */ -- ravb_read_mac_address(ndev, of_get_mac_address(np)); -+ ravb_read_mac_address(np, ndev); - if (!is_valid_ether_addr(ndev->dev_addr)) { - dev_warn(&pdev->dev, - "no valid MAC address supplied, using a random one\n"); ---- a/drivers/net/ethernet/renesas/sh_eth.c -+++ b/drivers/net/ethernet/renesas/sh_eth.c -@@ -3195,7 +3195,6 @@ static struct sh_eth_plat_data *sh_eth_p - { - struct device_node *np = dev->of_node; - struct sh_eth_plat_data *pdata; -- const char *mac_addr; - int ret; - - pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); -@@ -3207,9 +3206,7 @@ static struct sh_eth_plat_data *sh_eth_p - return NULL; - pdata->phy_interface = ret; - -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(pdata->mac_addr, mac_addr); -+ of_get_mac_address(np, pdata->mac_addr); - - pdata->no_ether_link = - 
of_property_read_bool(np, "renesas,no-ether-link"); ---- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c -+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c -@@ -25,8 +25,7 @@ - - #ifdef CONFIG_OF - static int sxgbe_probe_config_dt(struct platform_device *pdev, -- struct sxgbe_plat_data *plat, -- const char **mac) -+ struct sxgbe_plat_data *plat) - { - struct device_node *np = pdev->dev.of_node; - struct sxgbe_dma_cfg *dma_cfg; -@@ -34,7 +33,6 @@ static int sxgbe_probe_config_dt(struct - if (!np) - return -ENODEV; - -- *mac = of_get_mac_address(np); - plat->interface = of_get_phy_mode(np); - - plat->bus_id = of_alias_get_id(np, "ethernet"); -@@ -60,8 +58,7 @@ static int sxgbe_probe_config_dt(struct - } - #else - static int sxgbe_probe_config_dt(struct platform_device *pdev, -- struct sxgbe_plat_data *plat, -- const char **mac) -+ struct sxgbe_plat_data *plat) - { - return -ENOSYS; - } -@@ -82,7 +79,6 @@ static int sxgbe_platform_probe(struct p - void __iomem *addr; - struct sxgbe_priv_data *priv = NULL; - struct sxgbe_plat_data *plat_dat = NULL; -- const char *mac = NULL; - struct net_device *ndev = platform_get_drvdata(pdev); - struct device_node *node = dev->of_node; - -@@ -98,7 +94,7 @@ static int sxgbe_platform_probe(struct p - if (!plat_dat) - return -ENOMEM; - -- ret = sxgbe_probe_config_dt(pdev, plat_dat, &mac); -+ ret = sxgbe_probe_config_dt(pdev, plat_dat); - if (ret) { - pr_err("%s: main dt probe failed\n", __func__); - return ret; -@@ -119,8 +115,7 @@ static int sxgbe_platform_probe(struct p - } - - /* Get MAC address if available (DT) */ -- if (!IS_ERR_OR_NULL(mac)) -- ether_addr_copy(priv->dev->dev_addr, mac); -+ of_get_mac_address(node, priv->dev->dev_addr); - - /* Get the TX/RX IRQ numbers */ - for (i = 0, chan = 1; i < SXGBE_TX_QUEUES; i++) { ---- a/drivers/net/ethernet/socionext/sni_ave.c -+++ b/drivers/net/ethernet/socionext/sni_ave.c -@@ -1559,7 +1559,6 @@ static int ave_probe(struct platform_dev - struct ave_private *priv; - struct 
net_device *ndev; - struct device_node *np; -- const void *mac_addr; - void __iomem *base; - const char *name; - int i, irq, ret; -@@ -1600,12 +1599,9 @@ static int ave_probe(struct platform_dev - - ndev->max_mtu = AVE_MAX_ETHFRAME - (ETH_HLEN + ETH_FCS_LEN); - -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -- -- /* if the mac address is invalid, use random mac address */ -- if (!is_valid_ether_addr(ndev->dev_addr)) { -+ ret = of_get_mac_address(np, ndev->dev_addr); -+ if (ret) { -+ /* if the mac address is invalid, use random mac address */ - eth_hw_addr_random(ndev); - dev_warn(dev, "Using random MAC address: %pM\n", - ndev->dev_addr); ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c -@@ -110,7 +110,7 @@ static int anarion_dwmac_probe(struct pl - if (IS_ERR(gmac)) - return PTR_ERR(gmac); - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c -@@ -438,7 +438,7 @@ static int dwc_eth_dwmac_probe(struct pl - if (IS_ERR(stmmac_res.addr)) - return PTR_ERR(stmmac_res.addr); - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c -@@ -27,7 +27,7 @@ static int dwmac_generic_probe(struct pl - return ret; - - if (pdev->dev.of_node) { -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) { - dev_err(&pdev->dev, "dt configuration failed\n"); - return PTR_ERR(plat_dat); ---- 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c -@@ -254,7 +254,7 @@ static int ipq806x_gmac_probe(struct pla - if (val) - return val; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c -@@ -37,7 +37,7 @@ static int lpc18xx_dwmac_probe(struct pl - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c -@@ -348,7 +348,7 @@ static int mediatek_dwmac_probe(struct p - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c -@@ -52,7 +52,7 @@ static int meson6_dwmac_probe(struct pla - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c -@@ -324,7 +324,7 @@ static int meson8b_dwmac_probe(struct pl - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c -+++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c -@@ -118,7 +118,7 @@ static int oxnas_dwmac_probe(struct plat - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c -@@ -461,7 +461,7 @@ static int qcom_ethqos_probe(struct plat - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) { - dev_err(&pdev->dev, "dt configuration failed\n"); - return PTR_ERR(plat_dat); ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c -@@ -1396,7 +1396,7 @@ static int rk_gmac_probe(struct platform - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c -@@ -398,7 +398,7 @@ static int socfpga_dwmac_probe(struct pl - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c -@@ -320,7 +320,7 @@ static int sti_dwmac_probe(struct platfo - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c -+++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c -@@ -364,7 +364,7 @@ static int stm32_dwmac_probe(struct plat - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c -@@ -1127,7 +1127,7 @@ static int sun8i_dwmac_probe(struct plat - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c -+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c -@@ -108,7 +108,7 @@ static int sun7i_gmac_probe(struct platf - if (ret) - return ret; - -- plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); -+ plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - ---- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h -+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h -@@ -25,7 +25,7 @@ - - struct stmmac_resources { - void __iomem *addr; -- const char *mac; -+ u8 mac[ETH_ALEN]; - int wol_irq; - int lpi_irq; - int irq; ---- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c -+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c -@@ -4471,7 +4471,7 @@ int stmmac_dvr_probe(struct device *devi - priv->wol_irq = res->wol_irq; - priv->lpi_irq = res->lpi_irq; - -- if (!IS_ERR_OR_NULL(res->mac)) -+ if (!is_zero_ether_addr(res->mac)) - memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN); - - dev_set_drvdata(device, priv->dev); ---- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c -+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c -@@ -393,7 +393,7 @@ static int stmmac_of_get_mac_mode(struct - * set some private fields that will be used by the main at runtime. 
- */ - struct plat_stmmacenet_data * --stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) -+stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) - { - struct device_node *np = pdev->dev.of_node; - struct plat_stmmacenet_data *plat; -@@ -404,12 +404,12 @@ stmmac_probe_config_dt(struct platform_d - if (!plat) - return ERR_PTR(-ENOMEM); - -- *mac = of_get_mac_address(np); -- if (IS_ERR(*mac)) { -- if (PTR_ERR(*mac) == -EPROBE_DEFER) -- return ERR_CAST(*mac); -+ rc = of_get_mac_address(np, mac); -+ if (rc) { -+ if (rc == -EPROBE_DEFER) -+ return ERR_PTR(rc); - -- *mac = NULL; -+ eth_zero_addr(mac); - } - - plat->phy_interface = of_get_phy_mode(np); -@@ -639,7 +639,7 @@ void stmmac_remove_config_dt(struct plat - } - #else - struct plat_stmmacenet_data * --stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) -+stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) - { - return ERR_PTR(-EINVAL); - } ---- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h -+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h -@@ -12,7 +12,7 @@ - #include "stmmac.h" - - struct plat_stmmacenet_data * --stmmac_probe_config_dt(struct platform_device *pdev, const char **mac); -+stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); - void stmmac_remove_config_dt(struct platform_device *pdev, - struct plat_stmmacenet_data *plat); - ---- a/drivers/net/ethernet/ti/cpsw.c -+++ b/drivers/net/ethernet/ti/cpsw.c -@@ -2555,7 +2555,6 @@ static int cpsw_probe_dt(struct cpsw_pla - - for_each_available_child_of_node(node, slave_node) { - struct cpsw_slave_data *slave_data = data->slave_data + i; -- const void *mac_addr = NULL; - int lenp; - const __be32 *parp; - -@@ -2628,10 +2627,8 @@ static int cpsw_probe_dt(struct cpsw_pla - } - - no_phy_slave: -- mac_addr = of_get_mac_address(slave_node); -- if (!IS_ERR(mac_addr)) { -- ether_addr_copy(slave_data->mac_addr, mac_addr); -- } else { -+ ret = of_get_mac_address(slave_node, 
slave_data->mac_addr); -+ if (ret) { - ret = ti_cm_get_macid(&pdev->dev, i, - slave_data->mac_addr); - if (ret) ---- a/drivers/net/ethernet/ti/davinci_emac.c -+++ b/drivers/net/ethernet/ti/davinci_emac.c -@@ -1697,7 +1697,6 @@ davinci_emac_of_get_pdata(struct platfor - const struct of_device_id *match; - const struct emac_platform_data *auxdata; - struct emac_platform_data *pdata = NULL; -- const u8 *mac_addr; - - if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node) - return dev_get_platdata(&pdev->dev); -@@ -1709,11 +1708,8 @@ davinci_emac_of_get_pdata(struct platfor - np = pdev->dev.of_node; - pdata->version = EMAC_VERSION_2; - -- if (!is_valid_ether_addr(pdata->mac_addr)) { -- mac_addr = of_get_mac_address(np); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(pdata->mac_addr, mac_addr); -- } -+ if (!is_valid_ether_addr(pdata->mac_addr)) -+ of_get_mac_address(np, pdata->mac_addr); - - of_property_read_u32(np, "ti,davinci-ctrl-reg-offset", - &pdata->ctrl_reg_offset); ---- a/drivers/net/ethernet/ti/netcp_core.c -+++ b/drivers/net/ethernet/ti/netcp_core.c -@@ -1966,7 +1966,6 @@ static int netcp_create_interface(struct - struct resource res; - void __iomem *efuse = NULL; - u32 efuse_mac = 0; -- const void *mac_addr; - u8 efuse_mac_addr[6]; - u32 temp[2]; - int ret = 0; -@@ -2036,10 +2035,8 @@ static int netcp_create_interface(struct - devm_iounmap(dev, efuse); - devm_release_mem_region(dev, res.start, size); - } else { -- mac_addr = of_get_mac_address(node_interface); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(ndev->dev_addr, mac_addr); -- else -+ ret = of_get_mac_address(node_interface, ndev->dev_addr); -+ if (ret) - eth_random_addr(ndev->dev_addr); - } - ---- a/drivers/net/ethernet/wiznet/w5100-spi.c -+++ b/drivers/net/ethernet/wiznet/w5100-spi.c -@@ -423,8 +423,14 @@ static int w5100_spi_probe(struct spi_de - const struct of_device_id *of_id; - const struct w5100_ops *ops; - kernel_ulong_t driver_data; -+ const void *mac = NULL; -+ u8 tmpmac[ETH_ALEN]; - int priv_size; 
-- const void *mac = of_get_mac_address(spi->dev.of_node); -+ int ret; -+ -+ ret = of_get_mac_address(spi->dev.of_node, tmpmac); -+ if (!ret) -+ mac = tmpmac; - - if (spi->dev.of_node) { - of_id = of_match_device(w5100_of_match, &spi->dev); ---- a/drivers/net/ethernet/wiznet/w5100.c -+++ b/drivers/net/ethernet/wiznet/w5100.c -@@ -1159,7 +1159,7 @@ int w5100_probe(struct device *dev, cons - INIT_WORK(&priv->setrx_work, w5100_setrx_work); - INIT_WORK(&priv->restart_work, w5100_restart_work); - -- if (!IS_ERR_OR_NULL(mac_addr)) -+ if (mac_addr) - memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); - else - eth_hw_addr_random(ndev); ---- a/drivers/net/ethernet/xilinx/ll_temac_main.c -+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c -@@ -434,7 +434,7 @@ static void temac_do_set_mac_address(str - - static int temac_init_mac_address(struct net_device *ndev, const void *address) - { -- ether_addr_copy(ndev->dev_addr, address); -+ memcpy(ndev->dev_addr, address, ETH_ALEN); - if (!is_valid_ether_addr(ndev->dev_addr)) - eth_hw_addr_random(ndev); - temac_do_set_mac_address(ndev); -@@ -1296,7 +1296,7 @@ static int temac_probe(struct platform_d - struct temac_local *lp; - struct net_device *ndev; - struct resource *res; -- const void *addr; -+ u8 addr[ETH_ALEN]; - __be32 *p; - bool little_endian; - int rc = 0; -@@ -1492,8 +1492,8 @@ static int temac_probe(struct platform_d - - if (temac_np) { - /* Retrieve the MAC address */ -- addr = of_get_mac_address(temac_np); -- if (IS_ERR(addr)) { -+ rc = of_get_mac_address(temac_np, addr); -+ if (rc) { - dev_err(&pdev->dev, "could not find MAC address\n"); - return -ENODEV; - } ---- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c -+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c -@@ -1665,7 +1665,7 @@ static int axienet_probe(struct platform - struct device_node *np; - struct axienet_local *lp; - struct net_device *ndev; -- const void *mac_addr; -+ u8 mac_addr[ETH_ALEN]; - struct resource *ethres; - u32 value; - -@@ -1837,13 +1837,14 
@@ static int axienet_probe(struct platform - dev_info(&pdev->dev, "Ethernet core IRQ not defined\n"); - - /* Retrieve the MAC address */ -- mac_addr = of_get_mac_address(pdev->dev.of_node); -- if (IS_ERR(mac_addr)) { -- dev_warn(&pdev->dev, "could not find MAC address property: %ld\n", -- PTR_ERR(mac_addr)); -- mac_addr = NULL; -+ ret = of_get_mac_address(pdev->dev.of_node, mac_addr); -+ if (!ret) { -+ axienet_set_mac_address(ndev, mac_addr); -+ } else { -+ dev_warn(&pdev->dev, "could not find MAC address property: %d\n", -+ ret); -+ axienet_set_mac_address(ndev, NULL); - } -- axienet_set_mac_address(ndev, mac_addr); - - lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; - lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; ---- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c -+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c -@@ -1113,7 +1113,6 @@ static int xemaclite_of_probe(struct pla - struct net_device *ndev = NULL; - struct net_local *lp = NULL; - struct device *dev = &ofdev->dev; -- const void *mac_address; - - int rc = 0; - -@@ -1155,12 +1154,9 @@ static int xemaclite_of_probe(struct pla - lp->next_rx_buf_to_use = 0x0; - lp->tx_ping_pong = get_bool(ofdev, "xlnx,tx-ping-pong"); - lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong"); -- mac_address = of_get_mac_address(ofdev->dev.of_node); - -- if (!IS_ERR(mac_address)) { -- /* Set the MAC address. 
*/ -- ether_addr_copy(ndev->dev_addr, mac_address); -- } else { -+ rc = of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr); -+ if (rc) { - dev_warn(dev, "No MAC address found, using random\n"); - eth_hw_addr_random(ndev); - } ---- a/drivers/net/wireless/ath/ath9k/init.c -+++ b/drivers/net/wireless/ath/ath9k/init.c -@@ -618,7 +618,6 @@ static int ath9k_of_init(struct ath_soft - struct ath_hw *ah = sc->sc_ah; - struct ath_common *common = ath9k_hw_common(ah); - enum ath_bus_type bus_type = common->bus_ops->ath_bus_type; -- const char *mac; - char eeprom_name[100]; - int ret; - -@@ -641,9 +640,7 @@ static int ath9k_of_init(struct ath_soft - ah->ah_flags |= AH_NO_EEP_SWAP; - } - -- mac = of_get_mac_address(np); -- if (!IS_ERR(mac)) -- ether_addr_copy(common->macaddr, mac); -+ of_get_mac_address(np, common->macaddr); - - return 0; - } ---- a/drivers/net/wireless/mediatek/mt76/eeprom.c -+++ b/drivers/net/wireless/mediatek/mt76/eeprom.c -@@ -75,17 +75,9 @@ out_put_node: - void - mt76_eeprom_override(struct mt76_dev *dev) - { --#ifdef CONFIG_OF - struct device_node *np = dev->dev->of_node; -- const u8 *mac; - -- if (!np) -- return; -- -- mac = of_get_mac_address(np); -- if (!IS_ERR(mac)) -- ether_addr_copy(dev->macaddr, mac); --#endif -+ of_get_mac_address(np, dev->macaddr); - - if (!is_valid_ether_addr(dev->macaddr)) { - eth_random_addr(dev->macaddr); ---- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c -+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c -@@ -990,11 +990,7 @@ static void rt2x00lib_rate(struct ieee80 - - void rt2x00lib_set_mac_address(struct rt2x00_dev *rt2x00dev, u8 *eeprom_mac_addr) - { -- const char *mac_addr; -- -- mac_addr = of_get_mac_address(rt2x00dev->dev->of_node); -- if (!IS_ERR(mac_addr)) -- ether_addr_copy(eeprom_mac_addr, mac_addr); -+ of_get_mac_address(rt2x00dev->dev->of_node, eeprom_mac_addr); - - if (!is_valid_ether_addr(eeprom_mac_addr)) { - eth_random_addr(eeprom_mac_addr); ---- a/drivers/of/of_net.c -+++ b/drivers/of/of_net.c -@@ 
-39,37 +39,29 @@ int of_get_phy_mode(struct device_node * - } - EXPORT_SYMBOL_GPL(of_get_phy_mode); - --static const void *of_get_mac_addr(struct device_node *np, const char *name) -+static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr) - { - struct property *pp = of_find_property(np, name, NULL); - -- if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) -- return pp->value; -- return NULL; -+ if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) { -+ memcpy(addr, pp->value, ETH_ALEN); -+ return 0; -+ } -+ return -ENODEV; - } - --static const void *of_get_mac_addr_nvmem(struct device_node *np) -+static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) - { -- int ret; -- const void *mac; -- u8 nvmem_mac[ETH_ALEN]; - struct platform_device *pdev = of_find_device_by_node(np); -+ int ret; - - if (!pdev) -- return ERR_PTR(-ENODEV); -+ return -ENODEV; - -- ret = nvmem_get_mac_address(&pdev->dev, &nvmem_mac); -- if (ret) { -- put_device(&pdev->dev); -- return ERR_PTR(ret); -- } -- -- mac = devm_kmemdup(&pdev->dev, nvmem_mac, ETH_ALEN, GFP_KERNEL); -+ ret = nvmem_get_mac_address(&pdev->dev, addr); - put_device(&pdev->dev); -- if (!mac) -- return ERR_PTR(-ENOMEM); - -- return mac; -+ return ret; - } - - /** -@@ -92,24 +84,27 @@ static const void *of_get_mac_addr_nvmem - * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists - * but is all zeros. - * -- * Return: Will be a valid pointer on success and ERR_PTR in case of error. -+ * Return: 0 on success and errno in case of error. 
- */ --const void *of_get_mac_address(struct device_node *np) -+int of_get_mac_address(struct device_node *np, u8 *addr) - { -- const void *addr; -- -- addr = of_get_mac_addr(np, "mac-address"); -- if (addr) -- return addr; -+ int ret; - -- addr = of_get_mac_addr(np, "local-mac-address"); -- if (addr) -- return addr; -+ if (!np) -+ return -ENODEV; - -- addr = of_get_mac_addr(np, "address"); -- if (addr) -- return addr; -+ ret = of_get_mac_addr(np, "mac-address", addr); -+ if (!ret) -+ return 0; -+ -+ ret = of_get_mac_addr(np, "local-mac-address", addr); -+ if (!ret) -+ return 0; -+ -+ ret = of_get_mac_addr(np, "address", addr); -+ if (!ret) -+ return 0; - -- return of_get_mac_addr_nvmem(np); -+ return of_get_mac_addr_nvmem(np, addr); - } - EXPORT_SYMBOL(of_get_mac_address); ---- a/drivers/staging/octeon/ethernet.c -+++ b/drivers/staging/octeon/ethernet.c -@@ -407,14 +407,10 @@ static int cvm_oct_common_set_mac_addres - int cvm_oct_common_init(struct net_device *dev) - { - struct octeon_ethernet *priv = netdev_priv(dev); -- const u8 *mac = NULL; -+ int ret; - -- if (priv->of_node) -- mac = of_get_mac_address(priv->of_node); -- -- if (!IS_ERR_OR_NULL(mac)) -- ether_addr_copy(dev->dev_addr, mac); -- else -+ ret = of_get_mac_address(priv->of_node, dev->dev_addr); -+ if (ret) - eth_hw_addr_random(dev); - - /* ---- a/include/linux/of_net.h -+++ b/include/linux/of_net.h -@@ -11,7 +11,7 @@ - - struct net_device; - extern int of_get_phy_mode(struct device_node *np); --extern const void *of_get_mac_address(struct device_node *np); -+extern int of_get_mac_address(struct device_node *np, u8 *mac); - extern struct net_device *of_find_net_device_by_node(struct device_node *np); - #else - static inline int of_get_phy_mode(struct device_node *np) -@@ -19,9 +19,9 @@ static inline int of_get_phy_mode(struct - return -ENODEV; - } - --static inline const void *of_get_mac_address(struct device_node *np) -+static inline int of_get_mac_address(struct device_node *np, u8 *mac) - { -- 
return ERR_PTR(-ENODEV); -+ return -ENODEV; - } - - static inline struct net_device *of_find_net_device_by_node(struct device_node *np) ---- a/include/net/dsa.h -+++ b/include/net/dsa.h -@@ -186,7 +186,7 @@ struct dsa_port { - unsigned int index; - const char *name; - struct dsa_port *cpu_dp; -- const char *mac; -+ u8 mac[ETH_ALEN]; - struct device_node *dn; - unsigned int ageing_time; - bool vlan_filtering; ---- a/net/dsa/dsa2.c -+++ b/net/dsa/dsa2.c -@@ -318,7 +318,7 @@ static int dsa_port_setup(struct dsa_por - break; - devlink_port_registered = true; - -- dp->mac = of_get_mac_address(dp->dn); -+ of_get_mac_address(dp->dn, dp->mac); - err = dsa_slave_create(dp); - if (err) - break; ---- a/net/dsa/slave.c -+++ b/net/dsa/slave.c -@@ -1414,7 +1414,7 @@ int dsa_slave_create(struct dsa_port *po - slave_dev->hw_features |= NETIF_F_HW_TC; - slave_dev->features |= NETIF_F_LLTX; - slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; -- if (!IS_ERR_OR_NULL(port->mac)) -+ if (!is_zero_ether_addr(port->mac)) - ether_addr_copy(slave_dev->dev_addr, port->mac); - else - eth_hw_addr_inherit(slave_dev, master); ---- a/net/ethernet/eth.c -+++ b/net/ethernet/eth.c -@@ -550,13 +550,14 @@ unsigned char * __weak arch_get_platform - - int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr) - { -- const unsigned char *addr = NULL; -+ unsigned char *addr; -+ int ret; - -- if (dev->of_node) -- addr = of_get_mac_address(dev->of_node); -- if (IS_ERR_OR_NULL(addr)) -- addr = arch_get_platform_mac_address(); -+ ret = of_get_mac_address(dev->of_node, mac_addr); -+ if (!ret) -+ return 0; - -+ addr = arch_get_platform_mac_address(); - if (!addr) - return -ENODEV; - ---- a/drivers/net/usb/smsc75xx.c -+++ b/drivers/net/usb/smsc75xx.c -@@ -757,11 +757,12 @@ static int smsc75xx_ioctl(struct net_dev - - static void smsc75xx_init_mac_address(struct usbnet *dev) - { -- const u8 *mac_addr; -+ u8 mac_addr[ETH_ALEN]; -+ int ret; - - /* maybe the boot loader passed the MAC address in devicetree */ 
-- mac_addr = of_get_mac_address(dev->udev->dev.of_node); -- if (!IS_ERR(mac_addr)) { -+ ret = of_get_mac_address(dev->udev->dev.of_node, mac_addr); -+ if (!ret) { - ether_addr_copy(dev->net->dev_addr, mac_addr); - return; - } ---- a/drivers/net/usb/smsc95xx.c -+++ b/drivers/net/usb/smsc95xx.c -@@ -901,11 +901,12 @@ static int smsc95xx_ioctl(struct net_dev - - static void smsc95xx_init_mac_address(struct usbnet *dev) - { -- const u8 *mac_addr; -+ u8 mac_addr[ETH_ALEN]; -+ int ret; - - /* maybe the boot loader passed the MAC address in devicetree */ -- mac_addr = of_get_mac_address(dev->udev->dev.of_node); -- if (!IS_ERR(mac_addr)) { -+ ret = of_get_mac_address(dev->udev->dev.of_node, mac_addr); -+ if (!ret) { - ether_addr_copy(dev->net->dev_addr, mac_addr); - return; - } ---- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c -+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c -@@ -3444,10 +3444,11 @@ static int bcmgenet_probe(struct platfor - const struct of_device_id *of_id = NULL; - struct bcmgenet_priv *priv; - struct net_device *dev; -- const void *macaddr; -+ u8 macaddr[ETH_ALEN]; - unsigned int i; - int err = -EIO; - const char *phy_mode_str; -+ int ret; - - /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */ - dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1, -@@ -3474,14 +3475,15 @@ static int bcmgenet_probe(struct platfor - } - - if (dn) { -- macaddr = of_get_mac_address(dn); -- if (IS_ERR(macaddr)) { -+ ret = of_get_mac_address(dn, macaddr); -+ if (ret) { - dev_err(&pdev->dev, "can't find MAC address\n"); - err = -EINVAL; - goto err; - } -+ ether_addr_copy(dev->dev_addr, macaddr); - } else { -- macaddr = pd->mac_address; -+ ether_addr_copy(dev->dev_addr, pd->mac_address); - } - - priv->base = devm_platform_ioremap_resource(pdev, 0); -@@ -3494,7 +3496,6 @@ static int bcmgenet_probe(struct platfor - - SET_NETDEV_DEV(dev, &pdev->dev); - dev_set_drvdata(&pdev->dev, dev); -- ether_addr_copy(dev->dev_addr, macaddr); - dev->watchdog_timeo = 2 * 
HZ; - dev->ethtool_ops = &bcmgenet_ethtool_ops; - dev->netdev_ops = &bcmgenet_netdev_ops; diff --git a/target/linux/generic/backport-5.4/782-net-next-2-of-net-fix-of_get_mac_addr_nvmem-for-non-platform-devices.patch b/target/linux/generic/backport-5.4/782-net-next-2-of-net-fix-of_get_mac_addr_nvmem-for-non-platform-devices.patch deleted file mode 100644 index c365ac0bb2..0000000000 --- a/target/linux/generic/backport-5.4/782-net-next-2-of-net-fix-of_get_mac_addr_nvmem-for-non-platform-devices.patch +++ /dev/null @@ -1,77 +0,0 @@ -From f10843e04a075202dbb39dfcee047e3a2fdf5a8d Mon Sep 17 00:00:00 2001 -From: Michael Walle <michael@walle.cc> -Date: Mon, 12 Apr 2021 19:47:18 +0200 -Subject: of: net: fix of_get_mac_addr_nvmem() for non-platform devices - -of_get_mac_address() already supports fetching the MAC address by an -nvmem provider. But until now, it was just working for platform devices. -Esp. it was not working for DSA ports and PCI devices. It gets more -common that PCI devices have a device tree binding since SoCs contain -integrated root complexes. - -Use the nvmem of_* binding to fetch the nvmem cells by a struct -device_node. We still have to try to read the cell by device first -because there might be a nvmem_cell_lookup associated with that device. - -Signed-off-by: Michael Walle <michael@walle.cc> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: David S. 
Miller <davem@davemloft.net> ---- - drivers/of/of_net.c | 35 ++++++++++++++++++++++++++++++----- - 1 file changed, 30 insertions(+), 5 deletions(-) - ---- a/drivers/of/of_net.c -+++ b/drivers/of/of_net.c -@@ -11,6 +11,7 @@ - #include <linux/phy.h> - #include <linux/export.h> - #include <linux/device.h> -+#include <linux/nvmem-consumer.h> - - /** - * of_get_phy_mode - Get phy mode for given device_node -@@ -53,15 +54,39 @@ static int of_get_mac_addr(struct device - static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr) - { - struct platform_device *pdev = of_find_device_by_node(np); -+ struct nvmem_cell *cell; -+ const void *mac; -+ size_t len; - int ret; - -- if (!pdev) -- return -ENODEV; -+ /* Try lookup by device first, there might be a nvmem_cell_lookup -+ * associated with a given device. -+ */ -+ if (pdev) { -+ ret = nvmem_get_mac_address(&pdev->dev, addr); -+ put_device(&pdev->dev); -+ return ret; -+ } -+ -+ cell = of_nvmem_cell_get(np, "mac-address"); -+ if (IS_ERR(cell)) -+ return PTR_ERR(cell); -+ -+ mac = nvmem_cell_read(cell, &len); -+ nvmem_cell_put(cell); -+ -+ if (IS_ERR(mac)) -+ return PTR_ERR(mac); -+ -+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { -+ kfree(mac); -+ return -EINVAL; -+ } - -- ret = nvmem_get_mac_address(&pdev->dev, addr); -- put_device(&pdev->dev); -+ memcpy(addr, mac, ETH_ALEN); -+ kfree(mac); - -- return ret; -+ return 0; - } - - /** diff --git a/target/linux/generic/backport-5.4/790-net-phy-at803x-select-correct-page-on-config-init.patch b/target/linux/generic/backport-5.4/790-net-phy-at803x-select-correct-page-on-config-init.patch deleted file mode 100644 index b7e3201fb7..0000000000 --- a/target/linux/generic/backport-5.4/790-net-phy-at803x-select-correct-page-on-config-init.patch +++ /dev/null @@ -1,104 +0,0 @@ -From c329e5afb42ff0a88285eb4d8a391a18793e4777 Mon Sep 17 00:00:00 2001 -From: David Bauer <mail@david-bauer.net> -Date: Thu, 15 Apr 2021 03:26:50 +0200 -Subject: [PATCH] net: phy: at803x: select 
correct page on config init - -The Atheros AR8031 and AR8033 expose different registers for SGMII/Fiber -as well as the copper side of the PHY depending on the BT_BX_REG_SEL bit -in the chip configure register. - -The driver assumes the copper side is selected on probe, but this might -not be the case depending which page was last selected by the -bootloader. Notably, Ubiquiti UniFi bootloaders show this behavior. - -Select the copper page when probing to circumvent this. - -Signed-off-by: David Bauer <mail@david-bauer.net> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - drivers/net/phy/at803x.c | 50 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 49 insertions(+), 1 deletion(-) - ---- a/drivers/net/phy/at803x.c -+++ b/drivers/net/phy/at803x.c -@@ -67,6 +67,9 @@ - #define ATH8035_PHY_ID 0x004dd072 - #define AT803X_PHY_ID_MASK 0xffffffef - -+#define AT803X_PAGE_FIBER 0 -+#define AT803X_PAGE_COPPER 1 -+ - MODULE_DESCRIPTION("Atheros 803x PHY driver"); - MODULE_AUTHOR("Matus Ujhelyi"); - MODULE_LICENSE("GPL"); -@@ -112,6 +115,35 @@ static int at803x_debug_reg_mask(struct - return phy_write(phydev, AT803X_DEBUG_DATA, val); - } - -+static int at803x_write_page(struct phy_device *phydev, int page) -+{ -+ int mask; -+ int set; -+ -+ if (page == AT803X_PAGE_COPPER) { -+ set = AT803X_BT_BX_REG_SEL; -+ mask = 0; -+ } else { -+ set = 0; -+ mask = AT803X_BT_BX_REG_SEL; -+ } -+ -+ return __phy_modify(phydev, AT803X_REG_CHIP_CONFIG, mask, set); -+} -+ -+static int at803x_read_page(struct phy_device *phydev) -+{ -+ int ccr = __phy_read(phydev, AT803X_REG_CHIP_CONFIG); -+ -+ if (ccr < 0) -+ return ccr; -+ -+ if (ccr & AT803X_BT_BX_REG_SEL) -+ return AT803X_PAGE_COPPER; -+ -+ return AT803X_PAGE_FIBER; -+} -+ - static int at803x_enable_rx_delay(struct phy_device *phydev) - { - return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, 0, -@@ -244,6 +276,7 @@ static int at803x_probe(struct phy_devic - { - struct device *dev = &phydev->mdio.dev; - struct 
at803x_priv *priv; -+ int ret = 0; - - priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); - if (!priv) -@@ -251,7 +284,16 @@ static int at803x_probe(struct phy_devic - - phydev->priv = priv; - -- return 0; -+ /* Some bootloaders leave the fiber page selected. -+ * Switch to the copper page, as otherwise we read -+ * the PHY capabilities from the fiber side. -+ */ -+ if ((phydev->phy_id & phydev->drv->phy_id_mask) == (ATH8031_PHY_ID & phydev->drv->phy_id_mask)) { -+ ret = phy_select_page(phydev, AT803X_PAGE_COPPER); -+ ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret); -+ } -+ -+ return ret; - } - - static int at803x_config_init(struct phy_device *phydev) -@@ -466,6 +508,8 @@ static struct phy_driver at803x_driver[] - .get_wol = at803x_get_wol, - .suspend = at803x_suspend, - .resume = at803x_resume, -+ .read_page = at803x_read_page, -+ .write_page = at803x_write_page, - /* PHY_GBIT_FEATURES */ - .read_status = at803x_read_status, - .aneg_done = at803x_aneg_done, diff --git a/target/linux/generic/backport-5.4/791-net-phy-at803x-fix-probe-error-if-copper-page-is-sel.patch b/target/linux/generic/backport-5.4/791-net-phy-at803x-fix-probe-error-if-copper-page-is-sel.patch deleted file mode 100644 index ac9583a89e..0000000000 --- a/target/linux/generic/backport-5.4/791-net-phy-at803x-fix-probe-error-if-copper-page-is-sel.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 8f7e876273e294b732b42af2e5e6bba91d798954 Mon Sep 17 00:00:00 2001 -From: Michael Walle <michael@walle.cc> -Date: Tue, 20 Apr 2021 12:29:29 +0200 -Subject: [PATCH] net: phy: at803x: fix probe error if copper page is selected - -The commit c329e5afb42f ("net: phy: at803x: select correct page on -config init") selects the copper page during probe. This fails if the -copper page was already selected. In this case, the value of the copper -page (which is 1) is propagated through phy_restore_page() and is -finally returned for at803x_probe(). Fix it, by just using the -at803x_page_write() directly. 
- -Also in case of an error, the regulator is not disabled and leads to a -WARN_ON() when the probe fails. This couldn't happen before, because -at803x_parse_dt() was the last call in at803x_probe(). It is hard to -see, that the parse_dt() actually enables the regulator. Thus move the -regulator_enable() to the probe function and undo it in case of an -error. - -Fixes: c329e5afb42f ("net: phy: at803x: select correct page on config init") -Signed-off-by: Michael Walle <michael@walle.cc> -Reviewed-by: David Bauer <mail@david-bauer.net> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - drivers/net/phy/at803x.c | 23 +++++++++++++++++------ - 1 file changed, 17 insertions(+), 6 deletions(-) - ---- a/drivers/net/phy/at803x.c -+++ b/drivers/net/phy/at803x.c -@@ -289,8 +289,9 @@ static int at803x_probe(struct phy_devic - * the PHY capabilities from the fiber side. - */ - if ((phydev->phy_id & phydev->drv->phy_id_mask) == (ATH8031_PHY_ID & phydev->drv->phy_id_mask)) { -- ret = phy_select_page(phydev, AT803X_PAGE_COPPER); -- ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret); -+ mutex_lock(&phydev->mdio.bus->mdio_lock); -+ ret = at803x_write_page(phydev, AT803X_PAGE_COPPER); -+ mutex_unlock(&phydev->mdio.bus->mdio_lock); - } - - return ret; diff --git a/target/linux/generic/backport-5.4/800-v5.5-iio-imu-Add-support-for-the-FXOS8700-IMU.patch b/target/linux/generic/backport-5.4/800-v5.5-iio-imu-Add-support-for-the-FXOS8700-IMU.patch deleted file mode 100644 index b9cd276327..0000000000 --- a/target/linux/generic/backport-5.4/800-v5.5-iio-imu-Add-support-for-the-FXOS8700-IMU.patch +++ /dev/null @@ -1,893 +0,0 @@ -From 84e5ddd5c46ea3bf0cad670da32028994cad5936 Mon Sep 17 00:00:00 2001 -From: Robert Jones <rjones@gateworks.com> -Date: Mon, 14 Oct 2019 11:49:21 -0700 -Subject: [PATCH] iio: imu: Add support for the FXOS8700 IMU -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -FXOS8700CQ is a small, low-power, 3-axis 
linear accelerometer and 3-axis -magnetometer combined into a single package. The device features a -selectable I2C or point-to-point SPI serial interface with 14-bit -accelerometer and 16-bit magnetometer ADC resolution along with -smart-embedded functions. - -FXOS8700CQ has dynamically selectable accelerationfull-scale ranges of -±2 g/±4 g/±8 g and a fixed magnetic measurement range of ±1200 μT. -Output data rates (ODR) from 1.563 Hz to 800 Hz are selectable by the user -for each sensor. Interleaved magnetic and acceleration data is available -at ODR rates of up to 400 Hz. FXOS8700CQ is available in a plastic QFN -package and it is guaranteed to operate over the extended temperature -range of –40 °C to +85 °C. - -TODO: Trigger and IRQ configuration support - -Datasheet: - http://cache.freescale.com/files/sensors/doc/data_sheet/FXOS8700CQ.pdf - -Signed-off-by: Robert Jones <rjones@gateworks.com> -Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> ---- - drivers/iio/imu/Kconfig | 27 ++ - drivers/iio/imu/Makefile | 5 + - drivers/iio/imu/fxos8700.h | 10 + - drivers/iio/imu/fxos8700_core.c | 649 ++++++++++++++++++++++++++++++++++++++++ - drivers/iio/imu/fxos8700_i2c.c | 71 +++++ - drivers/iio/imu/fxos8700_spi.c | 59 ++++ - 6 files changed, 821 insertions(+) - create mode 100644 drivers/iio/imu/fxos8700.h - create mode 100644 drivers/iio/imu/fxos8700_core.c - create mode 100644 drivers/iio/imu/fxos8700_i2c.c - create mode 100644 drivers/iio/imu/fxos8700_spi.c - ---- a/drivers/iio/imu/Kconfig -+++ b/drivers/iio/imu/Kconfig -@@ -40,6 +40,33 @@ config ADIS16480 - - source "drivers/iio/imu/bmi160/Kconfig" - -+config FXOS8700 -+ tristate -+ -+config FXOS8700_I2C -+ tristate "NXP FXOS8700 I2C driver" -+ depends on I2C -+ select FXOS8700 -+ select REGMAP_I2C -+ help -+ Say yes here to build support for the NXP FXOS8700 m+g combo -+ sensor on I2C. -+ -+ This driver can also be built as a module. If so, the module will be -+ called fxos8700_i2c. 
-+ -+config FXOS8700_SPI -+ tristate "NXP FXOS8700 SPI driver" -+ depends on SPI -+ select FXOS8700 -+ select REGMAP_SPI -+ help -+ Say yes here to build support for the NXP FXOS8700 m+g combo -+ sensor on SPI. -+ -+ This driver can also be built as a module. If so, the module will be -+ called fxos8700_spi. -+ - config KMX61 - tristate "Kionix KMX61 6-axis accelerometer and magnetometer" - depends on I2C ---- a/drivers/iio/imu/Makefile -+++ b/drivers/iio/imu/Makefile -@@ -14,6 +14,11 @@ adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) + - obj-$(CONFIG_IIO_ADIS_LIB) += adis_lib.o - - obj-y += bmi160/ -+ -+obj-$(CONFIG_FXOS8700) += fxos8700_core.o -+obj-$(CONFIG_FXOS8700_I2C) += fxos8700_i2c.o -+obj-$(CONFIG_FXOS8700_SPI) += fxos8700_spi.o -+ - obj-y += inv_mpu6050/ - - obj-$(CONFIG_KMX61) += kmx61.o ---- /dev/null -+++ b/drivers/iio/imu/fxos8700.h -@@ -0,0 +1,10 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef FXOS8700_H_ -+#define FXOS8700_H_ -+ -+extern const struct regmap_config fxos8700_regmap_config; -+ -+int fxos8700_core_probe(struct device *dev, struct regmap *regmap, -+ const char *name, bool use_spi); -+ -+#endif /* FXOS8700_H_ */ ---- /dev/null -+++ b/drivers/iio/imu/fxos8700_core.c -@@ -0,0 +1,649 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * FXOS8700 - NXP IMU (accelerometer plus magnetometer) -+ * -+ * IIO core driver for FXOS8700, with support for I2C/SPI busses -+ * -+ * TODO: Buffer, trigger, and IRQ support -+ */ -+#include <linux/module.h> -+#include <linux/regmap.h> -+#include <linux/acpi.h> -+#include <linux/bitops.h> -+ -+#include <linux/iio/iio.h> -+#include <linux/iio/sysfs.h> -+ -+#include "fxos8700.h" -+ -+/* Register Definitions */ -+#define FXOS8700_STATUS 0x00 -+#define FXOS8700_OUT_X_MSB 0x01 -+#define FXOS8700_OUT_X_LSB 0x02 -+#define FXOS8700_OUT_Y_MSB 0x03 -+#define FXOS8700_OUT_Y_LSB 0x04 -+#define FXOS8700_OUT_Z_MSB 0x05 -+#define FXOS8700_OUT_Z_LSB 0x06 -+#define FXOS8700_F_SETUP 0x09 -+#define FXOS8700_TRIG_CFG 0x0a -+#define 
FXOS8700_SYSMOD 0x0b -+#define FXOS8700_INT_SOURCE 0x0c -+#define FXOS8700_WHO_AM_I 0x0d -+#define FXOS8700_XYZ_DATA_CFG 0x0e -+#define FXOS8700_HP_FILTER_CUTOFF 0x0f -+#define FXOS8700_PL_STATUS 0x10 -+#define FXOS8700_PL_CFG 0x11 -+#define FXOS8700_PL_COUNT 0x12 -+#define FXOS8700_PL_BF_ZCOMP 0x13 -+#define FXOS8700_PL_THS_REG 0x14 -+#define FXOS8700_A_FFMT_CFG 0x15 -+#define FXOS8700_A_FFMT_SRC 0x16 -+#define FXOS8700_A_FFMT_THS 0x17 -+#define FXOS8700_A_FFMT_COUNT 0x18 -+#define FXOS8700_TRANSIENT_CFG 0x1d -+#define FXOS8700_TRANSIENT_SRC 0x1e -+#define FXOS8700_TRANSIENT_THS 0x1f -+#define FXOS8700_TRANSIENT_COUNT 0x20 -+#define FXOS8700_PULSE_CFG 0x21 -+#define FXOS8700_PULSE_SRC 0x22 -+#define FXOS8700_PULSE_THSX 0x23 -+#define FXOS8700_PULSE_THSY 0x24 -+#define FXOS8700_PULSE_THSZ 0x25 -+#define FXOS8700_PULSE_TMLT 0x26 -+#define FXOS8700_PULSE_LTCY 0x27 -+#define FXOS8700_PULSE_WIND 0x28 -+#define FXOS8700_ASLP_COUNT 0x29 -+#define FXOS8700_CTRL_REG1 0x2a -+#define FXOS8700_CTRL_REG2 0x2b -+#define FXOS8700_CTRL_REG3 0x2c -+#define FXOS8700_CTRL_REG4 0x2d -+#define FXOS8700_CTRL_REG5 0x2e -+#define FXOS8700_OFF_X 0x2f -+#define FXOS8700_OFF_Y 0x30 -+#define FXOS8700_OFF_Z 0x31 -+#define FXOS8700_M_DR_STATUS 0x32 -+#define FXOS8700_M_OUT_X_MSB 0x33 -+#define FXOS8700_M_OUT_X_LSB 0x34 -+#define FXOS8700_M_OUT_Y_MSB 0x35 -+#define FXOS8700_M_OUT_Y_LSB 0x36 -+#define FXOS8700_M_OUT_Z_MSB 0x37 -+#define FXOS8700_M_OUT_Z_LSB 0x38 -+#define FXOS8700_CMP_X_MSB 0x39 -+#define FXOS8700_CMP_X_LSB 0x3a -+#define FXOS8700_CMP_Y_MSB 0x3b -+#define FXOS8700_CMP_Y_LSB 0x3c -+#define FXOS8700_CMP_Z_MSB 0x3d -+#define FXOS8700_CMP_Z_LSB 0x3e -+#define FXOS8700_M_OFF_X_MSB 0x3f -+#define FXOS8700_M_OFF_X_LSB 0x40 -+#define FXOS8700_M_OFF_Y_MSB 0x41 -+#define FXOS8700_M_OFF_Y_LSB 0x42 -+#define FXOS8700_M_OFF_Z_MSB 0x43 -+#define FXOS8700_M_OFF_Z_LSB 0x44 -+#define FXOS8700_MAX_X_MSB 0x45 -+#define FXOS8700_MAX_X_LSB 0x46 -+#define FXOS8700_MAX_Y_MSB 0x47 -+#define 
FXOS8700_MAX_Y_LSB 0x48 -+#define FXOS8700_MAX_Z_MSB 0x49 -+#define FXOS8700_MAX_Z_LSB 0x4a -+#define FXOS8700_MIN_X_MSB 0x4b -+#define FXOS8700_MIN_X_LSB 0x4c -+#define FXOS8700_MIN_Y_MSB 0x4d -+#define FXOS8700_MIN_Y_LSB 0x4e -+#define FXOS8700_MIN_Z_MSB 0x4f -+#define FXOS8700_MIN_Z_LSB 0x50 -+#define FXOS8700_TEMP 0x51 -+#define FXOS8700_M_THS_CFG 0x52 -+#define FXOS8700_M_THS_SRC 0x53 -+#define FXOS8700_M_THS_X_MSB 0x54 -+#define FXOS8700_M_THS_X_LSB 0x55 -+#define FXOS8700_M_THS_Y_MSB 0x56 -+#define FXOS8700_M_THS_Y_LSB 0x57 -+#define FXOS8700_M_THS_Z_MSB 0x58 -+#define FXOS8700_M_THS_Z_LSB 0x59 -+#define FXOS8700_M_THS_COUNT 0x5a -+#define FXOS8700_M_CTRL_REG1 0x5b -+#define FXOS8700_M_CTRL_REG2 0x5c -+#define FXOS8700_M_CTRL_REG3 0x5d -+#define FXOS8700_M_INT_SRC 0x5e -+#define FXOS8700_A_VECM_CFG 0x5f -+#define FXOS8700_A_VECM_THS_MSB 0x60 -+#define FXOS8700_A_VECM_THS_LSB 0x61 -+#define FXOS8700_A_VECM_CNT 0x62 -+#define FXOS8700_A_VECM_INITX_MSB 0x63 -+#define FXOS8700_A_VECM_INITX_LSB 0x64 -+#define FXOS8700_A_VECM_INITY_MSB 0x65 -+#define FXOS8700_A_VECM_INITY_LSB 0x66 -+#define FXOS8700_A_VECM_INITZ_MSB 0x67 -+#define FXOS8700_A_VECM_INITZ_LSB 0x68 -+#define FXOS8700_M_VECM_CFG 0x69 -+#define FXOS8700_M_VECM_THS_MSB 0x6a -+#define FXOS8700_M_VECM_THS_LSB 0x6b -+#define FXOS8700_M_VECM_CNT 0x6c -+#define FXOS8700_M_VECM_INITX_MSB 0x6d -+#define FXOS8700_M_VECM_INITX_LSB 0x6e -+#define FXOS8700_M_VECM_INITY_MSB 0x6f -+#define FXOS8700_M_VECM_INITY_LSB 0x70 -+#define FXOS8700_M_VECM_INITZ_MSB 0x71 -+#define FXOS8700_M_VECM_INITZ_LSB 0x72 -+#define FXOS8700_A_FFMT_THS_X_MSB 0x73 -+#define FXOS8700_A_FFMT_THS_X_LSB 0x74 -+#define FXOS8700_A_FFMT_THS_Y_MSB 0x75 -+#define FXOS8700_A_FFMT_THS_Y_LSB 0x76 -+#define FXOS8700_A_FFMT_THS_Z_MSB 0x77 -+#define FXOS8700_A_FFMT_THS_Z_LSB 0x78 -+#define FXOS8700_A_TRAN_INIT_MSB 0x79 -+#define FXOS8700_A_TRAN_INIT_LSB_X 0x7a -+#define FXOS8700_A_TRAN_INIT_LSB_Y 0x7b -+#define FXOS8700_A_TRAN_INIT_LSB_Z 0x7d -+#define 
FXOS8700_TM_NVM_LOCK 0x7e -+#define FXOS8700_NVM_DATA0_35 0x80 -+#define FXOS8700_NVM_DATA_BNK3 0xa4 -+#define FXOS8700_NVM_DATA_BNK2 0xa5 -+#define FXOS8700_NVM_DATA_BNK1 0xa6 -+#define FXOS8700_NVM_DATA_BNK0 0xa7 -+ -+/* Bit definitions for FXOS8700_CTRL_REG1 */ -+#define FXOS8700_CTRL_ODR_MSK 0x38 -+#define FXOS8700_CTRL_ODR_MAX 0x00 -+#define FXOS8700_CTRL_ODR_MIN GENMASK(4, 3) -+ -+/* Bit definitions for FXOS8700_M_CTRL_REG1 */ -+#define FXOS8700_HMS_MASK GENMASK(1, 0) -+#define FXOS8700_OS_MASK GENMASK(4, 2) -+ -+/* Bit definitions for FXOS8700_M_CTRL_REG2 */ -+#define FXOS8700_MAXMIN_RST BIT(2) -+#define FXOS8700_MAXMIN_DIS_THS BIT(3) -+#define FXOS8700_MAXMIN_DIS BIT(4) -+ -+#define FXOS8700_ACTIVE 0x01 -+#define FXOS8700_ACTIVE_MIN_USLEEP 4000 /* from table 6 in datasheet */ -+ -+#define FXOS8700_DEVICE_ID 0xC7 -+#define FXOS8700_PRE_DEVICE_ID 0xC4 -+#define FXOS8700_DATA_BUF_SIZE 3 -+ -+struct fxos8700_data { -+ struct regmap *regmap; -+ struct iio_trigger *trig; -+ __be16 buf[FXOS8700_DATA_BUF_SIZE] ____cacheline_aligned; -+}; -+ -+/* Regmap info */ -+static const struct regmap_range read_range[] = { -+ { -+ .range_min = FXOS8700_STATUS, -+ .range_max = FXOS8700_A_FFMT_COUNT, -+ }, { -+ .range_min = FXOS8700_TRANSIENT_CFG, -+ .range_max = FXOS8700_A_FFMT_THS_Z_LSB, -+ }, -+}; -+ -+static const struct regmap_range write_range[] = { -+ { -+ .range_min = FXOS8700_F_SETUP, -+ .range_max = FXOS8700_TRIG_CFG, -+ }, { -+ .range_min = FXOS8700_XYZ_DATA_CFG, -+ .range_max = FXOS8700_HP_FILTER_CUTOFF, -+ }, { -+ .range_min = FXOS8700_PL_CFG, -+ .range_max = FXOS8700_A_FFMT_CFG, -+ }, { -+ .range_min = FXOS8700_A_FFMT_THS, -+ .range_max = FXOS8700_TRANSIENT_CFG, -+ }, { -+ .range_min = FXOS8700_TRANSIENT_THS, -+ .range_max = FXOS8700_PULSE_CFG, -+ }, { -+ .range_min = FXOS8700_PULSE_THSX, -+ .range_max = FXOS8700_OFF_Z, -+ }, { -+ .range_min = FXOS8700_M_OFF_X_MSB, -+ .range_max = FXOS8700_M_OFF_Z_LSB, -+ }, { -+ .range_min = FXOS8700_M_THS_CFG, -+ .range_max = 
FXOS8700_M_THS_CFG, -+ }, { -+ .range_min = FXOS8700_M_THS_X_MSB, -+ .range_max = FXOS8700_M_CTRL_REG3, -+ }, { -+ .range_min = FXOS8700_A_VECM_CFG, -+ .range_max = FXOS8700_A_FFMT_THS_Z_LSB, -+ }, -+}; -+ -+static const struct regmap_access_table driver_read_table = { -+ .yes_ranges = read_range, -+ .n_yes_ranges = ARRAY_SIZE(read_range), -+}; -+ -+static const struct regmap_access_table driver_write_table = { -+ .yes_ranges = write_range, -+ .n_yes_ranges = ARRAY_SIZE(write_range), -+}; -+ -+const struct regmap_config fxos8700_regmap_config = { -+ .reg_bits = 8, -+ .val_bits = 8, -+ .max_register = FXOS8700_NVM_DATA_BNK0, -+ .rd_table = &driver_read_table, -+ .wr_table = &driver_write_table, -+}; -+EXPORT_SYMBOL(fxos8700_regmap_config); -+ -+#define FXOS8700_CHANNEL(_type, _axis) { \ -+ .type = _type, \ -+ .modified = 1, \ -+ .channel2 = IIO_MOD_##_axis, \ -+ .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), \ -+ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ -+ BIT(IIO_CHAN_INFO_SAMP_FREQ), \ -+} -+ -+enum fxos8700_accel_scale_bits { -+ MODE_2G = 0, -+ MODE_4G, -+ MODE_8G, -+}; -+ -+/* scan indexes follow DATA register order */ -+enum fxos8700_scan_axis { -+ FXOS8700_SCAN_ACCEL_X = 0, -+ FXOS8700_SCAN_ACCEL_Y, -+ FXOS8700_SCAN_ACCEL_Z, -+ FXOS8700_SCAN_MAGN_X, -+ FXOS8700_SCAN_MAGN_Y, -+ FXOS8700_SCAN_MAGN_Z, -+ FXOS8700_SCAN_RHALL, -+ FXOS8700_SCAN_TIMESTAMP, -+}; -+ -+enum fxos8700_sensor { -+ FXOS8700_ACCEL = 0, -+ FXOS8700_MAGN, -+ FXOS8700_NUM_SENSORS /* must be last */ -+}; -+ -+enum fxos8700_int_pin { -+ FXOS8700_PIN_INT1, -+ FXOS8700_PIN_INT2 -+}; -+ -+struct fxos8700_scale { -+ u8 bits; -+ int uscale; -+}; -+ -+struct fxos8700_odr { -+ u8 bits; -+ int odr; -+ int uodr; -+}; -+ -+static const struct fxos8700_scale fxos8700_accel_scale[] = { -+ { MODE_2G, 244}, -+ { MODE_4G, 488}, -+ { MODE_8G, 976}, -+}; -+ -+/* -+ * Accellerometer and magnetometer have the same ODR options, set in the -+ * CTRL_REG1 register. 
ODR is halved when using both sensors at once in -+ * hybrid mode. -+ */ -+static const struct fxos8700_odr fxos8700_odr[] = { -+ {0x00, 800, 0}, -+ {0x01, 400, 0}, -+ {0x02, 200, 0}, -+ {0x03, 100, 0}, -+ {0x04, 50, 0}, -+ {0x05, 12, 500000}, -+ {0x06, 6, 250000}, -+ {0x07, 1, 562500}, -+}; -+ -+static const struct iio_chan_spec fxos8700_channels[] = { -+ FXOS8700_CHANNEL(IIO_ACCEL, X), -+ FXOS8700_CHANNEL(IIO_ACCEL, Y), -+ FXOS8700_CHANNEL(IIO_ACCEL, Z), -+ FXOS8700_CHANNEL(IIO_MAGN, X), -+ FXOS8700_CHANNEL(IIO_MAGN, Y), -+ FXOS8700_CHANNEL(IIO_MAGN, Z), -+ IIO_CHAN_SOFT_TIMESTAMP(FXOS8700_SCAN_TIMESTAMP), -+}; -+ -+static enum fxos8700_sensor fxos8700_to_sensor(enum iio_chan_type iio_type) -+{ -+ switch (iio_type) { -+ case IIO_ACCEL: -+ return FXOS8700_ACCEL; -+ case IIO_ANGL_VEL: -+ return FXOS8700_MAGN; -+ default: -+ return -EINVAL; -+ } -+} -+ -+static int fxos8700_set_active_mode(struct fxos8700_data *data, -+ enum fxos8700_sensor t, bool mode) -+{ -+ int ret; -+ -+ ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, mode); -+ if (ret) -+ return ret; -+ -+ usleep_range(FXOS8700_ACTIVE_MIN_USLEEP, -+ FXOS8700_ACTIVE_MIN_USLEEP + 1000); -+ -+ return 0; -+} -+ -+static int fxos8700_set_scale(struct fxos8700_data *data, -+ enum fxos8700_sensor t, int uscale) -+{ -+ int i; -+ static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale); -+ struct device *dev = regmap_get_device(data->regmap); -+ -+ if (t == FXOS8700_MAGN) { -+ dev_err(dev, "Magnetometer scale is locked at 1200uT\n"); -+ return -EINVAL; -+ } -+ -+ for (i = 0; i < scale_num; i++) -+ if (fxos8700_accel_scale[i].uscale == uscale) -+ break; -+ -+ if (i == scale_num) -+ return -EINVAL; -+ -+ return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, -+ fxos8700_accel_scale[i].bits); -+} -+ -+static int fxos8700_get_scale(struct fxos8700_data *data, -+ enum fxos8700_sensor t, int *uscale) -+{ -+ int i, ret, val; -+ static const int scale_num = ARRAY_SIZE(fxos8700_accel_scale); -+ -+ if (t == 
FXOS8700_MAGN) { -+ *uscale = 1200; /* Magnetometer is locked at 1200uT */ -+ return 0; -+ } -+ -+ ret = regmap_read(data->regmap, FXOS8700_XYZ_DATA_CFG, &val); -+ if (ret) -+ return ret; -+ -+ for (i = 0; i < scale_num; i++) { -+ if (fxos8700_accel_scale[i].bits == (val & 0x3)) { -+ *uscale = fxos8700_accel_scale[i].uscale; -+ return 0; -+ } -+ } -+ -+ return -EINVAL; -+} -+ -+static int fxos8700_get_data(struct fxos8700_data *data, int chan_type, -+ int axis, int *val) -+{ -+ u8 base, reg; -+ int ret; -+ enum fxos8700_sensor type = fxos8700_to_sensor(chan_type); -+ -+ base = type ? FXOS8700_OUT_X_MSB : FXOS8700_M_OUT_X_MSB; -+ -+ /* Block read 6 bytes of device output registers to avoid data loss */ -+ ret = regmap_bulk_read(data->regmap, base, data->buf, -+ FXOS8700_DATA_BUF_SIZE); -+ if (ret) -+ return ret; -+ -+ /* Convert axis to buffer index */ -+ reg = axis - IIO_MOD_X; -+ -+ /* Convert to native endianness */ -+ *val = sign_extend32(be16_to_cpu(data->buf[reg]), 15); -+ -+ return 0; -+} -+ -+static int fxos8700_set_odr(struct fxos8700_data *data, enum fxos8700_sensor t, -+ int odr, int uodr) -+{ -+ int i, ret, val; -+ bool active_mode; -+ static const int odr_num = ARRAY_SIZE(fxos8700_odr); -+ -+ ret = regmap_read(data->regmap, FXOS8700_CTRL_REG1, &val); -+ if (ret) -+ return ret; -+ -+ active_mode = val & FXOS8700_ACTIVE; -+ -+ if (active_mode) { -+ /* -+ * The device must be in standby mode to change any of the -+ * other fields within CTRL_REG1 -+ */ -+ ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, -+ val & ~FXOS8700_ACTIVE); -+ if (ret) -+ return ret; -+ } -+ -+ for (i = 0; i < odr_num; i++) -+ if (fxos8700_odr[i].odr == odr && fxos8700_odr[i].uodr == uodr) -+ break; -+ -+ if (i >= odr_num) -+ return -EINVAL; -+ -+ return regmap_update_bits(data->regmap, -+ FXOS8700_CTRL_REG1, -+ FXOS8700_CTRL_ODR_MSK + FXOS8700_ACTIVE, -+ fxos8700_odr[i].bits << 3 | active_mode); -+} -+ -+static int fxos8700_get_odr(struct fxos8700_data *data, enum 
fxos8700_sensor t, -+ int *odr, int *uodr) -+{ -+ int i, val, ret; -+ static const int odr_num = ARRAY_SIZE(fxos8700_odr); -+ -+ ret = regmap_read(data->regmap, FXOS8700_CTRL_REG1, &val); -+ if (ret) -+ return ret; -+ -+ val &= FXOS8700_CTRL_ODR_MSK; -+ -+ for (i = 0; i < odr_num; i++) -+ if (val == fxos8700_odr[i].bits) -+ break; -+ -+ if (i >= odr_num) -+ return -EINVAL; -+ -+ *odr = fxos8700_odr[i].odr; -+ *uodr = fxos8700_odr[i].uodr; -+ -+ return 0; -+} -+ -+static int fxos8700_read_raw(struct iio_dev *indio_dev, -+ struct iio_chan_spec const *chan, -+ int *val, int *val2, long mask) -+{ -+ int ret; -+ struct fxos8700_data *data = iio_priv(indio_dev); -+ -+ switch (mask) { -+ case IIO_CHAN_INFO_RAW: -+ ret = fxos8700_get_data(data, chan->type, chan->channel2, val); -+ if (ret) -+ return ret; -+ return IIO_VAL_INT; -+ case IIO_CHAN_INFO_SCALE: -+ *val = 0; -+ ret = fxos8700_get_scale(data, fxos8700_to_sensor(chan->type), -+ val2); -+ return ret ? ret : IIO_VAL_INT_PLUS_MICRO; -+ case IIO_CHAN_INFO_SAMP_FREQ: -+ ret = fxos8700_get_odr(data, fxos8700_to_sensor(chan->type), -+ val, val2); -+ return ret ? 
ret : IIO_VAL_INT_PLUS_MICRO; -+ default: -+ return -EINVAL; -+ } -+} -+ -+static int fxos8700_write_raw(struct iio_dev *indio_dev, -+ struct iio_chan_spec const *chan, -+ int val, int val2, long mask) -+{ -+ struct fxos8700_data *data = iio_priv(indio_dev); -+ -+ switch (mask) { -+ case IIO_CHAN_INFO_SCALE: -+ return fxos8700_set_scale(data, fxos8700_to_sensor(chan->type), -+ val2); -+ case IIO_CHAN_INFO_SAMP_FREQ: -+ return fxos8700_set_odr(data, fxos8700_to_sensor(chan->type), -+ val, val2); -+ default: -+ return -EINVAL; -+ } -+} -+ -+static IIO_CONST_ATTR(in_accel_sampling_frequency_available, -+ "1.5625 6.25 12.5 50 100 200 400 800"); -+static IIO_CONST_ATTR(in_magn_sampling_frequency_available, -+ "1.5625 6.25 12.5 50 100 200 400 800"); -+static IIO_CONST_ATTR(in_accel_scale_available, "0.000244 0.000488 0.000976"); -+static IIO_CONST_ATTR(in_magn_scale_available, "0.000001200"); -+ -+static struct attribute *fxos8700_attrs[] = { -+ &iio_const_attr_in_accel_sampling_frequency_available.dev_attr.attr, -+ &iio_const_attr_in_magn_sampling_frequency_available.dev_attr.attr, -+ &iio_const_attr_in_accel_scale_available.dev_attr.attr, -+ &iio_const_attr_in_magn_scale_available.dev_attr.attr, -+ NULL, -+}; -+ -+static const struct attribute_group fxos8700_attrs_group = { -+ .attrs = fxos8700_attrs, -+}; -+ -+static const struct iio_info fxos8700_info = { -+ .read_raw = fxos8700_read_raw, -+ .write_raw = fxos8700_write_raw, -+ .attrs = &fxos8700_attrs_group, -+}; -+ -+static int fxos8700_chip_init(struct fxos8700_data *data, bool use_spi) -+{ -+ int ret; -+ unsigned int val; -+ struct device *dev = regmap_get_device(data->regmap); -+ -+ ret = regmap_read(data->regmap, FXOS8700_WHO_AM_I, &val); -+ if (ret) { -+ dev_err(dev, "Error reading chip id\n"); -+ return ret; -+ } -+ if (val != FXOS8700_DEVICE_ID && val != FXOS8700_PRE_DEVICE_ID) { -+ dev_err(dev, "Wrong chip id, got %x expected %x or %x\n", -+ val, FXOS8700_DEVICE_ID, FXOS8700_PRE_DEVICE_ID); -+ return 
-ENODEV; -+ } -+ -+ ret = fxos8700_set_active_mode(data, FXOS8700_ACCEL, true); -+ if (ret) -+ return ret; -+ -+ ret = fxos8700_set_active_mode(data, FXOS8700_MAGN, true); -+ if (ret) -+ return ret; -+ -+ /* -+ * The device must be in standby mode to change any of the other fields -+ * within CTRL_REG1 -+ */ -+ ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, 0x00); -+ if (ret) -+ return ret; -+ -+ /* Set max oversample ratio (OSR) and both devices active */ -+ ret = regmap_write(data->regmap, FXOS8700_M_CTRL_REG1, -+ FXOS8700_HMS_MASK | FXOS8700_OS_MASK); -+ if (ret) -+ return ret; -+ -+ /* Disable and rst min/max measurements & threshold */ -+ ret = regmap_write(data->regmap, FXOS8700_M_CTRL_REG2, -+ FXOS8700_MAXMIN_RST | FXOS8700_MAXMIN_DIS_THS | -+ FXOS8700_MAXMIN_DIS); -+ if (ret) -+ return ret; -+ -+ /* Max ODR (800Hz individual or 400Hz hybrid), active mode */ -+ ret = regmap_write(data->regmap, FXOS8700_CTRL_REG1, -+ FXOS8700_CTRL_ODR_MAX | FXOS8700_ACTIVE); -+ if (ret) -+ return ret; -+ -+ /* Set for max full-scale range (+/-8G) */ -+ return regmap_write(data->regmap, FXOS8700_XYZ_DATA_CFG, MODE_8G); -+} -+ -+static void fxos8700_chip_uninit(void *data) -+{ -+ struct fxos8700_data *fxos8700_data = data; -+ -+ fxos8700_set_active_mode(fxos8700_data, FXOS8700_ACCEL, false); -+ fxos8700_set_active_mode(fxos8700_data, FXOS8700_MAGN, false); -+} -+ -+int fxos8700_core_probe(struct device *dev, struct regmap *regmap, -+ const char *name, bool use_spi) -+{ -+ struct iio_dev *indio_dev; -+ struct fxos8700_data *data; -+ int ret; -+ -+ indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); -+ if (!indio_dev) -+ return -ENOMEM; -+ -+ data = iio_priv(indio_dev); -+ dev_set_drvdata(dev, indio_dev); -+ data->regmap = regmap; -+ -+ ret = fxos8700_chip_init(data, use_spi); -+ if (ret) -+ return ret; -+ -+ ret = devm_add_action_or_reset(dev, fxos8700_chip_uninit, data); -+ if (ret) -+ return ret; -+ -+ indio_dev->dev.parent = dev; -+ indio_dev->channels = 
fxos8700_channels; -+ indio_dev->num_channels = ARRAY_SIZE(fxos8700_channels); -+ indio_dev->name = name ? name : "fxos8700"; -+ indio_dev->modes = INDIO_DIRECT_MODE; -+ indio_dev->info = &fxos8700_info; -+ -+ return devm_iio_device_register(dev, indio_dev); -+} -+EXPORT_SYMBOL_GPL(fxos8700_core_probe); -+ -+MODULE_AUTHOR("Robert Jones <rjones@gateworks.com>"); -+MODULE_DESCRIPTION("FXOS8700 6-Axis Acc and Mag Combo Sensor driver"); -+MODULE_LICENSE("GPL v2"); ---- /dev/null -+++ b/drivers/iio/imu/fxos8700_i2c.c -@@ -0,0 +1,71 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * FXOS8700 - NXP IMU, I2C bits -+ * -+ * 7-bit I2C slave address determined by SA1 and SA0 logic level -+ * inputs represented in the following table: -+ * SA1 | SA0 | Slave Address -+ * 0 | 0 | 0x1E -+ * 0 | 1 | 0x1D -+ * 1 | 0 | 0x1C -+ * 1 | 1 | 0x1F -+ */ -+#include <linux/acpi.h> -+#include <linux/i2c.h> -+#include <linux/module.h> -+#include <linux/mod_devicetable.h> -+#include <linux/regmap.h> -+ -+#include "fxos8700.h" -+ -+static int fxos8700_i2c_probe(struct i2c_client *client, -+ const struct i2c_device_id *id) -+{ -+ struct regmap *regmap; -+ const char *name = NULL; -+ -+ regmap = devm_regmap_init_i2c(client, &fxos8700_regmap_config); -+ if (IS_ERR(regmap)) { -+ dev_err(&client->dev, "Failed to register i2c regmap %d\n", -+ (int)PTR_ERR(regmap)); -+ return PTR_ERR(regmap); -+ } -+ -+ if (id) -+ name = id->name; -+ -+ return fxos8700_core_probe(&client->dev, regmap, name, false); -+} -+ -+static const struct i2c_device_id fxos8700_i2c_id[] = { -+ {"fxos8700", 0}, -+ { } -+}; -+MODULE_DEVICE_TABLE(i2c, fxos8700_i2c_id); -+ -+static const struct acpi_device_id fxos8700_acpi_match[] = { -+ {"FXOS8700", 0}, -+ { } -+}; -+MODULE_DEVICE_TABLE(acpi, fxos8700_acpi_match); -+ -+static const struct of_device_id fxos8700_of_match[] = { -+ { .compatible = "nxp,fxos8700" }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, fxos8700_of_match); -+ -+static struct i2c_driver fxos8700_i2c_driver = { -+ .driver 
= { -+ .name = "fxos8700_i2c", -+ .acpi_match_table = ACPI_PTR(fxos8700_acpi_match), -+ .of_match_table = fxos8700_of_match, -+ }, -+ .probe = fxos8700_i2c_probe, -+ .id_table = fxos8700_i2c_id, -+}; -+module_i2c_driver(fxos8700_i2c_driver); -+ -+MODULE_AUTHOR("Robert Jones <rjones@gateworks.com>"); -+MODULE_DESCRIPTION("FXOS8700 I2C driver"); -+MODULE_LICENSE("GPL v2"); ---- /dev/null -+++ b/drivers/iio/imu/fxos8700_spi.c -@@ -0,0 +1,59 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * FXOS8700 - NXP IMU, SPI bits -+ */ -+#include <linux/acpi.h> -+#include <linux/module.h> -+#include <linux/mod_devicetable.h> -+#include <linux/regmap.h> -+#include <linux/spi/spi.h> -+ -+#include "fxos8700.h" -+ -+static int fxos8700_spi_probe(struct spi_device *spi) -+{ -+ struct regmap *regmap; -+ const struct spi_device_id *id = spi_get_device_id(spi); -+ -+ regmap = devm_regmap_init_spi(spi, &fxos8700_regmap_config); -+ if (IS_ERR(regmap)) { -+ dev_err(&spi->dev, "Failed to register spi regmap %d\n", -+ (int)PTR_ERR(regmap)); -+ return PTR_ERR(regmap); -+ } -+ -+ return fxos8700_core_probe(&spi->dev, regmap, id->name, true); -+} -+ -+static const struct spi_device_id fxos8700_spi_id[] = { -+ {"fxos8700", 0}, -+ { } -+}; -+MODULE_DEVICE_TABLE(spi, fxos8700_spi_id); -+ -+static const struct acpi_device_id fxos8700_acpi_match[] = { -+ {"FXOS8700", 0}, -+ { } -+}; -+MODULE_DEVICE_TABLE(acpi, fxos8700_acpi_match); -+ -+static const struct of_device_id fxos8700_of_match[] = { -+ { .compatible = "nxp,fxos8700" }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, fxos8700_of_match); -+ -+static struct spi_driver fxos8700_spi_driver = { -+ .probe = fxos8700_spi_probe, -+ .id_table = fxos8700_spi_id, -+ .driver = { -+ .acpi_match_table = ACPI_PTR(fxos8700_acpi_match), -+ .of_match_table = fxos8700_of_match, -+ .name = "fxos8700_spi", -+ }, -+}; -+module_spi_driver(fxos8700_spi_driver); -+ -+MODULE_AUTHOR("Robert Jones <rjones@gateworks.com>"); -+MODULE_DESCRIPTION("FXOS8700 SPI driver"); 
-+MODULE_LICENSE("GPL v2"); diff --git a/target/linux/generic/backport-5.4/800-v5.5-scsi-core-Add-sysfs-attributes-for-VPD-pages-0h-and-.patch b/target/linux/generic/backport-5.4/800-v5.5-scsi-core-Add-sysfs-attributes-for-VPD-pages-0h-and-.patch deleted file mode 100644 index 2133280e88..0000000000 --- a/target/linux/generic/backport-5.4/800-v5.5-scsi-core-Add-sysfs-attributes-for-VPD-pages-0h-and-.patch +++ /dev/null @@ -1,122 +0,0 @@ -From d188b0675b21d5a6ca27b3e741381813983f4719 Mon Sep 17 00:00:00 2001 -From: Ryan Attard <ryanattard@ryanattard.info> -Date: Thu, 26 Sep 2019 11:22:17 -0500 -Subject: [PATCH] scsi: core: Add sysfs attributes for VPD pages 0h and 89h - -Add sysfs attributes for the ATA information page and Supported VPD Pages -page. - -Link: https://lore.kernel.org/r/20190926162216.56591-1-ryanattard@ryanattard.info -Signed-off-by: Ryan Attard <ryanattard@ryanattard.info> -Reviewed-by: Bart Van Assche <bvanassche@acm.org> -Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> ---- - drivers/scsi/scsi.c | 4 ++++ - drivers/scsi/scsi_sysfs.c | 19 +++++++++++++++++++ - include/scsi/scsi_device.h | 2 ++ - 3 files changed, 25 insertions(+) - ---- a/drivers/scsi/scsi.c -+++ b/drivers/scsi/scsi.c -@@ -465,10 +465,14 @@ void scsi_attach_vpd(struct scsi_device - return; - - for (i = 4; i < vpd_buf->len; i++) { -+ if (vpd_buf->data[i] == 0x0) -+ scsi_update_vpd_page(sdev, 0x0, &sdev->vpd_pg0); - if (vpd_buf->data[i] == 0x80) - scsi_update_vpd_page(sdev, 0x80, &sdev->vpd_pg80); - if (vpd_buf->data[i] == 0x83) - scsi_update_vpd_page(sdev, 0x83, &sdev->vpd_pg83); -+ if (vpd_buf->data[i] == 0x89) -+ scsi_update_vpd_page(sdev, 0x89, &sdev->vpd_pg89); - } - kfree(vpd_buf); - } ---- a/drivers/scsi/scsi_sysfs.c -+++ b/drivers/scsi/scsi_sysfs.c -@@ -437,6 +437,7 @@ static void scsi_device_dev_release_user - struct device *parent; - struct list_head *this, *tmp; - struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; -+ struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 
= NULL; - unsigned long flags; - struct module *mod; - -@@ -469,16 +470,24 @@ static void scsi_device_dev_release_user - sdev->request_queue = NULL; - - mutex_lock(&sdev->inquiry_mutex); -+ rcu_swap_protected(sdev->vpd_pg0, vpd_pg0, -+ lockdep_is_held(&sdev->inquiry_mutex)); - rcu_swap_protected(sdev->vpd_pg80, vpd_pg80, - lockdep_is_held(&sdev->inquiry_mutex)); - rcu_swap_protected(sdev->vpd_pg83, vpd_pg83, - lockdep_is_held(&sdev->inquiry_mutex)); -+ rcu_swap_protected(sdev->vpd_pg89, vpd_pg89, -+ lockdep_is_held(&sdev->inquiry_mutex)); - mutex_unlock(&sdev->inquiry_mutex); - -+ if (vpd_pg0) -+ kfree_rcu(vpd_pg0, rcu); - if (vpd_pg83) - kfree_rcu(vpd_pg83, rcu); - if (vpd_pg80) - kfree_rcu(vpd_pg80, rcu); -+ if (vpd_pg89) -+ kfree_rcu(vpd_pg89, rcu); - kfree(sdev->inquiry); - kfree(sdev); - -@@ -891,6 +900,8 @@ static struct bin_attribute dev_attr_vpd - - sdev_vpd_pg_attr(pg83); - sdev_vpd_pg_attr(pg80); -+sdev_vpd_pg_attr(pg89); -+sdev_vpd_pg_attr(pg0); - - static ssize_t show_inquiry(struct file *filep, struct kobject *kobj, - struct bin_attribute *bin_attr, -@@ -1223,12 +1234,18 @@ static umode_t scsi_sdev_bin_attr_is_vis - struct scsi_device *sdev = to_scsi_device(dev); - - -+ if (attr == &dev_attr_vpd_pg0 && !sdev->vpd_pg0) -+ return 0; -+ - if (attr == &dev_attr_vpd_pg80 && !sdev->vpd_pg80) - return 0; - - if (attr == &dev_attr_vpd_pg83 && !sdev->vpd_pg83) - return 0; - -+ if (attr == &dev_attr_vpd_pg89 && !sdev->vpd_pg89) -+ return 0; -+ - return S_IRUGO; - } - -@@ -1271,8 +1288,10 @@ static struct attribute *scsi_sdev_attrs - }; - - static struct bin_attribute *scsi_sdev_bin_attrs[] = { -+ &dev_attr_vpd_pg0, - &dev_attr_vpd_pg83, - &dev_attr_vpd_pg80, -+ &dev_attr_vpd_pg89, - &dev_attr_inquiry, - NULL - }; ---- a/include/scsi/scsi_device.h -+++ b/include/scsi/scsi_device.h -@@ -140,8 +140,10 @@ struct scsi_device { - const char * rev; /* ... 
"nullnullnullnull" before scan */ - - #define SCSI_VPD_PG_LEN 255 -+ struct scsi_vpd __rcu *vpd_pg0; - struct scsi_vpd __rcu *vpd_pg83; - struct scsi_vpd __rcu *vpd_pg80; -+ struct scsi_vpd __rcu *vpd_pg89; - unsigned char current_tag; /* current tag */ - struct scsi_target *sdev_target; /* used only for single_lun */ - diff --git a/target/linux/generic/backport-5.4/801-v5.5-hwmon-Driver-for-disk-and-solid-state-drives-with-te.patch b/target/linux/generic/backport-5.4/801-v5.5-hwmon-Driver-for-disk-and-solid-state-drives-with-te.patch deleted file mode 100644 index 32a629772f..0000000000 --- a/target/linux/generic/backport-5.4/801-v5.5-hwmon-Driver-for-disk-and-solid-state-drives-with-te.patch +++ /dev/null @@ -1,737 +0,0 @@ -From 5b46903d8bf372e563bf2150d46b87fff197a109 Mon Sep 17 00:00:00 2001 -From: Guenter Roeck <linux@roeck-us.net> -Date: Thu, 28 Nov 2019 21:34:40 -0800 -Subject: [PATCH] hwmon: Driver for disk and solid state drives with - temperature sensors -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Reading the temperature of ATA drives has been supported for years -by userspace tools such as smarttools or hddtemp. The downside of -such tools is that they need to run with super-user privilege, that -the temperatures are not reported by standard tools such as 'sensors' -or 'libsensors', and that drive temperatures are not available for use -in the kernel's thermal subsystem. - -This driver solves this problem by adding support for reading the -temperature of ATA drives from the kernel using the hwmon API and -by adding a temperature zone for each drive. - -With this driver, the hard disk temperature can be read using the -unprivileged 'sensors' application: - -$ sensors drivetemp-scsi-1-0 -drivetemp-scsi-1-0 -Adapter: SCSI adapter -temp1: +23.0°C - -or directly from sysfs: - -$ grep . 
/sys/class/hwmon/hwmon9/{name,temp1_input} -/sys/class/hwmon/hwmon9/name:drivetemp -/sys/class/hwmon/hwmon9/temp1_input:23000 - -If the drive supports SCT transport and reports temperature limits, -those are reported as well. - -drivetemp-scsi-0-0 -Adapter: SCSI adapter -temp1: +27.0°C (low = +0.0°C, high = +60.0°C) - (crit low = -41.0°C, crit = +85.0°C) - (lowest = +23.0°C, highest = +34.0°C) - -The driver attempts to use SCT Command Transport to read the drive -temperature. If the SCT Command Transport feature set is not available, -or if it does not report the drive temperature, drive temperatures may -be readable through SMART attributes. Since SMART attributes are not well -defined, this method is only used as fallback mechanism. - -Cc: Chris Healy <cphealy@gmail.com> -Cc: Linus Walleij <linus.walleij@linaro.org> -Cc: Martin K. Petersen <martin.petersen@oracle.com> -Cc: Bart Van Assche <bvanassche@acm.org> -Reviewed-by: Linus Walleij <linus.walleij@linaro.org> -Tested-by: Linus Walleij <linus.walleij@linaro.org> -Signed-off-by: Guenter Roeck <linux@roeck-us.net> ---- - Documentation/hwmon/drivetemp.rst | 52 +++ - Documentation/hwmon/index.rst | 1 + - drivers/hwmon/Kconfig | 10 + - drivers/hwmon/Makefile | 1 + - drivers/hwmon/drivetemp.c | 574 ++++++++++++++++++++++++++++++ - 5 files changed, 638 insertions(+) - create mode 100644 Documentation/hwmon/drivetemp.rst - create mode 100644 drivers/hwmon/drivetemp.c - ---- /dev/null -+++ b/Documentation/hwmon/drivetemp.rst -@@ -0,0 +1,52 @@ -+.. 
SPDX-License-Identifier: GPL-2.0 -+ -+Kernel driver drivetemp -+======================= -+ -+ -+References -+---------- -+ -+ANS T13/1699-D -+Information technology - AT Attachment 8 - ATA/ATAPI Command Set (ATA8-ACS) -+ -+ANS Project T10/BSR INCITS 513 -+Information technology - SCSI Primary Commands - 4 (SPC-4) -+ -+ANS Project INCITS 557 -+Information technology - SCSI / ATA Translation - 5 (SAT-5) -+ -+ -+Description -+----------- -+ -+This driver supports reporting the temperature of disk and solid state -+drives with temperature sensors. -+ -+If supported, it uses the ATA SCT Command Transport feature to read -+the current drive temperature and, if available, temperature limits -+as well as historic minimum and maximum temperatures. If SCT Command -+Transport is not supported, the driver uses SMART attributes to read -+the drive temperature. -+ -+ -+Sysfs entries -+------------- -+ -+Only the temp1_input attribute is always available. Other attributes are -+available only if reported by the drive. All temperatures are reported in -+milli-degrees Celsius. -+ -+======================= ===================================================== -+temp1_input Current drive temperature -+temp1_lcrit Minimum temperature limit. Operating the device below -+ this temperature may cause physical damage to the -+ device. -+temp1_min Minimum recommended continuous operating limit -+temp1_max Maximum recommended continuous operating temperature -+temp1_crit Maximum temperature limit. Operating the device above -+ this temperature may cause physical damage to the -+ device. 
-+temp1_lowest Minimum temperature seen this power cycle -+temp1_highest Maximum temperature seen this power cycle -+======================= ===================================================== ---- a/Documentation/hwmon/index.rst -+++ b/Documentation/hwmon/index.rst -@@ -45,6 +45,7 @@ Hardware Monitoring Kernel Drivers - da9052 - da9055 - dme1737 -+ drivetemp - ds1621 - ds620 - emc1403 ---- a/drivers/hwmon/Kconfig -+++ b/drivers/hwmon/Kconfig -@@ -385,6 +385,16 @@ config SENSORS_ATXP1 - This driver can also be built as a module. If so, the module - will be called atxp1. - -+config SENSORS_DRIVETEMP -+ tristate "Hard disk drives with temperature sensors" -+ depends on SCSI && ATA -+ help -+ If you say yes you get support for the temperature sensor on -+ hard disk drives. -+ -+ This driver can also be built as a module. If so, the module -+ will be called satatemp. -+ - config SENSORS_DS620 - tristate "Dallas Semiconductor DS620" - depends on I2C ---- a/drivers/hwmon/Makefile -+++ b/drivers/hwmon/Makefile -@@ -56,6 +56,7 @@ obj-$(CONFIG_SENSORS_DA9052_ADC)+= da905 - obj-$(CONFIG_SENSORS_DA9055)+= da9055-hwmon.o - obj-$(CONFIG_SENSORS_DELL_SMM) += dell-smm-hwmon.o - obj-$(CONFIG_SENSORS_DME1737) += dme1737.o -+obj-$(CONFIG_SENSORS_DRIVETEMP) += drivetemp.o - obj-$(CONFIG_SENSORS_DS620) += ds620.o - obj-$(CONFIG_SENSORS_DS1621) += ds1621.o - obj-$(CONFIG_SENSORS_EMC1403) += emc1403.o ---- /dev/null -+++ b/drivers/hwmon/drivetemp.c -@@ -0,0 +1,574 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Hwmon client for disk and solid state drives with temperature sensors -+ * Copyright (C) 2019 Zodiac Inflight Innovations -+ * -+ * With input from: -+ * Hwmon client for S.M.A.R.T. hard disk drives with temperature sensors. -+ * (C) 2018 Linus Walleij -+ * -+ * hwmon: Driver for SCSI/ATA temperature sensors -+ * by Constantin Baranov <const@mimas.ru>, submitted September 2009 -+ * -+ * This drive supports reporting the temperatire of SATA drives. 
It can be -+ * easily extended to report the temperature of SCSI drives. -+ * -+ * The primary means to read drive temperatures and temperature limits -+ * for ATA drives is the SCT Command Transport feature set as specified in -+ * ATA8-ACS. -+ * It can be used to read the current drive temperature, temperature limits, -+ * and historic minimum and maximum temperatures. The SCT Command Transport -+ * feature set is documented in "AT Attachment 8 - ATA/ATAPI Command Set -+ * (ATA8-ACS)". -+ * -+ * If the SCT Command Transport feature set is not available, drive temperatures -+ * may be readable through SMART attributes. Since SMART attributes are not well -+ * defined, this method is only used as fallback mechanism. -+ * -+ * There are three SMART attributes which may report drive temperatures. -+ * Those are defined as follows (from -+ * http://www.cropel.com/library/smart-attribute-list.aspx). -+ * -+ * 190 Temperature Temperature, monitored by a sensor somewhere inside -+ * the drive. Raw value typicaly holds the actual -+ * temperature (hexadecimal) in its rightmost two digits. -+ * -+ * 194 Temperature Temperature, monitored by a sensor somewhere inside -+ * the drive. Raw value typicaly holds the actual -+ * temperature (hexadecimal) in its rightmost two digits. -+ * -+ * 231 Temperature Temperature, monitored by a sensor somewhere inside -+ * the drive. Raw value typicaly holds the actual -+ * temperature (hexadecimal) in its rightmost two digits. -+ * -+ * Wikipedia defines attributes a bit differently. -+ * -+ * 190 Temperature Value is equal to (100-temp. °C), allowing manufacturer -+ * Difference or to set a minimum threshold which corresponds to a -+ * Airflow maximum temperature. This also follows the convention of -+ * Temperature 100 being a best-case value and lower values being -+ * undesirable. However, some older drives may instead -+ * report raw Temperature (identical to 0xC2) or -+ * Temperature minus 50 here. 
-+ * 194 Temperature or Indicates the device temperature, if the appropriate -+ * Temperature sensor is fitted. Lowest byte of the raw value contains -+ * Celsius the exact temperature value (Celsius degrees). -+ * 231 Life Left Indicates the approximate SSD life left, in terms of -+ * (SSDs) or program/erase cycles or available reserved blocks. -+ * Temperature A normalized value of 100 represents a new drive, with -+ * a threshold value at 10 indicating a need for -+ * replacement. A value of 0 may mean that the drive is -+ * operating in read-only mode to allow data recovery. -+ * Previously (pre-2010) occasionally used for Drive -+ * Temperature (more typically reported at 0xC2). -+ * -+ * Common denominator is that the first raw byte reports the temperature -+ * in degrees C on almost all drives. Some drives may report a fractional -+ * temperature in the second raw byte. -+ * -+ * Known exceptions (from libatasmart): -+ * - SAMSUNG SV0412H and SAMSUNG SV1204H) report the temperature in 10th -+ * degrees C in the first two raw bytes. -+ * - A few Maxtor drives report an unknown or bad value in attribute 194. -+ * - Certain Apple SSD drives report an unknown value in attribute 190. -+ * Only certain firmware versions are affected. -+ * -+ * Those exceptions affect older ATA drives and are currently ignored. -+ * Also, the second raw byte (possibly reporting the fractional temperature) -+ * is currently ignored. -+ * -+ * Many drives also report temperature limits in additional SMART data raw -+ * bytes. The format of those is not well defined and varies widely. -+ * The driver does not currently attempt to report those limits. -+ * -+ * According to data in smartmontools, attribute 231 is rarely used to report -+ * drive temperatures. At the same time, several drives report SSD life left -+ * in attribute 231, but do not support temperature sensors. For this reason, -+ * attribute 231 is currently ignored. 
-+ * -+ * Following above definitions, temperatures are reported as follows. -+ * If SCT Command Transport is supported, it is used to read the -+ * temperature and, if available, temperature limits. -+ * - Otherwise, if SMART attribute 194 is supported, it is used to read -+ * the temperature. -+ * - Otherwise, if SMART attribute 190 is supported, it is used to read -+ * the temperature. -+ */ -+ -+#include <linux/ata.h> -+#include <linux/bits.h> -+#include <linux/device.h> -+#include <linux/hwmon.h> -+#include <linux/kernel.h> -+#include <linux/list.h> -+#include <linux/module.h> -+#include <linux/mutex.h> -+#include <scsi/scsi_cmnd.h> -+#include <scsi/scsi_device.h> -+#include <scsi/scsi_driver.h> -+#include <scsi/scsi_proto.h> -+ -+struct drivetemp_data { -+ struct list_head list; /* list of instantiated devices */ -+ struct mutex lock; /* protect data buffer accesses */ -+ struct scsi_device *sdev; /* SCSI device */ -+ struct device *dev; /* instantiating device */ -+ struct device *hwdev; /* hardware monitoring device */ -+ u8 smartdata[ATA_SECT_SIZE]; /* local buffer */ -+ int (*get_temp)(struct drivetemp_data *st, u32 attr, long *val); -+ bool have_temp_lowest; /* lowest temp in SCT status */ -+ bool have_temp_highest; /* highest temp in SCT status */ -+ bool have_temp_min; /* have min temp */ -+ bool have_temp_max; /* have max temp */ -+ bool have_temp_lcrit; /* have lower critical limit */ -+ bool have_temp_crit; /* have critical limit */ -+ int temp_min; /* min temp */ -+ int temp_max; /* max temp */ -+ int temp_lcrit; /* lower critical limit */ -+ int temp_crit; /* critical limit */ -+}; -+ -+static LIST_HEAD(drivetemp_devlist); -+ -+#define ATA_MAX_SMART_ATTRS 30 -+#define SMART_TEMP_PROP_190 190 -+#define SMART_TEMP_PROP_194 194 -+ -+#define SCT_STATUS_REQ_ADDR 0xe0 -+#define SCT_STATUS_VERSION_LOW 0 /* log byte offsets */ -+#define SCT_STATUS_VERSION_HIGH 1 -+#define SCT_STATUS_TEMP 200 -+#define SCT_STATUS_TEMP_LOWEST 201 -+#define 
SCT_STATUS_TEMP_HIGHEST 202 -+#define SCT_READ_LOG_ADDR 0xe1 -+#define SMART_READ_LOG 0xd5 -+#define SMART_WRITE_LOG 0xd6 -+ -+#define INVALID_TEMP 0x80 -+ -+#define temp_is_valid(temp) ((temp) != INVALID_TEMP) -+#define temp_from_sct(temp) (((s8)(temp)) * 1000) -+ -+static inline bool ata_id_smart_supported(u16 *id) -+{ -+ return id[ATA_ID_COMMAND_SET_1] & BIT(0); -+} -+ -+static inline bool ata_id_smart_enabled(u16 *id) -+{ -+ return id[ATA_ID_CFS_ENABLE_1] & BIT(0); -+} -+ -+static int drivetemp_scsi_command(struct drivetemp_data *st, -+ u8 ata_command, u8 feature, -+ u8 lba_low, u8 lba_mid, u8 lba_high) -+{ -+ u8 scsi_cmd[MAX_COMMAND_SIZE]; -+ int data_dir; -+ -+ memset(scsi_cmd, 0, sizeof(scsi_cmd)); -+ scsi_cmd[0] = ATA_16; -+ if (ata_command == ATA_CMD_SMART && feature == SMART_WRITE_LOG) { -+ scsi_cmd[1] = (5 << 1); /* PIO Data-out */ -+ /* -+ * No off.line or cc, write to dev, block count in sector count -+ * field. -+ */ -+ scsi_cmd[2] = 0x06; -+ data_dir = DMA_TO_DEVICE; -+ } else { -+ scsi_cmd[1] = (4 << 1); /* PIO Data-in */ -+ /* -+ * No off.line or cc, read from dev, block count in sector count -+ * field. 
-+ */ -+ scsi_cmd[2] = 0x0e; -+ data_dir = DMA_FROM_DEVICE; -+ } -+ scsi_cmd[4] = feature; -+ scsi_cmd[6] = 1; /* 1 sector */ -+ scsi_cmd[8] = lba_low; -+ scsi_cmd[10] = lba_mid; -+ scsi_cmd[12] = lba_high; -+ scsi_cmd[14] = ata_command; -+ -+ return scsi_execute_req(st->sdev, scsi_cmd, data_dir, -+ st->smartdata, ATA_SECT_SIZE, NULL, HZ, 5, -+ NULL); -+} -+ -+static int drivetemp_ata_command(struct drivetemp_data *st, u8 feature, -+ u8 select) -+{ -+ return drivetemp_scsi_command(st, ATA_CMD_SMART, feature, select, -+ ATA_SMART_LBAM_PASS, ATA_SMART_LBAH_PASS); -+} -+ -+static int drivetemp_get_smarttemp(struct drivetemp_data *st, u32 attr, -+ long *temp) -+{ -+ u8 *buf = st->smartdata; -+ bool have_temp = false; -+ u8 temp_raw; -+ u8 csum; -+ int err; -+ int i; -+ -+ err = drivetemp_ata_command(st, ATA_SMART_READ_VALUES, 0); -+ if (err) -+ return err; -+ -+ /* Checksum the read value table */ -+ csum = 0; -+ for (i = 0; i < ATA_SECT_SIZE; i++) -+ csum += buf[i]; -+ if (csum) { -+ dev_dbg(&st->sdev->sdev_gendev, -+ "checksum error reading SMART values\n"); -+ return -EIO; -+ } -+ -+ for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) { -+ u8 *attr = buf + i * 12; -+ int id = attr[2]; -+ -+ if (!id) -+ continue; -+ -+ if (id == SMART_TEMP_PROP_190) { -+ temp_raw = attr[7]; -+ have_temp = true; -+ } -+ if (id == SMART_TEMP_PROP_194) { -+ temp_raw = attr[7]; -+ have_temp = true; -+ break; -+ } -+ } -+ -+ if (have_temp) { -+ *temp = temp_raw * 1000; -+ return 0; -+ } -+ -+ return -ENXIO; -+} -+ -+static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val) -+{ -+ u8 *buf = st->smartdata; -+ int err; -+ -+ err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR); -+ if (err) -+ return err; -+ switch (attr) { -+ case hwmon_temp_input: -+ *val = temp_from_sct(buf[SCT_STATUS_TEMP]); -+ break; -+ case hwmon_temp_lowest: -+ *val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]); -+ break; -+ case hwmon_temp_highest: -+ *val = 
temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]); -+ break; -+ default: -+ err = -EINVAL; -+ break; -+ } -+ return err; -+} -+ -+static int drivetemp_identify_sata(struct drivetemp_data *st) -+{ -+ struct scsi_device *sdev = st->sdev; -+ u8 *buf = st->smartdata; -+ struct scsi_vpd *vpd; -+ bool is_ata, is_sata; -+ bool have_sct_data_table; -+ bool have_sct_temp; -+ bool have_smart; -+ bool have_sct; -+ u16 *ata_id; -+ u16 version; -+ long temp; -+ int err; -+ -+ /* SCSI-ATA Translation present? */ -+ rcu_read_lock(); -+ vpd = rcu_dereference(sdev->vpd_pg89); -+ -+ /* -+ * Verify that ATA IDENTIFY DEVICE data is included in ATA Information -+ * VPD and that the drive implements the SATA protocol. -+ */ -+ if (!vpd || vpd->len < 572 || vpd->data[56] != ATA_CMD_ID_ATA || -+ vpd->data[36] != 0x34) { -+ rcu_read_unlock(); -+ return -ENODEV; -+ } -+ ata_id = (u16 *)&vpd->data[60]; -+ is_ata = ata_id_is_ata(ata_id); -+ is_sata = ata_id_is_sata(ata_id); -+ have_sct = ata_id_sct_supported(ata_id); -+ have_sct_data_table = ata_id_sct_data_tables(ata_id); -+ have_smart = ata_id_smart_supported(ata_id) && -+ ata_id_smart_enabled(ata_id); -+ -+ rcu_read_unlock(); -+ -+ /* bail out if this is not a SATA device */ -+ if (!is_ata || !is_sata) -+ return -ENODEV; -+ if (!have_sct) -+ goto skip_sct; -+ -+ err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR); -+ if (err) -+ goto skip_sct; -+ -+ version = (buf[SCT_STATUS_VERSION_HIGH] << 8) | -+ buf[SCT_STATUS_VERSION_LOW]; -+ if (version != 2 && version != 3) -+ goto skip_sct; -+ -+ have_sct_temp = temp_is_valid(buf[SCT_STATUS_TEMP]); -+ if (!have_sct_temp) -+ goto skip_sct; -+ -+ st->have_temp_lowest = temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]); -+ st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]); -+ -+ if (!have_sct_data_table) -+ goto skip_sct; -+ -+ /* Request and read temperature history table */ -+ memset(buf, '\0', sizeof(st->smartdata)); -+ buf[0] = 5; /* data table command */ -+ buf[2] = 1; /* 
read table */ -+ buf[4] = 2; /* temperature history table */ -+ -+ err = drivetemp_ata_command(st, SMART_WRITE_LOG, SCT_STATUS_REQ_ADDR); -+ if (err) -+ goto skip_sct_data; -+ -+ err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_READ_LOG_ADDR); -+ if (err) -+ goto skip_sct_data; -+ -+ /* -+ * Temperature limits per AT Attachment 8 - -+ * ATA/ATAPI Command Set (ATA8-ACS) -+ */ -+ st->have_temp_max = temp_is_valid(buf[6]); -+ st->have_temp_crit = temp_is_valid(buf[7]); -+ st->have_temp_min = temp_is_valid(buf[8]); -+ st->have_temp_lcrit = temp_is_valid(buf[9]); -+ -+ st->temp_max = temp_from_sct(buf[6]); -+ st->temp_crit = temp_from_sct(buf[7]); -+ st->temp_min = temp_from_sct(buf[8]); -+ st->temp_lcrit = temp_from_sct(buf[9]); -+ -+skip_sct_data: -+ if (have_sct_temp) { -+ st->get_temp = drivetemp_get_scttemp; -+ return 0; -+ } -+skip_sct: -+ if (!have_smart) -+ return -ENODEV; -+ st->get_temp = drivetemp_get_smarttemp; -+ return drivetemp_get_smarttemp(st, hwmon_temp_input, &temp); -+} -+ -+static int drivetemp_identify(struct drivetemp_data *st) -+{ -+ struct scsi_device *sdev = st->sdev; -+ -+ /* Bail out immediately if there is no inquiry data */ -+ if (!sdev->inquiry || sdev->inquiry_len < 16) -+ return -ENODEV; -+ -+ /* Disk device? 
*/ -+ if (sdev->type != TYPE_DISK && sdev->type != TYPE_ZBC) -+ return -ENODEV; -+ -+ return drivetemp_identify_sata(st); -+} -+ -+static int drivetemp_read(struct device *dev, enum hwmon_sensor_types type, -+ u32 attr, int channel, long *val) -+{ -+ struct drivetemp_data *st = dev_get_drvdata(dev); -+ int err = 0; -+ -+ if (type != hwmon_temp) -+ return -EINVAL; -+ -+ switch (attr) { -+ case hwmon_temp_input: -+ case hwmon_temp_lowest: -+ case hwmon_temp_highest: -+ mutex_lock(&st->lock); -+ err = st->get_temp(st, attr, val); -+ mutex_unlock(&st->lock); -+ break; -+ case hwmon_temp_lcrit: -+ *val = st->temp_lcrit; -+ break; -+ case hwmon_temp_min: -+ *val = st->temp_min; -+ break; -+ case hwmon_temp_max: -+ *val = st->temp_max; -+ break; -+ case hwmon_temp_crit: -+ *val = st->temp_crit; -+ break; -+ default: -+ err = -EINVAL; -+ break; -+ } -+ return err; -+} -+ -+static umode_t drivetemp_is_visible(const void *data, -+ enum hwmon_sensor_types type, -+ u32 attr, int channel) -+{ -+ const struct drivetemp_data *st = data; -+ -+ switch (type) { -+ case hwmon_temp: -+ switch (attr) { -+ case hwmon_temp_input: -+ return 0444; -+ case hwmon_temp_lowest: -+ if (st->have_temp_lowest) -+ return 0444; -+ break; -+ case hwmon_temp_highest: -+ if (st->have_temp_highest) -+ return 0444; -+ break; -+ case hwmon_temp_min: -+ if (st->have_temp_min) -+ return 0444; -+ break; -+ case hwmon_temp_max: -+ if (st->have_temp_max) -+ return 0444; -+ break; -+ case hwmon_temp_lcrit: -+ if (st->have_temp_lcrit) -+ return 0444; -+ break; -+ case hwmon_temp_crit: -+ if (st->have_temp_crit) -+ return 0444; -+ break; -+ default: -+ break; -+ } -+ break; -+ default: -+ break; -+ } -+ return 0; -+} -+ -+static const struct hwmon_channel_info *drivetemp_info[] = { -+ HWMON_CHANNEL_INFO(chip, -+ HWMON_C_REGISTER_TZ), -+ HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | -+ HWMON_T_LOWEST | HWMON_T_HIGHEST | -+ HWMON_T_MIN | HWMON_T_MAX | -+ HWMON_T_LCRIT | HWMON_T_CRIT), -+ NULL -+}; -+ -+static const 
struct hwmon_ops drivetemp_ops = { -+ .is_visible = drivetemp_is_visible, -+ .read = drivetemp_read, -+}; -+ -+static const struct hwmon_chip_info drivetemp_chip_info = { -+ .ops = &drivetemp_ops, -+ .info = drivetemp_info, -+}; -+ -+/* -+ * The device argument points to sdev->sdev_dev. Its parent is -+ * sdev->sdev_gendev, which we can use to get the scsi_device pointer. -+ */ -+static int drivetemp_add(struct device *dev, struct class_interface *intf) -+{ -+ struct scsi_device *sdev = to_scsi_device(dev->parent); -+ struct drivetemp_data *st; -+ int err; -+ -+ st = kzalloc(sizeof(*st), GFP_KERNEL); -+ if (!st) -+ return -ENOMEM; -+ -+ st->sdev = sdev; -+ st->dev = dev; -+ mutex_init(&st->lock); -+ -+ if (drivetemp_identify(st)) { -+ err = -ENODEV; -+ goto abort; -+ } -+ -+ st->hwdev = hwmon_device_register_with_info(dev->parent, "drivetemp", -+ st, &drivetemp_chip_info, -+ NULL); -+ if (IS_ERR(st->hwdev)) { -+ err = PTR_ERR(st->hwdev); -+ goto abort; -+ } -+ -+ list_add(&st->list, &drivetemp_devlist); -+ return 0; -+ -+abort: -+ kfree(st); -+ return err; -+} -+ -+static void drivetemp_remove(struct device *dev, struct class_interface *intf) -+{ -+ struct drivetemp_data *st, *tmp; -+ -+ list_for_each_entry_safe(st, tmp, &drivetemp_devlist, list) { -+ if (st->dev == dev) { -+ list_del(&st->list); -+ hwmon_device_unregister(st->hwdev); -+ kfree(st); -+ break; -+ } -+ } -+} -+ -+static struct class_interface drivetemp_interface = { -+ .add_dev = drivetemp_add, -+ .remove_dev = drivetemp_remove, -+}; -+ -+static int __init drivetemp_init(void) -+{ -+ return scsi_register_interface(&drivetemp_interface); -+} -+ -+static void __exit drivetemp_exit(void) -+{ -+ scsi_unregister_interface(&drivetemp_interface); -+} -+ -+module_init(drivetemp_init); -+module_exit(drivetemp_exit); -+ -+MODULE_AUTHOR("Guenter Roeck <linus@roeck-us.net>"); -+MODULE_DESCRIPTION("Hard drive temperature monitor"); -+MODULE_LICENSE("GPL"); diff --git 
a/target/linux/generic/backport-5.4/801-v5.6-leds-populate-the-device-s-of_node.patch b/target/linux/generic/backport-5.4/801-v5.6-leds-populate-the-device-s-of_node.patch deleted file mode 100644 index 5c3b58c436..0000000000 --- a/target/linux/generic/backport-5.4/801-v5.6-leds-populate-the-device-s-of_node.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 7a349e8c535d7327bf80710323c725df47149b8d Mon Sep 17 00:00:00 2001 -From: Jean-Jacques Hiblot <jjhiblot@ti.com> -Date: Sun, 5 Jan 2020 23:31:14 +0100 -Subject: [PATCH] leds: populate the device's of_node - -If initialization data is available and its fwnode is actually a -of_node, store this information in the led device's structure. This -will allow the device to use or provide OF-based API such (devm_xxx). - -Signed-off-by: Jean-Jacques Hiblot <jjhiblot@ti.com> -Signed-off-by: Pavel Machek <pavel@ucw.cz> -[backport to 5.4] ---- - ---- a/drivers/leds/led-class.c -+++ b/drivers/leds/led-class.c -@@ -19,6 +19,7 @@ - #include <linux/spinlock.h> - #include <linux/timer.h> - #include <uapi/linux/uleds.h> -+#include <linux/of.h> - #include "leds.h" - - static struct class *leds_class; -@@ -277,8 +278,10 @@ int led_classdev_register_ext(struct dev - mutex_unlock(&led_cdev->led_access); - return PTR_ERR(led_cdev->dev); - } -- if (init_data && init_data->fwnode) -+ if (init_data && init_data->fwnode) { - led_cdev->dev->fwnode = init_data->fwnode; -+ led_cdev->dev->of_node = to_of_node(init_data->fwnode); -+ } - - if (ret) - dev_warn(parent, "Led %s renamed to %s due to name collision", diff --git a/target/linux/generic/backport-5.4/803-v5.8-i2c-pxa-use-official-address-byte-helper.patch b/target/linux/generic/backport-5.4/803-v5.8-i2c-pxa-use-official-address-byte-helper.patch deleted file mode 100644 index a937b52d9d..0000000000 --- a/target/linux/generic/backport-5.4/803-v5.8-i2c-pxa-use-official-address-byte-helper.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: 
linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 01/17] i2c: pxa: use official address byte helper -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -i2c-pxa was created before i2c_8bit_addr_from_msg() was implemented, -and used its own i2c_pxa_addr_byte() which is functionally the same. -Sadly, it was never updated to use this new helper. Switch it over. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 21 +++++++-------------- - 1 file changed, 7 insertions(+), 14 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -674,16 +674,6 @@ static void i2c_pxa_slave_stop(struct px - * PXA I2C Master mode - */ - --static inline unsigned int i2c_pxa_addr_byte(struct i2c_msg *msg) --{ -- unsigned int addr = (msg->addr & 0x7f) << 1; -- -- if (msg->flags & I2C_M_RD) -- addr |= 1; -- -- return addr; --} -- - static inline void i2c_pxa_start_message(struct pxa_i2c *i2c) - { - u32 icr; -@@ -691,8 +681,8 @@ static inline void i2c_pxa_start_message - /* - * Step 1: target slave address into IDBR - */ -- writel(i2c_pxa_addr_byte(i2c->msg), _IDBR(i2c)); -- i2c->req_slave_addr = i2c_pxa_addr_byte(i2c->msg); -+ i2c->req_slave_addr = i2c_8bit_addr_from_msg(i2c->msg); -+ writel(i2c->req_slave_addr, _IDBR(i2c)); - - /* - * Step 2: initiate the write. -@@ -1003,8 +993,8 @@ static void i2c_pxa_irq_txempty(struct p - /* - * Write the next address. - */ -- writel(i2c_pxa_addr_byte(i2c->msg), _IDBR(i2c)); -- i2c->req_slave_addr = i2c_pxa_addr_byte(i2c->msg); -+ i2c->req_slave_addr = i2c_8bit_addr_from_msg(i2c->msg); -+ writel(i2c->req_slave_addr, _IDBR(i2c)); - - /* - * And trigger a repeated start, and send the byte. 
diff --git a/target/linux/generic/backport-5.4/804-v5.8-i2c-pxa-remove-unneeded-includes.patch b/target/linux/generic/backport-5.4/804-v5.8-i2c-pxa-remove-unneeded-includes.patch deleted file mode 100644 index 6a911325dd..0000000000 --- a/target/linux/generic/backport-5.4/804-v5.8-i2c-pxa-remove-unneeded-includes.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 02/17] i2c: pxa: remove unneeded includes -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -i2c-pxa does not need linux/sched.h nor linux/time.h includes, so -remove these. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 4 ---- - 1 file changed, 4 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -20,8 +20,6 @@ - #include <linux/module.h> - #include <linux/i2c.h> - #include <linux/init.h> --#include <linux/time.h> --#include <linux/sched.h> - #include <linux/delay.h> - #include <linux/errno.h> - #include <linux/interrupt.h> -@@ -35,8 +33,6 @@ - #include <linux/io.h> - #include <linux/platform_data/i2c-pxa.h> - --#include <asm/irq.h> -- - struct pxa_reg_layout { - u32 ibmr; - u32 idbr; diff --git a/target/linux/generic/backport-5.4/805-v5.8-i2c-pxa-re-arrange-includes-to-be-in-alphabetical-or.patch b/target/linux/generic/backport-5.4/805-v5.8-i2c-pxa-re-arrange-includes-to-be-in-alphabetical-or.patch deleted file mode 100644 index 4d6dc7f071..0000000000 --- a/target/linux/generic/backport-5.4/805-v5.8-i2c-pxa-re-arrange-includes-to-be-in-alphabetical-or.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 03/17] i2c: pxa: re-arrange includes to be in alphabetical - order -MIME-Version: 1.0 -Content-Disposition: 
inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Arrange the includes to be in alphabetical order to help avoid -duplicated includes. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 18 +++++++++--------- - 1 file changed, 9 insertions(+), 9 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -16,22 +16,22 @@ - * Dec 2004: Added support for PXA27x and slave device probing [Liam Girdwood] - * Feb 2005: Rework slave mode handling [RMK] - */ --#include <linux/kernel.h> --#include <linux/module.h> --#include <linux/i2c.h> --#include <linux/init.h> -+#include <linux/clk.h> - #include <linux/delay.h> -+#include <linux/err.h> - #include <linux/errno.h> --#include <linux/interrupt.h> -+#include <linux/i2c.h> - #include <linux/i2c-pxa.h> -+#include <linux/init.h> -+#include <linux/interrupt.h> -+#include <linux/io.h> -+#include <linux/kernel.h> -+#include <linux/module.h> - #include <linux/of.h> - #include <linux/of_device.h> - #include <linux/platform_device.h> --#include <linux/err.h> --#include <linux/clk.h> --#include <linux/slab.h> --#include <linux/io.h> - #include <linux/platform_data/i2c-pxa.h> -+#include <linux/slab.h> - - struct pxa_reg_layout { - u32 ibmr; diff --git a/target/linux/generic/backport-5.4/806-v5.8-i2c-pxa-re-arrange-functions-to-flow-better.patch b/target/linux/generic/backport-5.4/806-v5.8-i2c-pxa-re-arrange-functions-to-flow-better.patch deleted file mode 100644 index 9f09f9dacb..0000000000 --- a/target/linux/generic/backport-5.4/806-v5.8-i2c-pxa-re-arrange-functions-to-flow-better.patch +++ /dev/null @@ -1,380 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 04/17] i2c: pxa: re-arrange functions to flow better -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - 
-Re-arrange the PXA I2C code to avoid forward declarations, and keep -similar functionality (e.g. the non-IRQ mode support) together. This -improves code readability. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 325 +++++++++++++++++------------------ - 1 file changed, 162 insertions(+), 163 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -326,7 +326,6 @@ static void i2c_pxa_scream_blue_murder(s - #endif /* ifdef DEBUG / else */ - - static void i2c_pxa_master_complete(struct pxa_i2c *i2c, int ret); --static irqreturn_t i2c_pxa_handler(int this_irq, void *dev_id); - - static inline int i2c_pxa_is_slavemode(struct pxa_i2c *i2c) - { -@@ -697,34 +696,6 @@ static inline void i2c_pxa_stop_message( - writel(icr, _ICR(i2c)); - } - --static int i2c_pxa_pio_set_master(struct pxa_i2c *i2c) --{ -- /* make timeout the same as for interrupt based functions */ -- long timeout = 2 * DEF_TIMEOUT; -- -- /* -- * Wait for the bus to become free. -- */ -- while (timeout-- && readl(_ISR(i2c)) & (ISR_IBB | ISR_UB)) { -- udelay(1000); -- show_state(i2c); -- } -- -- if (timeout < 0) { -- show_state(i2c); -- dev_err(&i2c->adap.dev, -- "i2c_pxa: timeout waiting for bus free\n"); -- return I2C_RETRY; -- } -- -- /* -- * Set master mode. -- */ -- writel(readl(_ICR(i2c)) | ICR_SCLE, _ICR(i2c)); -- -- return 0; --} -- - /* - * PXA I2C send master code - * 1. Load master code to IDBR and send it. -@@ -753,140 +724,6 @@ static int i2c_pxa_send_mastercode(struc - return (timeout == 0) ? 
I2C_RETRY : 0; - } - --static int i2c_pxa_do_pio_xfer(struct pxa_i2c *i2c, -- struct i2c_msg *msg, int num) --{ -- unsigned long timeout = 500000; /* 5 seconds */ -- int ret = 0; -- -- ret = i2c_pxa_pio_set_master(i2c); -- if (ret) -- goto out; -- -- i2c->msg = msg; -- i2c->msg_num = num; -- i2c->msg_idx = 0; -- i2c->msg_ptr = 0; -- i2c->irqlogidx = 0; -- -- i2c_pxa_start_message(i2c); -- -- while (i2c->msg_num > 0 && --timeout) { -- i2c_pxa_handler(0, i2c); -- udelay(10); -- } -- -- i2c_pxa_stop_message(i2c); -- -- /* -- * We place the return code in i2c->msg_idx. -- */ -- ret = i2c->msg_idx; -- --out: -- if (timeout == 0) { -- i2c_pxa_scream_blue_murder(i2c, "timeout"); -- ret = I2C_RETRY; -- } -- -- return ret; --} -- --/* -- * We are protected by the adapter bus mutex. -- */ --static int i2c_pxa_do_xfer(struct pxa_i2c *i2c, struct i2c_msg *msg, int num) --{ -- long timeout; -- int ret; -- -- /* -- * Wait for the bus to become free. -- */ -- ret = i2c_pxa_wait_bus_not_busy(i2c); -- if (ret) { -- dev_err(&i2c->adap.dev, "i2c_pxa: timeout waiting for bus free\n"); -- goto out; -- } -- -- /* -- * Set master mode. -- */ -- ret = i2c_pxa_set_master(i2c); -- if (ret) { -- dev_err(&i2c->adap.dev, "i2c_pxa_set_master: error %d\n", ret); -- goto out; -- } -- -- if (i2c->high_mode) { -- ret = i2c_pxa_send_mastercode(i2c); -- if (ret) { -- dev_err(&i2c->adap.dev, "i2c_pxa_send_mastercode timeout\n"); -- goto out; -- } -- } -- -- spin_lock_irq(&i2c->lock); -- -- i2c->msg = msg; -- i2c->msg_num = num; -- i2c->msg_idx = 0; -- i2c->msg_ptr = 0; -- i2c->irqlogidx = 0; -- -- i2c_pxa_start_message(i2c); -- -- spin_unlock_irq(&i2c->lock); -- -- /* -- * The rest of the processing occurs in the interrupt handler. -- */ -- timeout = wait_event_timeout(i2c->wait, i2c->msg_num == 0, HZ * 5); -- i2c_pxa_stop_message(i2c); -- -- /* -- * We place the return code in i2c->msg_idx. 
-- */ -- ret = i2c->msg_idx; -- -- if (!timeout && i2c->msg_num) { -- i2c_pxa_scream_blue_murder(i2c, "timeout"); -- ret = I2C_RETRY; -- } -- -- out: -- return ret; --} -- --static int i2c_pxa_pio_xfer(struct i2c_adapter *adap, -- struct i2c_msg msgs[], int num) --{ -- struct pxa_i2c *i2c = adap->algo_data; -- int ret, i; -- -- /* If the I2C controller is disabled we need to reset it -- (probably due to a suspend/resume destroying state). We do -- this here as we can then avoid worrying about resuming the -- controller before its users. */ -- if (!(readl(_ICR(i2c)) & ICR_IUE)) -- i2c_pxa_reset(i2c); -- -- for (i = adap->retries; i >= 0; i--) { -- ret = i2c_pxa_do_pio_xfer(i2c, msgs, num); -- if (ret != I2C_RETRY) -- goto out; -- -- if (i2c_debug) -- dev_dbg(&adap->dev, "Retrying transmission\n"); -- udelay(100); -- } -- i2c_pxa_scream_blue_murder(i2c, "exhausted retries"); -- ret = -EREMOTEIO; -- out: -- i2c_pxa_set_slave(i2c, ret); -- return ret; --} -- - /* - * i2c_pxa_master_complete - complete the message and wake up. - */ -@@ -1093,6 +930,71 @@ static irqreturn_t i2c_pxa_handler(int t - return IRQ_HANDLED; - } - -+/* -+ * We are protected by the adapter bus mutex. -+ */ -+static int i2c_pxa_do_xfer(struct pxa_i2c *i2c, struct i2c_msg *msg, int num) -+{ -+ long timeout; -+ int ret; -+ -+ /* -+ * Wait for the bus to become free. -+ */ -+ ret = i2c_pxa_wait_bus_not_busy(i2c); -+ if (ret) { -+ dev_err(&i2c->adap.dev, "i2c_pxa: timeout waiting for bus free\n"); -+ goto out; -+ } -+ -+ /* -+ * Set master mode. 
-+ */ -+ ret = i2c_pxa_set_master(i2c); -+ if (ret) { -+ dev_err(&i2c->adap.dev, "i2c_pxa_set_master: error %d\n", ret); -+ goto out; -+ } -+ -+ if (i2c->high_mode) { -+ ret = i2c_pxa_send_mastercode(i2c); -+ if (ret) { -+ dev_err(&i2c->adap.dev, "i2c_pxa_send_mastercode timeout\n"); -+ goto out; -+ } -+ } -+ -+ spin_lock_irq(&i2c->lock); -+ -+ i2c->msg = msg; -+ i2c->msg_num = num; -+ i2c->msg_idx = 0; -+ i2c->msg_ptr = 0; -+ i2c->irqlogidx = 0; -+ -+ i2c_pxa_start_message(i2c); -+ -+ spin_unlock_irq(&i2c->lock); -+ -+ /* -+ * The rest of the processing occurs in the interrupt handler. -+ */ -+ timeout = wait_event_timeout(i2c->wait, i2c->msg_num == 0, HZ * 5); -+ i2c_pxa_stop_message(i2c); -+ -+ /* -+ * We place the return code in i2c->msg_idx. -+ */ -+ ret = i2c->msg_idx; -+ -+ if (!timeout && i2c->msg_num) { -+ i2c_pxa_scream_blue_murder(i2c, "timeout"); -+ ret = I2C_RETRY; -+ } -+ -+ out: -+ return ret; -+} - - static int i2c_pxa_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) - { -@@ -1126,6 +1028,103 @@ static const struct i2c_algorithm i2c_px - .functionality = i2c_pxa_functionality, - }; - -+/* Non-interrupt mode support */ -+static int i2c_pxa_pio_set_master(struct pxa_i2c *i2c) -+{ -+ /* make timeout the same as for interrupt based functions */ -+ long timeout = 2 * DEF_TIMEOUT; -+ -+ /* -+ * Wait for the bus to become free. -+ */ -+ while (timeout-- && readl(_ISR(i2c)) & (ISR_IBB | ISR_UB)) { -+ udelay(1000); -+ show_state(i2c); -+ } -+ -+ if (timeout < 0) { -+ show_state(i2c); -+ dev_err(&i2c->adap.dev, -+ "i2c_pxa: timeout waiting for bus free\n"); -+ return I2C_RETRY; -+ } -+ -+ /* -+ * Set master mode. 
-+ */ -+ writel(readl(_ICR(i2c)) | ICR_SCLE, _ICR(i2c)); -+ -+ return 0; -+} -+ -+static int i2c_pxa_do_pio_xfer(struct pxa_i2c *i2c, -+ struct i2c_msg *msg, int num) -+{ -+ unsigned long timeout = 500000; /* 5 seconds */ -+ int ret = 0; -+ -+ ret = i2c_pxa_pio_set_master(i2c); -+ if (ret) -+ goto out; -+ -+ i2c->msg = msg; -+ i2c->msg_num = num; -+ i2c->msg_idx = 0; -+ i2c->msg_ptr = 0; -+ i2c->irqlogidx = 0; -+ -+ i2c_pxa_start_message(i2c); -+ -+ while (i2c->msg_num > 0 && --timeout) { -+ i2c_pxa_handler(0, i2c); -+ udelay(10); -+ } -+ -+ i2c_pxa_stop_message(i2c); -+ -+ /* -+ * We place the return code in i2c->msg_idx. -+ */ -+ ret = i2c->msg_idx; -+ -+out: -+ if (timeout == 0) { -+ i2c_pxa_scream_blue_murder(i2c, "timeout"); -+ ret = I2C_RETRY; -+ } -+ -+ return ret; -+} -+ -+static int i2c_pxa_pio_xfer(struct i2c_adapter *adap, -+ struct i2c_msg msgs[], int num) -+{ -+ struct pxa_i2c *i2c = adap->algo_data; -+ int ret, i; -+ -+ /* If the I2C controller is disabled we need to reset it -+ (probably due to a suspend/resume destroying state). We do -+ this here as we can then avoid worrying about resuming the -+ controller before its users. 
*/ -+ if (!(readl(_ICR(i2c)) & ICR_IUE)) -+ i2c_pxa_reset(i2c); -+ -+ for (i = adap->retries; i >= 0; i--) { -+ ret = i2c_pxa_do_pio_xfer(i2c, msgs, num); -+ if (ret != I2C_RETRY) -+ goto out; -+ -+ if (i2c_debug) -+ dev_dbg(&adap->dev, "Retrying transmission\n"); -+ udelay(100); -+ } -+ i2c_pxa_scream_blue_murder(i2c, "exhausted retries"); -+ ret = -EREMOTEIO; -+ out: -+ i2c_pxa_set_slave(i2c, ret); -+ return ret; -+} -+ - static const struct i2c_algorithm i2c_pxa_pio_algorithm = { - .master_xfer = i2c_pxa_pio_xfer, - .functionality = i2c_pxa_functionality, diff --git a/target/linux/generic/backport-5.4/807-v5.8-i2c-pxa-re-arrange-register-field-definitions.patch b/target/linux/generic/backport-5.4/807-v5.8-i2c-pxa-re-arrange-register-field-definitions.patch deleted file mode 100644 index afade04877..0000000000 --- a/target/linux/generic/backport-5.4/807-v5.8-i2c-pxa-re-arrange-register-field-definitions.patch +++ /dev/null @@ -1,161 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 05/17] i2c: pxa: re-arrange register field definitions -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Arrange the register field definitions to be grouped together, rather -than the Armada-3700 definitions being separated from the rest of the -definitions. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 113 ++++++++++++++++------------------- - 1 file changed, 53 insertions(+), 60 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -33,6 +33,56 @@ - #include <linux/platform_data/i2c-pxa.h> - #include <linux/slab.h> - -+/* I2C register field definitions */ -+#define ICR_START (1 << 0) /* start bit */ -+#define ICR_STOP (1 << 1) /* stop bit */ -+#define ICR_ACKNAK (1 << 2) /* send ACK(0) or NAK(1) */ -+#define ICR_TB (1 << 3) /* transfer byte bit */ -+#define ICR_MA (1 << 4) /* master abort */ -+#define ICR_SCLE (1 << 5) /* master clock enable */ -+#define ICR_IUE (1 << 6) /* unit enable */ -+#define ICR_GCD (1 << 7) /* general call disable */ -+#define ICR_ITEIE (1 << 8) /* enable tx interrupts */ -+#define ICR_IRFIE (1 << 9) /* enable rx interrupts */ -+#define ICR_BEIE (1 << 10) /* enable bus error ints */ -+#define ICR_SSDIE (1 << 11) /* slave STOP detected int enable */ -+#define ICR_ALDIE (1 << 12) /* enable arbitration interrupt */ -+#define ICR_SADIE (1 << 13) /* slave address detected int enable */ -+#define ICR_UR (1 << 14) /* unit reset */ -+#define ICR_FM (1 << 15) /* fast mode */ -+#define ICR_HS (1 << 16) /* High Speed mode */ -+#define ICR_A3700_FM (1 << 16) /* fast mode for armada-3700 */ -+#define ICR_A3700_HS (1 << 17) /* high speed mode for armada-3700 */ -+#define ICR_GPIOEN (1 << 19) /* enable GPIO mode for SCL in HS */ -+ -+#define ISR_RWM (1 << 0) /* read/write mode */ -+#define ISR_ACKNAK (1 << 1) /* ack/nak status */ -+#define ISR_UB (1 << 2) /* unit busy */ -+#define ISR_IBB (1 << 3) /* bus busy */ -+#define ISR_SSD (1 << 4) /* slave stop detected */ -+#define ISR_ALD (1 << 5) /* arbitration loss detected */ -+#define ISR_ITE (1 << 6) /* tx buffer empty */ -+#define ISR_IRF (1 << 7) /* rx buffer full */ -+#define ISR_GCAD (1 << 8) /* general call address detected */ -+#define ISR_SAD (1 << 9) /* slave 
address detected */ -+#define ISR_BED (1 << 10) /* bus error no ACK/NAK */ -+ -+#define ILCR_SLV_SHIFT 0 -+#define ILCR_SLV_MASK (0x1FF << ILCR_SLV_SHIFT) -+#define ILCR_FLV_SHIFT 9 -+#define ILCR_FLV_MASK (0x1FF << ILCR_FLV_SHIFT) -+#define ILCR_HLVL_SHIFT 18 -+#define ILCR_HLVL_MASK (0x1FF << ILCR_HLVL_SHIFT) -+#define ILCR_HLVH_SHIFT 27 -+#define ILCR_HLVH_MASK (0x1F << ILCR_HLVH_SHIFT) -+ -+#define IWCR_CNT_SHIFT 0 -+#define IWCR_CNT_MASK (0x1F << IWCR_CNT_SHIFT) -+#define IWCR_HS_CNT1_SHIFT 5 -+#define IWCR_HS_CNT1_MASK (0x1F << IWCR_HS_CNT1_SHIFT) -+#define IWCR_HS_CNT2_SHIFT 10 -+#define IWCR_HS_CNT2_MASK (0x1F << IWCR_HS_CNT2_SHIFT) -+ - struct pxa_reg_layout { - u32 ibmr; - u32 idbr; -@@ -53,12 +103,7 @@ enum pxa_i2c_types { - REGS_A3700, - }; - --#define ICR_BUSMODE_FM (1 << 16) /* shifted fast mode for armada-3700 */ --#define ICR_BUSMODE_HS (1 << 17) /* shifted high speed mode for armada-3700 */ -- --/* -- * I2C registers definitions -- */ -+/* I2C register layout definitions */ - static struct pxa_reg_layout pxa_reg_layout[] = { - [REGS_PXA2XX] = { - .ibmr = 0x00, -@@ -96,8 +141,8 @@ static struct pxa_reg_layout pxa_reg_lay - .icr = 0x08, - .isr = 0x0c, - .isar = 0x10, -- .fm = ICR_BUSMODE_FM, -- .hs = ICR_BUSMODE_HS, -+ .fm = ICR_A3700_FM, -+ .hs = ICR_A3700_HS, - }, - }; - -@@ -111,58 +156,6 @@ static const struct platform_device_id i - }; - MODULE_DEVICE_TABLE(platform, i2c_pxa_id_table); - --/* -- * I2C bit definitions -- */ -- --#define ICR_START (1 << 0) /* start bit */ --#define ICR_STOP (1 << 1) /* stop bit */ --#define ICR_ACKNAK (1 << 2) /* send ACK(0) or NAK(1) */ --#define ICR_TB (1 << 3) /* transfer byte bit */ --#define ICR_MA (1 << 4) /* master abort */ --#define ICR_SCLE (1 << 5) /* master clock enable */ --#define ICR_IUE (1 << 6) /* unit enable */ --#define ICR_GCD (1 << 7) /* general call disable */ --#define ICR_ITEIE (1 << 8) /* enable tx interrupts */ --#define ICR_IRFIE (1 << 9) /* enable rx interrupts */ --#define ICR_BEIE (1 << 
10) /* enable bus error ints */ --#define ICR_SSDIE (1 << 11) /* slave STOP detected int enable */ --#define ICR_ALDIE (1 << 12) /* enable arbitration interrupt */ --#define ICR_SADIE (1 << 13) /* slave address detected int enable */ --#define ICR_UR (1 << 14) /* unit reset */ --#define ICR_FM (1 << 15) /* fast mode */ --#define ICR_HS (1 << 16) /* High Speed mode */ --#define ICR_GPIOEN (1 << 19) /* enable GPIO mode for SCL in HS */ -- --#define ISR_RWM (1 << 0) /* read/write mode */ --#define ISR_ACKNAK (1 << 1) /* ack/nak status */ --#define ISR_UB (1 << 2) /* unit busy */ --#define ISR_IBB (1 << 3) /* bus busy */ --#define ISR_SSD (1 << 4) /* slave stop detected */ --#define ISR_ALD (1 << 5) /* arbitration loss detected */ --#define ISR_ITE (1 << 6) /* tx buffer empty */ --#define ISR_IRF (1 << 7) /* rx buffer full */ --#define ISR_GCAD (1 << 8) /* general call address detected */ --#define ISR_SAD (1 << 9) /* slave address detected */ --#define ISR_BED (1 << 10) /* bus error no ACK/NAK */ -- --/* bit field shift & mask */ --#define ILCR_SLV_SHIFT 0 --#define ILCR_SLV_MASK (0x1FF << ILCR_SLV_SHIFT) --#define ILCR_FLV_SHIFT 9 --#define ILCR_FLV_MASK (0x1FF << ILCR_FLV_SHIFT) --#define ILCR_HLVL_SHIFT 18 --#define ILCR_HLVL_MASK (0x1FF << ILCR_HLVL_SHIFT) --#define ILCR_HLVH_SHIFT 27 --#define ILCR_HLVH_MASK (0x1F << ILCR_HLVH_SHIFT) -- --#define IWCR_CNT_SHIFT 0 --#define IWCR_CNT_MASK (0x1F << IWCR_CNT_SHIFT) --#define IWCR_HS_CNT1_SHIFT 5 --#define IWCR_HS_CNT1_MASK (0x1F << IWCR_HS_CNT1_SHIFT) --#define IWCR_HS_CNT2_SHIFT 10 --#define IWCR_HS_CNT2_MASK (0x1F << IWCR_HS_CNT2_SHIFT) -- - struct pxa_i2c { - spinlock_t lock; - wait_queue_head_t wait; diff --git a/target/linux/generic/backport-5.4/808-v5.8-i2c-pxa-add-and-use-definitions-for-IBMR-register.patch b/target/linux/generic/backport-5.4/808-v5.8-i2c-pxa-add-and-use-definitions-for-IBMR-register.patch deleted file mode 100644 index f197808d23..0000000000 --- 
a/target/linux/generic/backport-5.4/808-v5.8-i2c-pxa-add-and-use-definitions-for-IBMR-register.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 06/17] i2c: pxa: add and use definitions for IBMR register -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Add definitions for the bits in the IBMR register, and use them in the -code. This improves readability. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -34,6 +34,9 @@ - #include <linux/slab.h> - - /* I2C register field definitions */ -+#define IBMR_SDAS (1 << 0) -+#define IBMR_SCLS (1 << 1) -+ - #define ICR_START (1 << 0) /* start bit */ - #define ICR_STOP (1 << 1) /* stop bit */ - #define ICR_ACKNAK (1 << 2) /* send ACK(0) or NAK(1) */ -@@ -334,7 +337,7 @@ static void i2c_pxa_abort(struct pxa_i2c - return; - } - -- while ((i > 0) && (readl(_IBMR(i2c)) & 0x1) == 0) { -+ while ((i > 0) && (readl(_IBMR(i2c)) & IBMR_SDAS) == 0) { - unsigned long icr = readl(_ICR(i2c)); - - icr &= ~ICR_START; -@@ -389,7 +392,8 @@ static int i2c_pxa_wait_master(struct px - * quick check of the i2c lines themselves to ensure they've - * gone high... 
- */ -- if ((readl(_ISR(i2c)) & (ISR_UB | ISR_IBB)) == 0 && readl(_IBMR(i2c)) == 3) { -+ if ((readl(_ISR(i2c)) & (ISR_UB | ISR_IBB)) == 0 && -+ readl(_IBMR(i2c)) == (IBMR_SCLS | IBMR_SDAS)) { - if (i2c_debug > 0) - dev_dbg(&i2c->adap.dev, "%s: done\n", __func__); - return 1; -@@ -574,7 +578,7 @@ static void i2c_pxa_slave_start(struct p - timeout = 0x10000; - - while (1) { -- if ((readl(_IBMR(i2c)) & 2) == 2) -+ if ((readl(_IBMR(i2c)) & IBMR_SCLS) == IBMR_SCLS) - break; - - timeout--; -@@ -637,7 +641,7 @@ static void i2c_pxa_slave_start(struct p - timeout = 0x10000; - - while (1) { -- if ((readl(_IBMR(i2c)) & 2) == 2) -+ if ((readl(_IBMR(i2c)) & IBMR_SCLS) == IBMR_SCLS) - break; - - timeout--; diff --git a/target/linux/generic/backport-5.4/809-v5.8-i2c-pxa-always-set-fm-and-hs-members-for-each-type.patch b/target/linux/generic/backport-5.4/809-v5.8-i2c-pxa-always-set-fm-and-hs-members-for-each-type.patch deleted file mode 100644 index 9b1dee62ce..0000000000 --- a/target/linux/generic/backport-5.4/809-v5.8-i2c-pxa-always-set-fm-and-hs-members-for-each-type.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 07/17] i2c: pxa: always set fm and hs members for each type -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Always set the fm and hs members of struct pxa_reg_layout. These -members are already taking space, we don't need code as well. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -114,6 +114,8 @@ static struct pxa_reg_layout pxa_reg_lay - .icr = 0x10, - .isr = 0x18, - .isar = 0x20, -+ .fm = ICR_FM, -+ .hs = ICR_HS, - }, - [REGS_PXA3XX] = { - .ibmr = 0x00, -@@ -121,6 +123,8 @@ static struct pxa_reg_layout pxa_reg_lay - .icr = 0x08, - .isr = 0x0c, - .isar = 0x10, -+ .fm = ICR_FM, -+ .hs = ICR_HS, - }, - [REGS_CE4100] = { - .ibmr = 0x14, -@@ -128,6 +132,8 @@ static struct pxa_reg_layout pxa_reg_lay - .icr = 0x00, - .isr = 0x04, - /* no isar register */ -+ .fm = ICR_FM, -+ .hs = ICR_HS, - }, - [REGS_PXA910] = { - .ibmr = 0x00, -@@ -137,6 +143,8 @@ static struct pxa_reg_layout pxa_reg_lay - .isar = 0x20, - .ilcr = 0x28, - .iwcr = 0x30, -+ .fm = ICR_FM, -+ .hs = ICR_HS, - }, - [REGS_A3700] = { - .ibmr = 0x00, -@@ -1229,8 +1237,8 @@ static int i2c_pxa_probe(struct platform - i2c->reg_idbr = i2c->reg_base + pxa_reg_layout[i2c_type].idbr; - i2c->reg_icr = i2c->reg_base + pxa_reg_layout[i2c_type].icr; - i2c->reg_isr = i2c->reg_base + pxa_reg_layout[i2c_type].isr; -- i2c->fm_mask = pxa_reg_layout[i2c_type].fm ? : ICR_FM; -- i2c->hs_mask = pxa_reg_layout[i2c_type].hs ? 
: ICR_HS; -+ i2c->fm_mask = pxa_reg_layout[i2c_type].fm; -+ i2c->hs_mask = pxa_reg_layout[i2c_type].hs; - - if (i2c_type != REGS_CE4100) - i2c->reg_isar = i2c->reg_base + pxa_reg_layout[i2c_type].isar; diff --git a/target/linux/generic/backport-5.4/810-v5.8-i2c-pxa-move-private-definitions-to-i2c-pxa.c.patch b/target/linux/generic/backport-5.4/810-v5.8-i2c-pxa-move-private-definitions-to-i2c-pxa.c.patch deleted file mode 100644 index dda463052f..0000000000 --- a/target/linux/generic/backport-5.4/810-v5.8-i2c-pxa-move-private-definitions-to-i2c-pxa.c.patch +++ /dev/null @@ -1,128 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 08/17] i2c: pxa: move private definitions to i2c-pxa.c -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Move driver-private definitions out of the i2c-pxa.h platform data -header file into the driver itself. Nothing outside of the driver -makes use of these constants. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 43 ++++++++++++++++++++++++ - include/linux/platform_data/i2c-pxa.h | 48 --------------------------- - 2 files changed, 43 insertions(+), 48 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -86,6 +86,49 @@ - #define IWCR_HS_CNT2_SHIFT 10 - #define IWCR_HS_CNT2_MASK (0x1F << IWCR_HS_CNT2_SHIFT) - -+/* need a longer timeout if we're dealing with the fact we may well be -+ * looking at a multi-master environment -+ */ -+#define DEF_TIMEOUT 32 -+ -+#define BUS_ERROR (-EREMOTEIO) -+#define XFER_NAKED (-ECONNREFUSED) -+#define I2C_RETRY (-2000) /* an error has occurred retry transmit */ -+ -+/* ICR initialize bit values -+ * -+ * 15 FM 0 (100 kHz operation) -+ * 14 UR 0 (No unit reset) -+ * 13 SADIE 0 (Disables the unit from interrupting on slave addresses -+ * matching its slave address) -+ * 12 ALDIE 0 (Disables the unit from interrupt when it loses arbitration -+ * in master mode) -+ * 11 SSDIE 0 (Disables interrupts from a slave stop detected, in slave mode) -+ * 10 BEIE 1 (Enable interrupts from detected bus errors, no ACK sent) -+ * 9 IRFIE 1 (Enable interrupts from full buffer received) -+ * 8 ITEIE 1 (Enables the I2C unit to interrupt when transmit buffer empty) -+ * 7 GCD 1 (Disables i2c unit response to general call messages as a slave) -+ * 6 IUE 0 (Disable unit until we change settings) -+ * 5 SCLE 1 (Enables the i2c clock output for master mode (drives SCL) -+ * 4 MA 0 (Only send stop with the ICR stop bit) -+ * 3 TB 0 (We are not transmitting a byte initially) -+ * 2 ACKNAK 0 (Send an ACK after the unit receives a byte) -+ * 1 STOP 0 (Do not send a STOP) -+ * 0 START 0 (Do not send a START) -+ */ -+#define I2C_ICR_INIT (ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE) -+ -+/* I2C status register init values -+ * -+ * 10 BED 1 (Clear bus error detected) -+ * 9 SAD 1 (Clear slave address detected) -+ * 7 IRF 1 
(Clear IDBR Receive Full) -+ * 6 ITE 1 (Clear IDBR Transmit Empty) -+ * 5 ALD 1 (Clear Arbitration Loss Detected) -+ * 4 SSD 1 (Clear Slave Stop Detected) -+ */ -+#define I2C_ISR_INIT 0x7FF /* status register init */ -+ - struct pxa_reg_layout { - u32 ibmr; - u32 idbr; ---- a/include/linux/platform_data/i2c-pxa.h -+++ b/include/linux/platform_data/i2c-pxa.h -@@ -7,54 +7,6 @@ - #ifndef _I2C_PXA_H_ - #define _I2C_PXA_H_ - --#if 0 --#define DEF_TIMEOUT 3 --#else --/* need a longer timeout if we're dealing with the fact we may well be -- * looking at a multi-master environment --*/ --#define DEF_TIMEOUT 32 --#endif -- --#define BUS_ERROR (-EREMOTEIO) --#define XFER_NAKED (-ECONNREFUSED) --#define I2C_RETRY (-2000) /* an error has occurred retry transmit */ -- --/* ICR initialize bit values --* --* 15. FM 0 (100 Khz operation) --* 14. UR 0 (No unit reset) --* 13. SADIE 0 (Disables the unit from interrupting on slave addresses --* matching its slave address) --* 12. ALDIE 0 (Disables the unit from interrupt when it loses arbitration --* in master mode) --* 11. SSDIE 0 (Disables interrupts from a slave stop detected, in slave mode) --* 10. BEIE 1 (Enable interrupts from detected bus errors, no ACK sent) --* 9. IRFIE 1 (Enable interrupts from full buffer received) --* 8. ITEIE 1 (Enables the I2C unit to interrupt when transmit buffer empty) --* 7. GCD 1 (Disables i2c unit response to general call messages as a slave) --* 6. IUE 0 (Disable unit until we change settings) --* 5. SCLE 1 (Enables the i2c clock output for master mode (drives SCL) --* 4. MA 0 (Only send stop with the ICR stop bit) --* 3. TB 0 (We are not transmitting a byte initially) --* 2. ACKNAK 0 (Send an ACK after the unit receives a byte) --* 1. STOP 0 (Do not send a STOP) --* 0. START 0 (Do not send a START) --* --*/ --#define I2C_ICR_INIT (ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE) -- --/* I2C status register init values -- * -- * 10. BED 1 (Clear bus error detected) -- * 9. 
SAD 1 (Clear slave address detected) -- * 7. IRF 1 (Clear IDBR Receive Full) -- * 6. ITE 1 (Clear IDBR Transmit Empty) -- * 5. ALD 1 (Clear Arbitration Loss Detected) -- * 4. SSD 1 (Clear Slave Stop Detected) -- */ --#define I2C_ISR_INIT 0x7FF /* status register init */ -- - struct i2c_slave_client; - - struct i2c_pxa_platform_data { diff --git a/target/linux/generic/backport-5.4/811-v5.8-i2c-pxa-move-DT-IDs-along-side-platform-IDs.patch b/target/linux/generic/backport-5.4/811-v5.8-i2c-pxa-move-DT-IDs-along-side-platform-IDs.patch deleted file mode 100644 index 02565229d8..0000000000 --- a/target/linux/generic/backport-5.4/811-v5.8-i2c-pxa-move-DT-IDs-along-side-platform-IDs.patch +++ /dev/null @@ -1,50 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 09/17] i2c: pxa: move DT IDs along side platform IDs -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Move the ID tables into one place, near the device dependent data. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 18 +++++++++--------- - 1 file changed, 9 insertions(+), 9 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -200,6 +200,15 @@ static struct pxa_reg_layout pxa_reg_lay - }, - }; - -+static const struct of_device_id i2c_pxa_dt_ids[] = { -+ { .compatible = "mrvl,pxa-i2c", .data = (void *)REGS_PXA2XX }, -+ { .compatible = "mrvl,pwri2c", .data = (void *)REGS_PXA3XX }, -+ { .compatible = "mrvl,mmp-twsi", .data = (void *)REGS_PXA910 }, -+ { .compatible = "marvell,armada-3700-i2c", .data = (void *)REGS_A3700 }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, i2c_pxa_dt_ids); -+ - static const struct platform_device_id i2c_pxa_id_table[] = { - { "pxa2xx-i2c", REGS_PXA2XX }, - { "pxa3xx-pwri2c", REGS_PXA3XX }, -@@ -1178,15 +1187,6 @@ static const struct i2c_algorithm i2c_px - .functionality = i2c_pxa_functionality, - }; - --static const struct of_device_id i2c_pxa_dt_ids[] = { -- { .compatible = "mrvl,pxa-i2c", .data = (void *)REGS_PXA2XX }, -- { .compatible = "mrvl,pwri2c", .data = (void *)REGS_PXA3XX }, -- { .compatible = "mrvl,mmp-twsi", .data = (void *)REGS_PXA910 }, -- { .compatible = "marvell,armada-3700-i2c", .data = (void *)REGS_A3700 }, -- {} --}; --MODULE_DEVICE_TABLE(of, i2c_pxa_dt_ids); -- - static int i2c_pxa_probe_dt(struct platform_device *pdev, struct pxa_i2c *i2c, - enum pxa_i2c_types *i2c_types) - { diff --git a/target/linux/generic/backport-5.4/813-v5.8-i2c-pxa-clean-up-decode_bits.patch b/target/linux/generic/backport-5.4/813-v5.8-i2c-pxa-clean-up-decode_bits.patch deleted file mode 100644 index adcf969ef6..0000000000 --- a/target/linux/generic/backport-5.4/813-v5.8-i2c-pxa-clean-up-decode_bits.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 11/17] i2c: pxa: clean up decode_bits() -MIME-Version: 1.0 
-Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Clean up decode_bits() to use pr_cont(), and move the newline into the -function rather than at its two callsites. Avoid printing an -unnecessary space before the newline. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -287,13 +287,14 @@ struct bits { - static inline void - decode_bits(const char *prefix, const struct bits *bits, int num, u32 val) - { -- printk("%s %08x: ", prefix, val); -+ printk("%s %08x:", prefix, val); - while (num--) { - const char *str = val & bits->mask ? bits->set : bits->unset; - if (str) -- printk("%s ", str); -+ pr_cont(" %s", str); - bits++; - } -+ pr_cont("\n"); - } - - static const struct bits isr_bits[] = { -@@ -313,7 +314,6 @@ static const struct bits isr_bits[] = { - static void decode_ISR(unsigned int val) - { - decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val); -- printk("\n"); - } - - static const struct bits icr_bits[] = { -@@ -338,7 +338,6 @@ static const struct bits icr_bits[] = { - static void decode_ICR(unsigned int val) - { - decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val); -- printk("\n"); - } - #endif - diff --git a/target/linux/generic/backport-5.4/814-v5.8-i2c-pxa-fix-i2c_pxa_wait_bus_not_busy-boundary-condi.patch b/target/linux/generic/backport-5.4/814-v5.8-i2c-pxa-fix-i2c_pxa_wait_bus_not_busy-boundary-condi.patch deleted file mode 100644 index 2aadecc357..0000000000 --- a/target/linux/generic/backport-5.4/814-v5.8-i2c-pxa-fix-i2c_pxa_wait_bus_not_busy-boundary-condi.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Cc: linux-i2c@vger.kernel.org -Subject: [PATCH 12/17] i2c: pxa: fix i2c_pxa_wait_bus_not_busy() boundary - 
condition -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Fix i2c_pxa_wait_bus_not_busy()'s boundary conditions, so that a -coincidental success and timeout results in the function returning -success. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -417,19 +417,26 @@ static void i2c_pxa_abort(struct pxa_i2c - static int i2c_pxa_wait_bus_not_busy(struct pxa_i2c *i2c) - { - int timeout = DEF_TIMEOUT; -+ u32 isr; - -- while (timeout-- && readl(_ISR(i2c)) & (ISR_IBB | ISR_UB)) { -- if ((readl(_ISR(i2c)) & ISR_SAD) != 0) -+ while (1) { -+ isr = readl(_ISR(i2c)); -+ if (!(isr & (ISR_IBB | ISR_UB))) -+ return 0; -+ -+ if (isr & ISR_SAD) - timeout += 4; - -+ if (!timeout--) -+ break; -+ - msleep(2); - show_state(i2c); - } - -- if (timeout < 0) -- show_state(i2c); -+ show_state(i2c); - -- return timeout < 0 ? 
I2C_RETRY : 0; -+ return I2C_RETRY; - } - - static int i2c_pxa_wait_master(struct pxa_i2c *i2c) diff --git a/target/linux/generic/backport-5.4/815-v5.8-i2c-pxa-consolidate-i2c_pxa_-xfer-implementations.patch b/target/linux/generic/backport-5.4/815-v5.8-i2c-pxa-consolidate-i2c_pxa_-xfer-implementations.patch deleted file mode 100644 index 2debd4c86b..0000000000 --- a/target/linux/generic/backport-5.4/815-v5.8-i2c-pxa-consolidate-i2c_pxa_-xfer-implementations.patch +++ /dev/null @@ -1,91 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Subject: [PATCH 1/7] i2c: pxa: consolidate i2c_pxa_*xfer() implementations -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Most of i2c_pxa_pio_xfer() and i2c_pxa_xfer() are identical; the only -differences are that i2c_pxa_pio_xfer() may reset the bus, and they -use different underlying transfer functions. The retry loop is the -same. Consolidate these two functions. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 36 ++++++++++++++++-------------------- - 1 file changed, 16 insertions(+), 20 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -1059,18 +1059,20 @@ static int i2c_pxa_do_xfer(struct pxa_i2 - return ret; - } - --static int i2c_pxa_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) -+static int i2c_pxa_internal_xfer(struct pxa_i2c *i2c, -+ struct i2c_msg *msgs, int num, -+ int (*xfer)(struct pxa_i2c *, -+ struct i2c_msg *, int num)) - { -- struct pxa_i2c *i2c = adap->algo_data; - int ret, i; - -- for (i = adap->retries; i >= 0; i--) { -- ret = i2c_pxa_do_xfer(i2c, msgs, num); -+ for (i = i2c->adap.retries; i >= 0; i--) { -+ ret = xfer(i2c, msgs, num); - if (ret != I2C_RETRY) - goto out; - - if (i2c_debug) -- dev_dbg(&adap->dev, "Retrying transmission\n"); -+ dev_dbg(&i2c->adap.dev, "Retrying transmission\n"); - udelay(100); - } - i2c_pxa_scream_blue_murder(i2c, "exhausted retries"); -@@ -1080,6 +1082,14 @@ static int i2c_pxa_xfer(struct i2c_adapt - return ret; - } - -+static int i2c_pxa_xfer(struct i2c_adapter *adap, -+ struct i2c_msg msgs[], int num) -+{ -+ struct pxa_i2c *i2c = adap->algo_data; -+ -+ return i2c_pxa_internal_xfer(i2c, msgs, num, i2c_pxa_do_xfer); -+} -+ - static u32 i2c_pxa_functionality(struct i2c_adapter *adap) - { - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | -@@ -1163,7 +1173,6 @@ static int i2c_pxa_pio_xfer(struct i2c_a - struct i2c_msg msgs[], int num) - { - struct pxa_i2c *i2c = adap->algo_data; -- int ret, i; - - /* If the I2C controller is disabled we need to reset it - (probably due to a suspend/resume destroying state). 
We do -@@ -1172,20 +1181,7 @@ static int i2c_pxa_pio_xfer(struct i2c_a - if (!(readl(_ICR(i2c)) & ICR_IUE)) - i2c_pxa_reset(i2c); - -- for (i = adap->retries; i >= 0; i--) { -- ret = i2c_pxa_do_pio_xfer(i2c, msgs, num); -- if (ret != I2C_RETRY) -- goto out; -- -- if (i2c_debug) -- dev_dbg(&adap->dev, "Retrying transmission\n"); -- udelay(100); -- } -- i2c_pxa_scream_blue_murder(i2c, "exhausted retries"); -- ret = -EREMOTEIO; -- out: -- i2c_pxa_set_slave(i2c, ret); -- return ret; -+ return i2c_pxa_internal_xfer(i2c, msgs, num, i2c_pxa_do_pio_xfer); - } - - static const struct i2c_algorithm i2c_pxa_pio_algorithm = { diff --git a/target/linux/generic/backport-5.4/816-v5.8-i2c-pxa-avoid-complaints-with-non-responsive-slaves.patch b/target/linux/generic/backport-5.4/816-v5.8-i2c-pxa-avoid-complaints-with-non-responsive-slaves.patch deleted file mode 100644 index 63e6db80ad..0000000000 --- a/target/linux/generic/backport-5.4/816-v5.8-i2c-pxa-avoid-complaints-with-non-responsive-slaves.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Subject: [PATCH 2/7] i2c: pxa: avoid complaints with non-responsive slaves -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Running i2cdetect on a PXA I2C adapter is very noisy; it complains -whenever a slave fails to respond to the address cycle. Since it is -normal to probe for slaves in this way, we should not fill the kernel -log. This is especially true with SFP modules that take a while to -respond on the I2C bus, and probing via the I2C bus is the only way to -detect that they are ready. - -Fix this by changing the internal transfer return code from I2C_RETRY -to a new NO_SLAVE code (mapped to -ENXIO, as per the I2C documentation -for this condition, but we still return -EREMOTEIO to the I2C stack to -maintain long established driver behaviour.) 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -91,6 +91,7 @@ - */ - #define DEF_TIMEOUT 32 - -+#define NO_SLAVE (-ENXIO) - #define BUS_ERROR (-EREMOTEIO) - #define XFER_NAKED (-ECONNREFUSED) - #define I2C_RETRY (-2000) /* an error has occurred retry transmit */ -@@ -838,7 +839,7 @@ static void i2c_pxa_irq_txempty(struct p - */ - if (isr & ISR_ACKNAK) { - if (i2c->msg_ptr == 0 && i2c->msg_idx == 0) -- ret = I2C_RETRY; -+ ret = NO_SLAVE; - else - ret = XFER_NAKED; - } -@@ -1066,16 +1067,19 @@ static int i2c_pxa_internal_xfer(struct - { - int ret, i; - -- for (i = i2c->adap.retries; i >= 0; i--) { -+ for (i = 0; ; ) { - ret = xfer(i2c, msgs, num); -- if (ret != I2C_RETRY) -+ if (ret != I2C_RETRY && ret != NO_SLAVE) - goto out; -+ if (++i >= i2c->adap.retries) -+ break; - - if (i2c_debug) - dev_dbg(&i2c->adap.dev, "Retrying transmission\n"); - udelay(100); - } -- i2c_pxa_scream_blue_murder(i2c, "exhausted retries"); -+ if (ret != NO_SLAVE) -+ i2c_pxa_scream_blue_murder(i2c, "exhausted retries"); - ret = -EREMOTEIO; - out: - i2c_pxa_set_slave(i2c, ret); diff --git a/target/linux/generic/backport-5.4/817-v5.8-i2c-pxa-ensure-timeout-messages-are-unique.patch b/target/linux/generic/backport-5.4/817-v5.8-i2c-pxa-ensure-timeout-messages-are-unique.patch deleted file mode 100644 index 37a77b6c5c..0000000000 --- a/target/linux/generic/backport-5.4/817-v5.8-i2c-pxa-ensure-timeout-messages-are-unique.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Subject: [PATCH 3/7] i2c: pxa: ensure timeout messages are unique -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Ensure that the various timeout messages can identify where in the code -they 
were produced from to aid debugging. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -1052,7 +1052,7 @@ static int i2c_pxa_do_xfer(struct pxa_i2 - ret = i2c->msg_idx; - - if (!timeout && i2c->msg_num) { -- i2c_pxa_scream_blue_murder(i2c, "timeout"); -+ i2c_pxa_scream_blue_murder(i2c, "timeout with active message"); - ret = I2C_RETRY; - } - -@@ -1122,7 +1122,7 @@ static int i2c_pxa_pio_set_master(struct - if (timeout < 0) { - show_state(i2c); - dev_err(&i2c->adap.dev, -- "i2c_pxa: timeout waiting for bus free\n"); -+ "i2c_pxa: timeout waiting for bus free (set_master)\n"); - return I2C_RETRY; - } - -@@ -1166,7 +1166,7 @@ static int i2c_pxa_do_pio_xfer(struct px - - out: - if (timeout == 0) { -- i2c_pxa_scream_blue_murder(i2c, "timeout"); -+ i2c_pxa_scream_blue_murder(i2c, "timeout (do_pio_xfer)"); - ret = I2C_RETRY; - } - diff --git a/target/linux/generic/backport-5.4/818-v5.8-i2c-pxa-remove-some-unnecessary-debug.patch b/target/linux/generic/backport-5.4/818-v5.8-i2c-pxa-remove-some-unnecessary-debug.patch deleted file mode 100644 index 5438588ded..0000000000 --- a/target/linux/generic/backport-5.4/818-v5.8-i2c-pxa-remove-some-unnecessary-debug.patch +++ /dev/null @@ -1,34 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Subject: [PATCH 4/7] i2c: pxa: remove some unnecessary debug -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Remove unnecessary show_state() in the loop inside -i2c_pxa_pio_set_master(), which can be unnecessarily verbose. - -Remove the i2c_pxa_scream_blue_murder() in i2c_pxa_pio_xfer(), which -will trigger if we are probing the I2C bus and a slave does not -respond; this is a normal event, and not something to report. 
- -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -1114,10 +1114,8 @@ static int i2c_pxa_pio_set_master(struct - /* - * Wait for the bus to become free. - */ -- while (timeout-- && readl(_ISR(i2c)) & (ISR_IBB | ISR_UB)) { -+ while (timeout-- && readl(_ISR(i2c)) & (ISR_IBB | ISR_UB)) - udelay(1000); -- show_state(i2c); -- } - - if (timeout < 0) { - show_state(i2c); diff --git a/target/linux/generic/backport-5.4/820-v5.8-i2c-pxa-use-master-abort-for-device-probes.patch b/target/linux/generic/backport-5.4/820-v5.8-i2c-pxa-use-master-abort-for-device-probes.patch deleted file mode 100644 index cde9e3fe33..0000000000 --- a/target/linux/generic/backport-5.4/820-v5.8-i2c-pxa-use-master-abort-for-device-probes.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Subject: [PATCH 6/7] i2c: pxa: use master-abort for device probes -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Use master-abort to send the stop condition after an address cycle -rather than resetting the controller. - -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -899,14 +899,8 @@ static void i2c_pxa_irq_txempty(struct p - icr &= ~ICR_ALDIE; - icr |= ICR_START | ICR_TB; - } else { -- if (i2c->msg->len == 0) { -- /* -- * Device probes have a message length of zero -- * and need the bus to be reset before it can -- * be used again. 
-- */ -- i2c_pxa_reset(i2c); -- } -+ if (i2c->msg->len == 0) -+ icr |= ICR_MA; - i2c_pxa_master_complete(i2c, 0); - } - diff --git a/target/linux/generic/backport-5.4/821-v5.8-i2c-pxa-implement-generic-i2c-bus-recovery.patch b/target/linux/generic/backport-5.4/821-v5.8-i2c-pxa-implement-generic-i2c-bus-recovery.patch deleted file mode 100644 index 592b763b6a..0000000000 --- a/target/linux/generic/backport-5.4/821-v5.8-i2c-pxa-implement-generic-i2c-bus-recovery.patch +++ /dev/null @@ -1,285 +0,0 @@ -From: Russell King <rmk+kernel@armlinux.org.uk> -Bcc: linux@mail.armlinux.org.uk -Subject: [PATCH 7/7] i2c: pxa: implement generic i2c bus recovery -MIME-Version: 1.0 -Content-Disposition: inline -Content-Transfer-Encoding: 8bit -Content-Type: text/plain; charset="utf-8" - -Implement generic GPIO-based I2C bus recovery for the PXA I2C driver. - -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> ---- - drivers/i2c/busses/i2c-pxa.c | 176 +++++++++++++++++++++++++++++++---- - 1 file changed, 159 insertions(+), 17 deletions(-) - ---- a/drivers/i2c/busses/i2c-pxa.c -+++ b/drivers/i2c/busses/i2c-pxa.c -@@ -20,6 +20,7 @@ - #include <linux/delay.h> - #include <linux/err.h> - #include <linux/errno.h> -+#include <linux/gpio/consumer.h> - #include <linux/i2c.h> - #include <linux/i2c-pxa.h> - #include <linux/init.h> -@@ -29,6 +30,7 @@ - #include <linux/module.h> - #include <linux/of.h> - #include <linux/of_device.h> -+#include <linux/pinctrl/consumer.h> - #include <linux/platform_device.h> - #include <linux/platform_data/i2c-pxa.h> - #include <linux/slab.h> -@@ -261,6 +263,11 @@ struct pxa_i2c { - bool highmode_enter; - u32 fm_mask; - u32 hs_mask; -+ -+ struct i2c_bus_recovery_info recovery; -+ struct pinctrl *pinctrl; -+ struct pinctrl_state *pinctrl_default; -+ struct pinctrl_state *pinctrl_recovery; - }; - - #define _IBMR(i2c) ((i2c)->reg_ibmr) -@@ -560,13 +567,8 @@ static void i2c_pxa_set_slave(struct pxa - #define 
i2c_pxa_set_slave(i2c, err) do { } while (0) - #endif - --static void i2c_pxa_reset(struct pxa_i2c *i2c) -+static void i2c_pxa_do_reset(struct pxa_i2c *i2c) - { -- pr_debug("Resetting I2C Controller Unit\n"); -- -- /* abort any transfer currently under way */ -- i2c_pxa_abort(i2c); -- - /* reset according to 9.8 */ - writel(ICR_UR, _ICR(i2c)); - writel(I2C_ISR_INIT, _ISR(i2c)); -@@ -585,12 +587,25 @@ static void i2c_pxa_reset(struct pxa_i2c - #endif - - i2c_pxa_set_slave(i2c, 0); -+} - -+static void i2c_pxa_enable(struct pxa_i2c *i2c) -+{ - /* enable unit */ - writel(readl(_ICR(i2c)) | ICR_IUE, _ICR(i2c)); - udelay(100); - } - -+static void i2c_pxa_reset(struct pxa_i2c *i2c) -+{ -+ pr_debug("Resetting I2C Controller Unit\n"); -+ -+ /* abort any transfer currently under way */ -+ i2c_pxa_abort(i2c); -+ i2c_pxa_do_reset(i2c); -+ i2c_pxa_enable(i2c); -+} -+ - - #ifdef CONFIG_I2C_PXA_SLAVE - /* -@@ -1002,6 +1017,7 @@ static int i2c_pxa_do_xfer(struct pxa_i2 - ret = i2c_pxa_wait_bus_not_busy(i2c); - if (ret) { - dev_err(&i2c->adap.dev, "i2c_pxa: timeout waiting for bus free\n"); -+ i2c_recover_bus(&i2c->adap); - goto out; - } - -@@ -1047,6 +1063,7 @@ static int i2c_pxa_do_xfer(struct pxa_i2 - - if (!timeout && i2c->msg_num) { - i2c_pxa_scream_blue_murder(i2c, "timeout with active message"); -+ i2c_recover_bus(&i2c->adap); - ret = I2C_RETRY; - } - -@@ -1228,6 +1245,129 @@ static int i2c_pxa_probe_pdata(struct pl - return 0; - } - -+static void i2c_pxa_prepare_recovery(struct i2c_adapter *adap) -+{ -+ struct pxa_i2c *i2c = adap->algo_data; -+ u32 ibmr = readl(_IBMR(i2c)); -+ -+ /* -+ * Program the GPIOs to reflect the current I2C bus state while -+ * we transition to recovery; this avoids glitching the bus. 
-+ */ -+ gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS); -+ gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS); -+ -+ WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery)); -+} -+ -+static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap) -+{ -+ struct pxa_i2c *i2c = adap->algo_data; -+ u32 isr; -+ -+ /* -+ * The bus should now be free. Clear up the I2C controller before -+ * handing control of the bus back to avoid the bus changing state. -+ */ -+ isr = readl(_ISR(i2c)); -+ if (isr & (ISR_UB | ISR_IBB)) { -+ dev_dbg(&i2c->adap.dev, -+ "recovery: resetting controller, ISR=0x%08x\n", isr); -+ i2c_pxa_do_reset(i2c); -+ } -+ -+ WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default)); -+ -+ dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n", -+ readl(_IBMR(i2c)), readl(_ISR(i2c))); -+ -+ i2c_pxa_enable(i2c); -+} -+ -+static int i2c_pxa_init_recovery(struct pxa_i2c *i2c) -+{ -+ struct i2c_bus_recovery_info *bri = &i2c->recovery; -+ struct device *dev = i2c->adap.dev.parent; -+ -+ /* -+ * When slave mode is enabled, we are not the only master on the bus. -+ * Bus recovery can only be performed when we are the master, which -+ * we can't be certain of. Therefore, when slave mode is enabled, do -+ * not configure bus recovery. -+ */ -+ if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE)) -+ return 0; -+ -+ i2c->pinctrl = devm_pinctrl_get(dev); -+ if (IS_ERR(i2c->pinctrl)) -+ return PTR_ERR(i2c->pinctrl); -+ -+ if (!i2c->pinctrl) -+ return 0; -+ -+ i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl, -+ PINCTRL_STATE_DEFAULT); -+ i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery"); -+ -+ if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) { -+ dev_info(dev, "missing pinmux recovery information: %ld %ld\n", -+ PTR_ERR(i2c->pinctrl_default), -+ PTR_ERR(i2c->pinctrl_recovery)); -+ return 0; -+ } -+ -+ /* -+ * Claiming GPIOs can influence the pinmux state, and may glitch the -+ * I2C bus. 
Do this carefully. -+ */ -+ bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); -+ if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER)) -+ return -EPROBE_DEFER; -+ if (IS_ERR(bri->scl_gpiod)) { -+ dev_info(dev, "missing scl gpio recovery information: %pe\n", -+ bri->scl_gpiod); -+ return 0; -+ } -+ -+ /* -+ * We have SCL. Pull SCL low and wait a bit so that SDA glitches -+ * have no effect. -+ */ -+ gpiod_direction_output(bri->scl_gpiod, 0); -+ udelay(10); -+ bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN); -+ -+ /* Wait a bit in case of a SDA glitch, and then release SCL. */ -+ udelay(10); -+ gpiod_direction_output(bri->scl_gpiod, 1); -+ -+ if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER)) -+ return -EPROBE_DEFER; -+ -+ if (IS_ERR(bri->sda_gpiod)) { -+ dev_info(dev, "missing sda gpio recovery information: %pe\n", -+ bri->sda_gpiod); -+ return 0; -+ } -+ -+ bri->prepare_recovery = i2c_pxa_prepare_recovery; -+ bri->unprepare_recovery = i2c_pxa_unprepare_recovery; -+ bri->recover_bus = i2c_generic_scl_recovery; -+ -+ i2c->adap.bus_recovery_info = bri; -+ -+ /* -+ * Claiming GPIOs can change the pinmux state, which confuses the -+ * pinctrl since pinctrl's idea of the current setting is unaffected -+ * by the pinmux change caused by claiming the GPIO. Work around that -+ * by switching pinctrl to the GPIO state here. We do it this way to -+ * avoid glitching the I2C bus. -+ */ -+ pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery); -+ -+ return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default); -+} -+ - static int i2c_pxa_probe(struct platform_device *dev) - { - struct i2c_pxa_platform_data *plat = dev_get_platdata(&dev->dev); -@@ -1240,6 +1380,16 @@ static int i2c_pxa_probe(struct platform - if (!i2c) - return -ENOMEM; - -+ /* Default adapter num to device id; i2c_pxa_probe_dt can override. 
*/ -+ i2c->adap.nr = dev->id; -+ i2c->adap.owner = THIS_MODULE; -+ i2c->adap.retries = 5; -+ i2c->adap.algo_data = i2c; -+ i2c->adap.dev.parent = &dev->dev; -+#ifdef CONFIG_OF -+ i2c->adap.dev.of_node = dev->dev.of_node; -+#endif -+ - res = platform_get_resource(dev, IORESOURCE_MEM, 0); - i2c->reg_base = devm_ioremap_resource(&dev->dev, res); - if (IS_ERR(i2c->reg_base)) -@@ -1251,8 +1401,9 @@ static int i2c_pxa_probe(struct platform - return irq; - } - -- /* Default adapter num to device id; i2c_pxa_probe_dt can override. */ -- i2c->adap.nr = dev->id; -+ ret = i2c_pxa_init_recovery(i2c); -+ if (ret) -+ return ret; - - ret = i2c_pxa_probe_dt(dev, i2c, &i2c_type); - if (ret > 0) -@@ -1260,9 +1411,6 @@ static int i2c_pxa_probe(struct platform - if (ret < 0) - return ret; - -- i2c->adap.owner = THIS_MODULE; -- i2c->adap.retries = 5; -- - spin_lock_init(&i2c->lock); - init_waitqueue_head(&i2c->wait); - -@@ -1332,12 +1480,6 @@ static int i2c_pxa_probe(struct platform - - i2c_pxa_reset(i2c); - -- i2c->adap.algo_data = i2c; -- i2c->adap.dev.parent = &dev->dev; --#ifdef CONFIG_OF -- i2c->adap.dev.of_node = dev->dev.of_node; --#endif -- - ret = i2c_add_numbered_adapter(&i2c->adap); - if (ret < 0) - goto ereqirq; diff --git a/target/linux/generic/backport-5.4/825-v5.8-spi-rb4xx-null-pointer-bug-fix.patch b/target/linux/generic/backport-5.4/825-v5.8-spi-rb4xx-null-pointer-bug-fix.patch deleted file mode 100644 index 71e26d50da..0000000000 --- a/target/linux/generic/backport-5.4/825-v5.8-spi-rb4xx-null-pointer-bug-fix.patch +++ /dev/null @@ -1,48 +0,0 @@ -From: Christopher Hill <ch6574@gmail.com> -To: Mark Brown <broonie@kernel.org> -Cc: Christopher Hill <ch6574@gmail.com>, linux-spi@vger.kernel.org, - linux-kernel@vger.kernel.org -Subject: [PATCH 1/3] spi: rb4xx: null pointer bug fix -Date: Thu, 21 May 2020 14:36:29 -0400 -Message-Id: <20200521183631.37806-1-ch6574@gmail.com> -X-Mailer: git-send-email 2.25.1 -MIME-Version: 1.0 -Sender: linux-spi-owner@vger.kernel.org 
-Precedence: bulk -List-ID: <linux-spi.vger.kernel.org> -X-Mailing-List: linux-spi@vger.kernel.org - -This patch fixes a null pointer bug in the spi driver spi-rb4xx.c by -moving the private data initialization to earlier in probe - -Signed-off-by: Christopher Hill <ch6574@gmail.com> ---- - drivers/spi/spi-rb4xx.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - ---- a/drivers/spi/spi-rb4xx.c -+++ b/drivers/spi/spi-rb4xx.c -@@ -158,6 +158,11 @@ static int rb4xx_spi_probe(struct platfo - master->transfer_one = rb4xx_transfer_one; - master->set_cs = rb4xx_set_cs; - -+ rbspi = spi_master_get_devdata(master); -+ rbspi->base = spi_base; -+ rbspi->clk = ahb_clk; -+ platform_set_drvdata(pdev, rbspi); -+ - err = devm_spi_register_master(&pdev->dev, master); - if (err) { - dev_err(&pdev->dev, "failed to register SPI master\n"); -@@ -168,11 +173,6 @@ static int rb4xx_spi_probe(struct platfo - if (err) - return err; - -- rbspi = spi_master_get_devdata(master); -- rbspi->base = spi_base; -- rbspi->clk = ahb_clk; -- platform_set_drvdata(pdev, rbspi); -- - /* Enable SPI */ - rb4xx_write(rbspi, AR71XX_SPI_REG_FS, AR71XX_SPI_FS_GPIO); - diff --git a/target/linux/generic/backport-5.4/826-v5.8-spi-rb4xx-update-driver-to-be-device-tree-aware.patch b/target/linux/generic/backport-5.4/826-v5.8-spi-rb4xx-update-driver-to-be-device-tree-aware.patch deleted file mode 100644 index 0ce4f2bb35..0000000000 --- a/target/linux/generic/backport-5.4/826-v5.8-spi-rb4xx-update-driver-to-be-device-tree-aware.patch +++ /dev/null @@ -1,60 +0,0 @@ -From: Christopher Hill <ch6574@gmail.com> -To: Mark Brown <broonie@kernel.org> -Cc: Christopher Hill <ch6574@gmail.com>, linux-spi@vger.kernel.org, - linux-kernel@vger.kernel.org -Subject: [PATCH 2/3] spi: rb4xx: update driver to be device tree aware -Date: Thu, 21 May 2020 14:36:30 -0400 -Message-Id: <20200521183631.37806-2-ch6574@gmail.com> -X-Mailer: git-send-email 2.25.1 -In-Reply-To: <20200521183631.37806-1-ch6574@gmail.com> 
-References: <20200521183631.37806-1-ch6574@gmail.com> -MIME-Version: 1.0 -Sender: linux-spi-owner@vger.kernel.org -Precedence: bulk -List-ID: <linux-spi.vger.kernel.org> -X-Mailing-List: linux-spi@vger.kernel.org - -This patch updates the spi driver spi-rb4xx.c to be device tree aware - -Signed-off-by: Christopher Hill <ch6574@gmail.com> ---- - drivers/spi/spi-rb4xx.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - ---- a/drivers/spi/spi-rb4xx.c -+++ b/drivers/spi/spi-rb4xx.c -@@ -14,6 +14,7 @@ - #include <linux/platform_device.h> - #include <linux/clk.h> - #include <linux/spi/spi.h> -+#include <linux/of.h> - - #include <asm/mach-ath79/ar71xx_regs.h> - -@@ -150,6 +151,7 @@ static int rb4xx_spi_probe(struct platfo - if (IS_ERR(ahb_clk)) - return PTR_ERR(ahb_clk); - -+ master->dev.of_node = pdev->dev.of_node; - master->bus_num = 0; - master->num_chipselect = 3; - master->mode_bits = SPI_TX_DUAL; -@@ -188,11 +190,18 @@ static int rb4xx_spi_remove(struct platf - return 0; - } - -+static const struct of_device_id rb4xx_spi_dt_match[] = { -+ { .compatible = "mikrotik,rb4xx-spi" }, -+ { }, -+}; -+MODULE_DEVICE_TABLE(of, rb4xx_spi_dt_match); -+ - static struct platform_driver rb4xx_spi_drv = { - .probe = rb4xx_spi_probe, - .remove = rb4xx_spi_remove, - .driver = { - .name = "rb4xx-spi", -+ .of_match_table = of_match_ptr(rb4xx_spi_dt_match), - }, - }; - diff --git a/target/linux/generic/backport-5.4/831-v5.13-0001-firmware-bcm47xx_nvram-rename-finding-function-and-i.patch b/target/linux/generic/backport-5.4/831-v5.13-0001-firmware-bcm47xx_nvram-rename-finding-function-and-i.patch deleted file mode 100644 index 19938704b7..0000000000 --- a/target/linux/generic/backport-5.4/831-v5.13-0001-firmware-bcm47xx_nvram-rename-finding-function-and-i.patch +++ /dev/null @@ -1,80 +0,0 @@ -From fb009cbdd0693bd633f11e99526617b3d392cfad Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Mon, 8 Mar 2021 10:03:16 +0100 -Subject: [PATCH] 
firmware: bcm47xx_nvram: rename finding function and its - variables -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -1. Use "bcm47xx_" function name prefix for consistency -2. It takes flash start as argument so s/iobase/flash_start/ -3. "off" was used for finding flash end so just call it "flash_size" - -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> ---- - drivers/firmware/broadcom/bcm47xx_nvram.c | 24 ++++++++++++----------- - 1 file changed, 13 insertions(+), 11 deletions(-) - ---- a/drivers/firmware/broadcom/bcm47xx_nvram.c -+++ b/drivers/firmware/broadcom/bcm47xx_nvram.c -@@ -48,11 +48,13 @@ static u32 find_nvram_size(void __iomem - return 0; - } - --/* Probe for NVRAM header */ --static int nvram_find_and_copy(void __iomem *iobase, u32 lim) -+/** -+ * bcm47xx_nvram_find_and_copy - find NVRAM on flash mapping & copy it -+ */ -+static int bcm47xx_nvram_find_and_copy(void __iomem *flash_start, size_t res_size) - { - struct nvram_header __iomem *header; -- u32 off; -+ size_t flash_size; - u32 size; - - if (nvram_len) { -@@ -61,25 +63,25 @@ static int nvram_find_and_copy(void __io - } - - /* TODO: when nvram is on nand flash check for bad blocks first. 
*/ -- off = FLASH_MIN; -- while (off <= lim) { -+ flash_size = FLASH_MIN; -+ while (flash_size <= res_size) { - /* Windowed flash access */ -- size = find_nvram_size(iobase + off); -+ size = find_nvram_size(flash_start + flash_size); - if (size) { -- header = (struct nvram_header *)(iobase + off - size); -+ header = (struct nvram_header *)(flash_start + flash_size - size); - goto found; - } -- off <<= 1; -+ flash_size <<= 1; - } - - /* Try embedded NVRAM at 4 KB and 1 KB as last resorts */ -- header = (struct nvram_header *)(iobase + 4096); -+ header = (struct nvram_header *)(flash_start + 4096); - if (header->magic == NVRAM_MAGIC) { - size = NVRAM_SPACE; - goto found; - } - -- header = (struct nvram_header *)(iobase + 1024); -+ header = (struct nvram_header *)(flash_start + 1024); - if (header->magic == NVRAM_MAGIC) { - size = NVRAM_SPACE; - goto found; -@@ -124,7 +126,7 @@ int bcm47xx_nvram_init_from_mem(u32 base - if (!iobase) - return -ENOMEM; - -- err = nvram_find_and_copy(iobase, lim); -+ err = bcm47xx_nvram_find_and_copy(iobase, lim); - - iounmap(iobase); - diff --git a/target/linux/generic/backport-5.4/831-v5.13-0002-firmware-bcm47xx_nvram-add-helper-checking-for-NVRAM.patch b/target/linux/generic/backport-5.4/831-v5.13-0002-firmware-bcm47xx_nvram-add-helper-checking-for-NVRAM.patch deleted file mode 100644 index 6ab072883d..0000000000 --- a/target/linux/generic/backport-5.4/831-v5.13-0002-firmware-bcm47xx_nvram-add-helper-checking-for-NVRAM.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 0a24b51a3264a3f942a75025ea5ff6133c8989b0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Mon, 8 Mar 2021 10:03:17 +0100 -Subject: [PATCH] firmware: bcm47xx_nvram: add helper checking for NVRAM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This avoids duplicating code doing casting and checking for NVRAM magic. 
- -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> ---- - drivers/firmware/broadcom/bcm47xx_nvram.c | 30 ++++++++++++++--------- - 1 file changed, 18 insertions(+), 12 deletions(-) - ---- a/drivers/firmware/broadcom/bcm47xx_nvram.c -+++ b/drivers/firmware/broadcom/bcm47xx_nvram.c -@@ -34,14 +34,20 @@ static char nvram_buf[NVRAM_SPACE]; - static size_t nvram_len; - static const u32 nvram_sizes[] = {0x6000, 0x8000, 0xF000, 0x10000}; - -+/** -+ * bcm47xx_nvram_is_valid - check for a valid NVRAM at specified memory -+ */ -+static bool bcm47xx_nvram_is_valid(void __iomem *nvram) -+{ -+ return ((struct nvram_header *)nvram)->magic == NVRAM_MAGIC; -+} -+ - static u32 find_nvram_size(void __iomem *end) - { -- struct nvram_header __iomem *header; - int i; - - for (i = 0; i < ARRAY_SIZE(nvram_sizes); i++) { -- header = (struct nvram_header *)(end - nvram_sizes[i]); -- if (header->magic == NVRAM_MAGIC) -+ if (bcm47xx_nvram_is_valid(end - nvram_sizes[i])) - return nvram_sizes[i]; - } - -@@ -55,6 +61,7 @@ static int bcm47xx_nvram_find_and_copy(v - { - struct nvram_header __iomem *header; - size_t flash_size; -+ size_t offset; - u32 size; - - if (nvram_len) { -@@ -68,31 +75,30 @@ static int bcm47xx_nvram_find_and_copy(v - /* Windowed flash access */ - size = find_nvram_size(flash_start + flash_size); - if (size) { -- header = (struct nvram_header *)(flash_start + flash_size - size); -+ offset = flash_size - size; - goto found; - } - flash_size <<= 1; - } - - /* Try embedded NVRAM at 4 KB and 1 KB as last resorts */ -- header = (struct nvram_header *)(flash_start + 4096); -- if (header->magic == NVRAM_MAGIC) { -- size = NVRAM_SPACE; -+ -+ offset = 4096; -+ if (bcm47xx_nvram_is_valid(flash_start + offset)) - goto found; -- } - -- header = (struct nvram_header *)(flash_start + 1024); -- if (header->magic == NVRAM_MAGIC) { -- size = NVRAM_SPACE; -+ offset = 1024; -+ if (bcm47xx_nvram_is_valid(flash_start + offset)) - 
goto found; -- } - - pr_err("no nvram found\n"); - return -ENXIO; - - found: -+ header = (struct nvram_header *)(flash_start + offset); - __ioread32_copy(nvram_buf, header, sizeof(*header) / 4); - nvram_len = ((struct nvram_header *)(nvram_buf))->len; -+ size = res_size - offset; - if (nvram_len > size) { - pr_err("The nvram size according to the header seems to be bigger than the partition on flash\n"); - nvram_len = size; diff --git a/target/linux/generic/backport-5.4/831-v5.13-0003-firmware-bcm47xx_nvram-extract-code-copying-NVRAM.patch b/target/linux/generic/backport-5.4/831-v5.13-0003-firmware-bcm47xx_nvram-extract-code-copying-NVRAM.patch deleted file mode 100644 index a1351f1197..0000000000 --- a/target/linux/generic/backport-5.4/831-v5.13-0003-firmware-bcm47xx_nvram-extract-code-copying-NVRAM.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 298923cf999cecd2ef06df126f85a3d68da8c4d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Mon, 8 Mar 2021 10:03:18 +0100 -Subject: [PATCH] firmware: bcm47xx_nvram: extract code copying NVRAM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This simplifies function finding NVRAM. It doesn't directly deal with -NVRAM structure anymore and is a bit smaller. 
- -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> ---- - drivers/firmware/broadcom/bcm47xx_nvram.c | 43 +++++++++++++---------- - 1 file changed, 25 insertions(+), 18 deletions(-) - ---- a/drivers/firmware/broadcom/bcm47xx_nvram.c -+++ b/drivers/firmware/broadcom/bcm47xx_nvram.c -@@ -55,11 +55,34 @@ static u32 find_nvram_size(void __iomem - } - - /** -+ * bcm47xx_nvram_copy - copy NVRAM to internal buffer -+ */ -+static void bcm47xx_nvram_copy(void __iomem *nvram_start, size_t res_size) -+{ -+ struct nvram_header __iomem *header = nvram_start; -+ size_t copy_size; -+ -+ copy_size = header->len; -+ if (copy_size > res_size) { -+ pr_err("The nvram size according to the header seems to be bigger than the partition on flash\n"); -+ copy_size = res_size; -+ } -+ if (copy_size >= NVRAM_SPACE) { -+ pr_err("nvram on flash (%zu bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n", -+ copy_size, NVRAM_SPACE - 1); -+ copy_size = NVRAM_SPACE - 1; -+ } -+ -+ __ioread32_copy(nvram_buf, nvram_start, DIV_ROUND_UP(copy_size, 4)); -+ nvram_buf[NVRAM_SPACE - 1] = '\0'; -+ nvram_len = copy_size; -+} -+ -+/** - * bcm47xx_nvram_find_and_copy - find NVRAM on flash mapping & copy it - */ - static int bcm47xx_nvram_find_and_copy(void __iomem *flash_start, size_t res_size) - { -- struct nvram_header __iomem *header; - size_t flash_size; - size_t offset; - u32 size; -@@ -95,23 +118,7 @@ static int bcm47xx_nvram_find_and_copy(v - return -ENXIO; - - found: -- header = (struct nvram_header *)(flash_start + offset); -- __ioread32_copy(nvram_buf, header, sizeof(*header) / 4); -- nvram_len = ((struct nvram_header *)(nvram_buf))->len; -- size = res_size - offset; -- if (nvram_len > size) { -- pr_err("The nvram size according to the header seems to be bigger than the partition on flash\n"); -- nvram_len = size; -- } -- if (nvram_len >= NVRAM_SPACE) { -- pr_err("nvram on flash (%zu bytes) is bigger 
than the reserved space in memory, will just copy the first %i bytes\n", -- nvram_len, NVRAM_SPACE - 1); -- nvram_len = NVRAM_SPACE - 1; -- } -- /* proceed reading data after header */ -- __ioread32_copy(nvram_buf + sizeof(*header), header + 1, -- DIV_ROUND_UP(nvram_len, 4)); -- nvram_buf[NVRAM_SPACE - 1] = '\0'; -+ bcm47xx_nvram_copy(flash_start + offset, res_size - offset); - - return 0; - } diff --git a/target/linux/generic/backport-5.4/831-v5.13-0004-firmware-bcm47xx_nvram-look-for-NVRAM-with-for-inste.patch b/target/linux/generic/backport-5.4/831-v5.13-0004-firmware-bcm47xx_nvram-look-for-NVRAM-with-for-inste.patch deleted file mode 100644 index 059a13220b..0000000000 --- a/target/linux/generic/backport-5.4/831-v5.13-0004-firmware-bcm47xx_nvram-look-for-NVRAM-with-for-inste.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 98b68324f67236e8c9152976535dc1f27fb67ba8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Mon, 8 Mar 2021 10:03:19 +0100 -Subject: [PATCH] firmware: bcm47xx_nvram: look for NVRAM with for instead of - while -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This loop requires variable initialization, stop condition and post -iteration increment. It's pretty much a for loop definition. - -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> ---- - drivers/firmware/broadcom/bcm47xx_nvram.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - ---- a/drivers/firmware/broadcom/bcm47xx_nvram.c -+++ b/drivers/firmware/broadcom/bcm47xx_nvram.c -@@ -93,15 +93,13 @@ static int bcm47xx_nvram_find_and_copy(v - } - - /* TODO: when nvram is on nand flash check for bad blocks first. 
*/ -- flash_size = FLASH_MIN; -- while (flash_size <= res_size) { -+ for (flash_size = FLASH_MIN; flash_size <= res_size; flash_size <<= 1) { - /* Windowed flash access */ - size = find_nvram_size(flash_start + flash_size); - if (size) { - offset = flash_size - size; - goto found; - } -- flash_size <<= 1; - } - - /* Try embedded NVRAM at 4 KB and 1 KB as last resorts */ diff --git a/target/linux/generic/backport-5.4/831-v5.13-0005-firmware-bcm47xx_nvram-inline-code-checking-NVRAM-si.patch b/target/linux/generic/backport-5.4/831-v5.13-0005-firmware-bcm47xx_nvram-inline-code-checking-NVRAM-si.patch deleted file mode 100644 index 21d250049e..0000000000 --- a/target/linux/generic/backport-5.4/831-v5.13-0005-firmware-bcm47xx_nvram-inline-code-checking-NVRAM-si.patch +++ /dev/null @@ -1,70 +0,0 @@ -From f52da4ccfec9192e17f5c16260dfdd6d3ea76f65 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl> -Date: Mon, 8 Mar 2021 10:03:20 +0100 -Subject: [PATCH] firmware: bcm47xx_nvram: inline code checking NVRAM size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Separated function was not improving code quality much (or at all). -Moreover it expected possible flash end address as argument and it was -returning NVRAM size. - -The new code always operates on offsets which means less logic and less -calculations. 
- -Signed-off-by: Rafał Miłecki <rafal@milecki.pl> -Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> ---- - drivers/firmware/broadcom/bcm47xx_nvram.c | 25 +++++++---------------- - 1 file changed, 7 insertions(+), 18 deletions(-) - ---- a/drivers/firmware/broadcom/bcm47xx_nvram.c -+++ b/drivers/firmware/broadcom/bcm47xx_nvram.c -@@ -42,18 +42,6 @@ static bool bcm47xx_nvram_is_valid(void - return ((struct nvram_header *)nvram)->magic == NVRAM_MAGIC; - } - --static u32 find_nvram_size(void __iomem *end) --{ -- int i; -- -- for (i = 0; i < ARRAY_SIZE(nvram_sizes); i++) { -- if (bcm47xx_nvram_is_valid(end - nvram_sizes[i])) -- return nvram_sizes[i]; -- } -- -- return 0; --} -- - /** - * bcm47xx_nvram_copy - copy NVRAM to internal buffer - */ -@@ -85,7 +73,7 @@ static int bcm47xx_nvram_find_and_copy(v - { - size_t flash_size; - size_t offset; -- u32 size; -+ int i; - - if (nvram_len) { - pr_warn("nvram already initialized\n"); -@@ -93,12 +81,13 @@ static int bcm47xx_nvram_find_and_copy(v - } - - /* TODO: when nvram is on nand flash check for bad blocks first. 
*/ -+ -+ /* Try every possible flash size and check for NVRAM at its end */ - for (flash_size = FLASH_MIN; flash_size <= res_size; flash_size <<= 1) { -- /* Windowed flash access */ -- size = find_nvram_size(flash_start + flash_size); -- if (size) { -- offset = flash_size - size; -- goto found; -+ for (i = 0; i < ARRAY_SIZE(nvram_sizes); i++) { -+ offset = flash_size - nvram_sizes[i]; -+ if (bcm47xx_nvram_is_valid(flash_start + offset)) -+ goto found; - } - } - diff --git a/target/linux/generic/backport-5.4/850-v5.13-usb-ehci-add-spurious-flag-to-disable-overcurrent-ch.patch b/target/linux/generic/backport-5.4/850-v5.13-usb-ehci-add-spurious-flag-to-disable-overcurrent-ch.patch deleted file mode 100644 index 494c88a496..0000000000 --- a/target/linux/generic/backport-5.4/850-v5.13-usb-ehci-add-spurious-flag-to-disable-overcurrent-ch.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 2d5ba37461013253d2ff0a3641b727fd32ea97a9 Mon Sep 17 00:00:00 2001 -From: Florian Fainelli <florian@openwrt.org> -Date: Tue, 23 Feb 2021 18:44:53 +0100 -Subject: [PATCH 1/3] usb: ehci: add spurious flag to disable overcurrent - checking -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch adds an ignore_oc flag which can be set by EHCI controller -not supporting or wanting to disable overcurrent checking. The EHCI -platform data in include/linux/usb/ehci_pdriver.h is also augmented to -take advantage of this new flag. 
- -Signed-off-by: Florian Fainelli <florian@openwrt.org> -Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com> -Link: https://lore.kernel.org/r/20210223174455.1378-2-noltari@gmail.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - drivers/usb/host/ehci-hcd.c | 2 +- - drivers/usb/host/ehci-hub.c | 4 ++-- - drivers/usb/host/ehci-platform.c | 2 ++ - drivers/usb/host/ehci.h | 1 + - include/linux/usb/ehci_pdriver.h | 1 + - 5 files changed, 7 insertions(+), 3 deletions(-) - ---- a/drivers/usb/host/ehci-hcd.c -+++ b/drivers/usb/host/ehci-hcd.c -@@ -660,7 +660,7 @@ static int ehci_run (struct usb_hcd *hcd - "USB %x.%x started, EHCI %x.%02x%s\n", - ((ehci->sbrn & 0xf0)>>4), (ehci->sbrn & 0x0f), - temp >> 8, temp & 0xff, -- ignore_oc ? ", overcurrent ignored" : ""); -+ (ignore_oc || ehci->spurious_oc) ? ", overcurrent ignored" : ""); - - ehci_writel(ehci, INTR_MASK, - &ehci->regs->intr_enable); /* Turn On Interrupts */ ---- a/drivers/usb/host/ehci-hub.c -+++ b/drivers/usb/host/ehci-hub.c -@@ -643,7 +643,7 @@ ehci_hub_status_data (struct usb_hcd *hc - * always set, seem to clear PORT_OCC and PORT_CSC when writing to - * PORT_POWER; that's surprising, but maybe within-spec. 
- */ -- if (!ignore_oc) -+ if (!ignore_oc && !ehci->spurious_oc) - mask = PORT_CSC | PORT_PEC | PORT_OCC; - else - mask = PORT_CSC | PORT_PEC; -@@ -1013,7 +1013,7 @@ int ehci_hub_control( - if (temp & PORT_PEC) - status |= USB_PORT_STAT_C_ENABLE << 16; - -- if ((temp & PORT_OCC) && !ignore_oc){ -+ if ((temp & PORT_OCC) && (!ignore_oc && !ehci->spurious_oc)){ - status |= USB_PORT_STAT_C_OVERCURRENT << 16; - - /* ---- a/drivers/usb/host/ehci-platform.c -+++ b/drivers/usb/host/ehci-platform.c -@@ -325,6 +325,8 @@ static int ehci_platform_probe(struct pl - hcd->has_tt = 1; - if (pdata->reset_on_resume) - priv->reset_on_resume = true; -+ if (pdata->spurious_oc) -+ ehci->spurious_oc = 1; - - #ifndef CONFIG_USB_EHCI_BIG_ENDIAN_MMIO - if (ehci->big_endian_mmio) { ---- a/drivers/usb/host/ehci.h -+++ b/drivers/usb/host/ehci.h -@@ -219,6 +219,7 @@ struct ehci_hcd { /* one per controlle - unsigned need_oc_pp_cycle:1; /* MPC834X port power */ - unsigned imx28_write_fix:1; /* For Freescale i.MX28 */ - unsigned is_aspeed:1; -+ unsigned spurious_oc:1; - - /* required for usb32 quirk */ - #define OHCI_CTRL_HCFS (3 << 6) ---- a/include/linux/usb/ehci_pdriver.h -+++ b/include/linux/usb/ehci_pdriver.h -@@ -50,6 +50,7 @@ struct usb_ehci_pdata { - unsigned no_io_watchdog:1; - unsigned reset_on_resume:1; - unsigned dma_mask_64:1; -+ unsigned spurious_oc:1; - - /* Turn on all power and clocks */ - int (*power_on)(struct platform_device *pdev); diff --git a/target/linux/generic/backport-5.4/851-v5.13-usb-host-ehci-platform-add-spurious_oc-DT-support.patch b/target/linux/generic/backport-5.4/851-v5.13-usb-host-ehci-platform-add-spurious_oc-DT-support.patch deleted file mode 100644 index 6faefeb79c..0000000000 --- a/target/linux/generic/backport-5.4/851-v5.13-usb-host-ehci-platform-add-spurious_oc-DT-support.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 4da57dbbffdfa7fe4e2b70b047fc5ff95ff25a3d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= <noltari@gmail.com> 
-Date: Tue, 23 Feb 2021 18:44:55 +0100 -Subject: [PATCH 3/3] usb: host: ehci-platform: add spurious_oc DT support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Over-current reporting isn't supported on some platforms such as bcm63xx. -These devices will incorrectly report over-current if this flag isn't properly -activated. - -Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com> -Link: https://lore.kernel.org/r/20210223174455.1378-4-noltari@gmail.com -Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> ---- - drivers/usb/host/ehci-platform.c | 3 +++ - 1 file changed, 3 insertions(+) - ---- a/drivers/usb/host/ehci-platform.c -+++ b/drivers/usb/host/ehci-platform.c -@@ -278,6 +278,9 @@ static int ehci_platform_probe(struct pl - if (of_property_read_bool(dev->dev.of_node, "big-endian")) - ehci->big_endian_mmio = ehci->big_endian_desc = 1; - -+ if (of_property_read_bool(dev->dev.of_node, "spurious-oc")) -+ ehci->spurious_oc = 1; -+ - if (of_property_read_bool(dev->dev.of_node, - "needs-reset-on-resume")) - priv->reset_on_resume = true; diff --git a/target/linux/generic/backport-5.4/852-v5.10-0001-net-sfp-VSOL-V2801F-CarlitoxxPro-CPGOS03-0490-v2.0-w.patch b/target/linux/generic/backport-5.4/852-v5.10-0001-net-sfp-VSOL-V2801F-CarlitoxxPro-CPGOS03-0490-v2.0-w.patch deleted file mode 100644 index 1901054a10..0000000000 --- a/target/linux/generic/backport-5.4/852-v5.10-0001-net-sfp-VSOL-V2801F-CarlitoxxPro-CPGOS03-0490-v2.0-w.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 0d035bed2a4a6c4878518749348be61bf082d12a Mon Sep 17 00:00:00 2001 -From: Russell King <rmk+kernel@armlinux.org.uk> -Date: Wed, 9 Dec 2020 11:22:49 +0000 -Subject: [PATCH] net: sfp: VSOL V2801F / CarlitoxxPro CPGOS03-0490 v2.0 - workaround -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add a workaround for the detection of VSOL V2801F / CarlitoxxPro -CPGOS03-0490 v2.0 GPON module which 
CarlitoxxPro states needs single -byte I2C reads to the EEPROM. - -Pali Rohár reports that he also has a CarlitoxxPro-based V2801F module, -which reports a manufacturer of "OEM". This manufacturer can't be -matched as it appears in many different modules, so also match the part -number too. - -Reported-by: Thomas Schreiber <tschreibe@gmail.com> -Reported-by: Pali Rohár <pali@kernel.org> -Tested-by: Pali Rohár <pali@kernel.org> -Reviewed-by: Andrew Lunn <andrew@lunn.ch> -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> -Signed-off-by: David S. Miller <davem@davemloft.net> ---- - drivers/net/phy/sfp.c | 63 +++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 58 insertions(+), 5 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -191,6 +191,7 @@ struct sfp { - struct sfp_bus *sfp_bus; - struct phy_device *mod_phy; - const struct sff_data *type; -+ size_t i2c_block_size; - u32 max_power_mW; - - unsigned int (*get_state)(struct sfp *); -@@ -305,10 +306,19 @@ static int sfp_i2c_read(struct sfp *sfp, - size_t len) - { - struct i2c_msg msgs[2]; -- u8 bus_addr = a2 ? 0x51 : 0x50; -+ size_t block_size; - size_t this_len; -+ u8 bus_addr; - int ret; - -+ if (a2) { -+ block_size = 16; -+ bus_addr = 0x51; -+ } else { -+ block_size = sfp->i2c_block_size; -+ bus_addr = 0x50; -+ } -+ - msgs[0].addr = bus_addr; - msgs[0].flags = 0; - msgs[0].len = 1; -@@ -320,8 +330,8 @@ static int sfp_i2c_read(struct sfp *sfp, - - while (len) { - this_len = len; -- if (this_len > 16) -- this_len = 16; -+ if (this_len > block_size) -+ this_len = block_size; - - msgs[1].len = this_len; - -@@ -1569,6 +1579,28 @@ static int sfp_sm_mod_hpower(struct sfp - return 0; - } - -+/* Some modules (Nokia 3FE46541AA) lock up if byte 0x51 is read as a -+ * single read. Switch back to reading 16 byte blocks unless we have -+ * a CarlitoxxPro module (rebranded VSOL V2801F). Even more annoyingly, -+ * some VSOL V2801F have the vendor name changed to OEM. 
-+ */ -+static int sfp_quirk_i2c_block_size(const struct sfp_eeprom_base *base) -+{ -+ if (!memcmp(base->vendor_name, "VSOL ", 16)) -+ return 1; -+ if (!memcmp(base->vendor_name, "OEM ", 16) && -+ !memcmp(base->vendor_pn, "V2801F ", 16)) -+ return 1; -+ -+ /* Some modules can't cope with long reads */ -+ return 16; -+} -+ -+static void sfp_quirks_base(struct sfp *sfp, const struct sfp_eeprom_base *base) -+{ -+ sfp->i2c_block_size = sfp_quirk_i2c_block_size(base); -+} -+ - static int sfp_sm_mod_probe(struct sfp *sfp, bool report) - { - /* SFP module inserted - read I2C data */ -@@ -1577,14 +1609,20 @@ static int sfp_sm_mod_probe(struct sfp * - u8 check; - int ret; - -- ret = sfp_read(sfp, false, 0, &id, sizeof(id)); -+ /* Some modules (CarlitoxxPro CPGOS03-0490) do not support multibyte -+ * reads from the EEPROM, so start by reading the base identifying -+ * information one byte at a time. -+ */ -+ sfp->i2c_block_size = 1; -+ -+ ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); - if (ret < 0) { - if (report) - dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); - return -EAGAIN; - } - -- if (ret != sizeof(id)) { -+ if (ret != sizeof(id.base)) { - dev_err(sfp->dev, "EEPROM short read: %d\n", ret); - return -EAGAIN; - } -@@ -1612,6 +1650,21 @@ static int sfp_sm_mod_probe(struct sfp * - } - } - -+ /* Apply any early module-specific quirks */ -+ sfp_quirks_base(sfp, &id.base); -+ -+ ret = sfp_read(sfp, false, SFP_CC_BASE + 1, &id.ext, sizeof(id.ext)); -+ if (ret < 0) { -+ if (report) -+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); -+ return -EAGAIN; -+ } -+ -+ if (ret != sizeof(id.ext)) { -+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret); -+ return -EAGAIN; -+ } -+ - check = sfp_check(&id.ext, sizeof(id.ext) - 1); - if (check != id.ext.cc_ext) { - if (cotsworks) { diff --git a/target/linux/generic/backport-5.4/852-v5.10-0002-net-sfp-add-workaround-for-Realtek-RTL8672-and-RTL96.patch 
b/target/linux/generic/backport-5.4/852-v5.10-0002-net-sfp-add-workaround-for-Realtek-RTL8672-and-RTL96.patch deleted file mode 100644 index 27ae97cee7..0000000000 --- a/target/linux/generic/backport-5.4/852-v5.10-0002-net-sfp-add-workaround-for-Realtek-RTL8672-and-RTL96.patch +++ /dev/null @@ -1,211 +0,0 @@ -From 426c6cbc409cbda9ab1a9dbf15d3c2ef947eb8c1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org> -Date: Mon, 25 Jan 2021 16:02:27 +0100 -Subject: [PATCH] net: sfp: add workaround for Realtek RTL8672 and RTL9601C - chips -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The workaround for VSOL V2801F brand based GPON SFP modules added in commit -0d035bed2a4a ("net: sfp: VSOL V2801F / CarlitoxxPro CPGOS03-0490 v2.0 -workaround") works only for IDs added explicitly to the list. Since there -are rebranded modules where OEM vendors put different strings into the -vendor name field, we cannot base workaround on IDs only. - -Moreover the issue which the above mentioned commit tried to work around is -generic not only to VSOL based modules, but rather to all GPON modules -based on Realtek RTL8672 and RTL9601C chips. - -These include at least the following GPON modules: -* V-SOL V2801F -* C-Data FD511GX-RM0 -* OPTON GP801R -* BAUDCOM BD-1234-SFM -* CPGOS03-0490 v2.0 -* Ubiquiti U-Fiber Instant -* EXOT EGS1 - -These Realtek chips have broken EEPROM emulator which for N-byte read -operation returns just the first byte of EEPROM data, followed by N-1 -zeros. - -Introduce a new function, sfp_id_needs_byte_io(), which detects SFP modules -with broken EEPROM emulator based on N-1 zeros and switch to 1 byte EEPROM -reading operation. 
- -Function sfp_i2c_read() now always uses single byte reading when it is -required and when function sfp_hwmon_probe() detects single byte access, -it disables registration of hwmon device, because in this case we cannot -reliably and atomically read 2 bytes as is required by the standard for -retrieving values from diagnostic area. - -(These Realtek chips are broken in a way that violates SFP standards for -diagnostic interface. Kernel in this case simply cannot do anything less -of skipping registration of the hwmon interface.) - -This patch fixes reading of EEPROM content from SFP modules based on -Realtek RTL8672 and RTL9601C chips. Diagnostic interface of EEPROM stays -broken and cannot be fixed. - -Fixes: 0d035bed2a4a ("net: sfp: VSOL V2801F / CarlitoxxPro CPGOS03-0490 v2.0 workaround") -Co-developed-by: Russell King <rmk+kernel@armlinux.org.uk> -Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> -Signed-off-by: Pali Rohár <pali@kernel.org> -Signed-off-by: Jakub Kicinski <kuba@kernel.org> ---- - drivers/net/phy/sfp.c | 100 ++++++++++++++++++++++++++++-------------- - 1 file changed, 67 insertions(+), 33 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -306,19 +306,11 @@ static int sfp_i2c_read(struct sfp *sfp, - size_t len) - { - struct i2c_msg msgs[2]; -- size_t block_size; -+ u8 bus_addr = a2 ? 0x51 : 0x50; -+ size_t block_size = sfp->i2c_block_size; - size_t this_len; -- u8 bus_addr; - int ret; - -- if (a2) { -- block_size = 16; -- bus_addr = 0x51; -- } else { -- block_size = sfp->i2c_block_size; -- bus_addr = 0x50; -- } -- - msgs[0].addr = bus_addr; - msgs[0].flags = 0; - msgs[0].len = 1; -@@ -1245,6 +1237,20 @@ static void sfp_hwmon_probe(struct work_ - struct sfp *sfp = container_of(work, struct sfp, hwmon_probe.work); - int err, i; - -+ /* hwmon interface needs to access 16bit registers in atomic way to -+ * guarantee coherency of the diagnostic monitoring data. 
If it is not -+ * possible to guarantee coherency because EEPROM is broken in such way -+ * that does not support atomic 16bit read operation then we have to -+ * skip registration of hwmon device. -+ */ -+ if (sfp->i2c_block_size < 2) { -+ dev_info(sfp->dev, -+ "skipping hwmon device registration due to broken EEPROM\n"); -+ dev_info(sfp->dev, -+ "diagnostic EEPROM area cannot be read atomically to guarantee data coherency\n"); -+ return; -+ } -+ - err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag)); - if (err < 0) { - if (sfp->hwmon_tries--) { -@@ -1579,26 +1585,30 @@ static int sfp_sm_mod_hpower(struct sfp - return 0; - } - --/* Some modules (Nokia 3FE46541AA) lock up if byte 0x51 is read as a -- * single read. Switch back to reading 16 byte blocks unless we have -- * a CarlitoxxPro module (rebranded VSOL V2801F). Even more annoyingly, -- * some VSOL V2801F have the vendor name changed to OEM. -+/* GPON modules based on Realtek RTL8672 and RTL9601C chips (e.g. V-SOL -+ * V2801F, CarlitoxxPro CPGOS03-0490, Ubiquiti U-Fiber Instant, ...) do -+ * not support multibyte reads from the EEPROM. Each multi-byte read -+ * operation returns just one byte of EEPROM followed by zeros. There is -+ * no way to identify which modules are using Realtek RTL8672 and RTL9601C -+ * chips. Moreover every OEM of V-SOL V2801F module puts its own vendor -+ * name and vendor id into EEPROM, so there is even no way to detect if -+ * module is V-SOL V2801F. Therefore check for those zeros in the read -+ * data and then based on check switch to reading EEPROM to one byte -+ * at a time. 
- */ --static int sfp_quirk_i2c_block_size(const struct sfp_eeprom_base *base) -+static bool sfp_id_needs_byte_io(struct sfp *sfp, void *buf, size_t len) - { -- if (!memcmp(base->vendor_name, "VSOL ", 16)) -- return 1; -- if (!memcmp(base->vendor_name, "OEM ", 16) && -- !memcmp(base->vendor_pn, "V2801F ", 16)) -- return 1; -+ size_t i, block_size = sfp->i2c_block_size; - -- /* Some modules can't cope with long reads */ -- return 16; --} -+ /* Already using byte IO */ -+ if (block_size == 1) -+ return false; - --static void sfp_quirks_base(struct sfp *sfp, const struct sfp_eeprom_base *base) --{ -- sfp->i2c_block_size = sfp_quirk_i2c_block_size(base); -+ for (i = 1; i < len; i += block_size) { -+ if (memchr_inv(buf + i, '\0', min(block_size - 1, len - i))) -+ return false; -+ } -+ return true; - } - - static int sfp_sm_mod_probe(struct sfp *sfp, bool report) -@@ -1609,11 +1619,11 @@ static int sfp_sm_mod_probe(struct sfp * - u8 check; - int ret; - -- /* Some modules (CarlitoxxPro CPGOS03-0490) do not support multibyte -- * reads from the EEPROM, so start by reading the base identifying -- * information one byte at a time. -+ /* Some SFP modules and also some Linux I2C drivers do not like reads -+ * longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at -+ * a time. - */ -- sfp->i2c_block_size = 1; -+ sfp->i2c_block_size = 16; - - ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); - if (ret < 0) { -@@ -1627,6 +1637,33 @@ static int sfp_sm_mod_probe(struct sfp * - return -EAGAIN; - } - -+ /* Some SFP modules (e.g. Nokia 3FE46541AA) lock up if read from -+ * address 0x51 is just one byte at a time. Also SFF-8472 requires -+ * that EEPROM supports atomic 16bit read operation for diagnostic -+ * fields, so do not switch to one byte reading at a time unless it -+ * is really required and we have no other option. 
-+ */ -+ if (sfp_id_needs_byte_io(sfp, &id.base, sizeof(id.base))) { -+ dev_info(sfp->dev, -+ "Detected broken RTL8672/RTL9601C emulated EEPROM\n"); -+ dev_info(sfp->dev, -+ "Switching to reading EEPROM to one byte at a time\n"); -+ sfp->i2c_block_size = 1; -+ -+ ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); -+ if (ret < 0) { -+ if (report) -+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", -+ ret); -+ return -EAGAIN; -+ } -+ -+ if (ret != sizeof(id.base)) { -+ dev_err(sfp->dev, "EEPROM short read: %d\n", ret); -+ return -EAGAIN; -+ } -+ } -+ - /* Cotsworks do not seem to update the checksums when they - * do the final programming with the final module part number, - * serial number and date code. -@@ -1650,9 +1687,6 @@ static int sfp_sm_mod_probe(struct sfp * - } - } - -- /* Apply any early module-specific quirks */ -- sfp_quirks_base(sfp, &id.base); -- - ret = sfp_read(sfp, false, SFP_CC_BASE + 1, &id.ext, sizeof(id.ext)); - if (ret < 0) { - if (report) diff --git a/target/linux/generic/backport-5.4/900-v5.9-0001-dt-bindings-Add-multicolor-class-dt-bindings-documen.patch b/target/linux/generic/backport-5.4/900-v5.9-0001-dt-bindings-Add-multicolor-class-dt-bindings-documen.patch deleted file mode 100644 index acc32b69fb..0000000000 --- a/target/linux/generic/backport-5.4/900-v5.9-0001-dt-bindings-Add-multicolor-class-dt-bindings-documen.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 5c7f8ffe741daae7f8d811a2037b2693f02c90c5 Mon Sep 17 00:00:00 2001 -From: Dan Murphy <dmurphy@ti.com> -Date: Mon, 13 Jul 2020 10:45:31 -0500 -Subject: [PATCH] dt: bindings: Add multicolor class dt bindings documention -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add DT bindings for the LEDs multicolor class framework. -Add multicolor ID to the color ID list for device tree bindings. 
- -CC: Rob Herring <robh@kernel.org> -Reviewed-by: Rob Herring <robh@kernel.org> -Acked-by: Pavel Machek <pavel@ucw.cz> -Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com> -Signed-off-by: Dan Murphy <dmurphy@ti.com> -Reviewed-by: Marek Behún <marek.behun@nic.cz> -Signed-off-by: Pavel Machek <pavel@ucw.cz> ---- - .../bindings/leds/leds-class-multicolor.yaml | 37 +++++++++++++++++++ - include/dt-bindings/leds/common.h | 3 +- - 2 files changed, 39 insertions(+), 1 deletion(-) - create mode 100644 Documentation/devicetree/bindings/leds/leds-class-multicolor.yaml - ---- /dev/null -+++ b/Documentation/devicetree/bindings/leds/leds-class-multicolor.yaml -@@ -0,0 +1,37 @@ -+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/leds/leds-class-multicolor.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: Common properties for the multicolor LED class. -+ -+maintainers: -+ - Dan Murphy <dmurphy@ti.com> -+ -+description: | -+ Bindings for multi color LEDs show how to describe current outputs of -+ either integrated multi-color LED elements (like RGB, RGBW, RGBWA-UV -+ etc.) or standalone LEDs, to achieve logically grouped multi-color LED -+ modules. This is achieved by adding multi-led nodes layer to the -+ monochrome LED bindings. -+ The nodes and properties defined in this document are unique to the multicolor -+ LED class. Common LED nodes and properties are inherited from the common.txt -+ within this documentation directory. -+ -+patternProperties: -+ "^multi-led@([0-9a-f])$": -+ type: object -+ description: Represents the LEDs that are to be grouped. -+ properties: -+ color: -+ const: 8 # LED_COLOR_ID_MULTI -+ description: | -+ For multicolor LED support this property should be defined as -+ LED_COLOR_ID_MULTI which can be found in include/linux/leds/common.h. -+ -+ $ref: "common.yaml#" -+ -+ required: -+ - color -+... 
---- a/include/dt-bindings/leds/common.h -+++ b/include/dt-bindings/leds/common.h -@@ -29,7 +29,8 @@ - #define LED_COLOR_ID_VIOLET 5 - #define LED_COLOR_ID_YELLOW 6 - #define LED_COLOR_ID_IR 7 --#define LED_COLOR_ID_MAX 8 -+#define LED_COLOR_ID_MULTI 8 -+#define LED_COLOR_ID_MAX 9 - - /* Standard LED functions */ - #define LED_FUNCTION_ACTIVITY "activity" diff --git a/target/linux/generic/backport-5.4/900-v5.9-0002-leds-Add-multicolor-ID-to-the-color-ID-list.patch b/target/linux/generic/backport-5.4/900-v5.9-0002-leds-Add-multicolor-ID-to-the-color-ID-list.patch deleted file mode 100644 index 5de5dbda04..0000000000 --- a/target/linux/generic/backport-5.4/900-v5.9-0002-leds-Add-multicolor-ID-to-the-color-ID-list.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 10d3e0d815879129e916cd83e1034438e06efdaa Mon Sep 17 00:00:00 2001 -From: Dan Murphy <dmurphy@ti.com> -Date: Mon, 13 Jul 2020 10:45:32 -0500 -Subject: [PATCH] leds: Add multicolor ID to the color ID list -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add a new color ID that is declared as MULTICOLOR as with the -multicolor framework declaring a definitive color is not accurate -as the node can contain multiple colors. 
- -Signed-off-by: Dan Murphy <dmurphy@ti.com> -Reviewed-by: Marek Behún <marek.behun@nic.cz> -Signed-off-by: Pavel Machek <pavel@ucw.cz> ---- - drivers/leds/led-core.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/drivers/leds/led-core.c -+++ b/drivers/leds/led-core.c -@@ -34,6 +34,7 @@ const char * const led_colors[LED_COLOR_ - [LED_COLOR_ID_VIOLET] = "violet", - [LED_COLOR_ID_YELLOW] = "yellow", - [LED_COLOR_ID_IR] = "ir", -+ [LED_COLOR_ID_MULTI] = "multicolor", - }; - EXPORT_SYMBOL_GPL(led_colors); - diff --git a/target/linux/generic/backport-5.4/900-v5.9-0003-leds-add-RGB-color-option-as-that-is-different-from-.patch b/target/linux/generic/backport-5.4/900-v5.9-0003-leds-add-RGB-color-option-as-that-is-different-from-.patch deleted file mode 100644 index 17c28149f6..0000000000 --- a/target/linux/generic/backport-5.4/900-v5.9-0003-leds-add-RGB-color-option-as-that-is-different-from-.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 54212f5a1ba3123281877e54c1e5f672bf7563d8 Mon Sep 17 00:00:00 2001 -From: Pavel Machek <pavel@ucw.cz> -Date: Mon, 3 Aug 2020 13:20:06 +0200 -Subject: [PATCH] leds: add RGB color option, as that is different from - multicolor. - -Multicolor is a bit too abstract. Yes, we can have -Green-Magenta-Ultraviolet LED, but so far all the LEDs we support are -RGB, and not even RGB-White or RGB-Yellow variants emerged. - -Multicolor is not a good fit for RGB LED. It does not really know -about LED color. In particular, there's no way to make LED "white". - -Userspace is interested in knowing "this LED can produce arbitrary -color", which not all multicolor LEDs can. 
- -Signed-off-by: Pavel Machek <pavel@ucw.cz> ---- - drivers/leds/led-core.c | 1 + - drivers/leds/leds-lp55xx-common.c | 2 +- - include/dt-bindings/leds/common.h | 6 ++++-- - 3 files changed, 6 insertions(+), 3 deletions(-) - ---- a/drivers/leds/led-core.c -+++ b/drivers/leds/led-core.c -@@ -35,6 +35,7 @@ const char * const led_colors[LED_COLOR_ - [LED_COLOR_ID_YELLOW] = "yellow", - [LED_COLOR_ID_IR] = "ir", - [LED_COLOR_ID_MULTI] = "multicolor", -+ [LED_COLOR_ID_RGB] = "rgb", - }; - EXPORT_SYMBOL_GPL(led_colors); - ---- a/include/dt-bindings/leds/common.h -+++ b/include/dt-bindings/leds/common.h -@@ -29,8 +29,10 @@ - #define LED_COLOR_ID_VIOLET 5 - #define LED_COLOR_ID_YELLOW 6 - #define LED_COLOR_ID_IR 7 --#define LED_COLOR_ID_MULTI 8 --#define LED_COLOR_ID_MAX 9 -+#define LED_COLOR_ID_MULTI 8 /* For multicolor LEDs */ -+#define LED_COLOR_ID_RGB 9 /* For multicolor LEDs that can do arbitrary color, -+ so this would include RGBW and similar */ -+#define LED_COLOR_ID_MAX 10 - - /* Standard LED functions */ - #define LED_FUNCTION_ACTIVITY "activity" |