aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch')
-rw-r--r--target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch237
1 files changed, 0 insertions, 237 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch b/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
deleted file mode 100644
index 78da24ea46..0000000000
--- a/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
+++ /dev/null
@@ -1,237 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Jason A. Donenfeld" <Jason@zx2c4.com>
-Date: Fri, 4 Jun 2021 17:17:36 +0200
-Subject: [PATCH] wireguard: allowedips: remove nodes in O(1)
-
-commit f634f418c227c912e7ea95a3299efdc9b10e4022 upstream.
-
-Previously, deleting peers would require traversing the entire trie in
-order to rebalance nodes and safely free them. This meant that removing
-1000 peers from a trie with a half million nodes would take an extremely
-long time, during which we're holding the rtnl lock. Large-scale users
-were reporting 200ms latencies added to the networking stack as a whole
-every time their userspace software would queue up significant removals.
-That's a serious situation.
-
-This commit fixes that by maintaining a double pointer to the parent's
-bit pointer for each node, and then using the already existing node list
-belonging to each peer to go directly to the node, fix up its pointers,
-and free it with RCU. This means removal is O(1) instead of O(n), and we
-don't use gobs of stack.
-
-The removal algorithm has the same downside as the code that it fixes:
-it won't collapse needlessly long runs of fillers. We can enhance that
-in the future if it ever becomes a problem. This commit documents that
-limitation with a TODO comment in code, a small but meaningful
-improvement over the prior situation.
-
-Currently the biggest flaw, which the next commit addresses, is that
-because this increases the node size on 64-bit machines from 60 bytes to
-68 bytes. 60 rounds up to 64, but 68 rounds up to 128. So we wind up
-using twice as much memory per node, because of power-of-two
-allocations, which is a big bummer. We'll need to figure something out
-there.
-
-Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
-Cc: stable@vger.kernel.org
-Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
----
- drivers/net/wireguard/allowedips.c | 132 ++++++++++++-----------------
- drivers/net/wireguard/allowedips.h | 9 +-
- 2 files changed, 57 insertions(+), 84 deletions(-)
-
---- a/drivers/net/wireguard/allowedips.c
-+++ b/drivers/net/wireguard/allowedips.c
-@@ -66,60 +66,6 @@ static void root_remove_peer_lists(struc
- }
- }
-
--static void walk_remove_by_peer(struct allowedips_node __rcu **top,
-- struct wg_peer *peer, struct mutex *lock)
--{
--#define REF(p) rcu_access_pointer(p)
--#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
--#define PUSH(p) ({ \
-- WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
-- stack[len++] = p; \
-- })
--
-- struct allowedips_node __rcu **stack[128], **nptr;
-- struct allowedips_node *node, *prev;
-- unsigned int len;
--
-- if (unlikely(!peer || !REF(*top)))
-- return;
--
-- for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
-- nptr = stack[len - 1];
-- node = DEREF(nptr);
-- if (!node) {
-- --len;
-- continue;
-- }
-- if (!prev || REF(prev->bit[0]) == node ||
-- REF(prev->bit[1]) == node) {
-- if (REF(node->bit[0]))
-- PUSH(&node->bit[0]);
-- else if (REF(node->bit[1]))
-- PUSH(&node->bit[1]);
-- } else if (REF(node->bit[0]) == prev) {
-- if (REF(node->bit[1]))
-- PUSH(&node->bit[1]);
-- } else {
-- if (rcu_dereference_protected(node->peer,
-- lockdep_is_held(lock)) == peer) {
-- RCU_INIT_POINTER(node->peer, NULL);
-- list_del_init(&node->peer_list);
-- if (!node->bit[0] || !node->bit[1]) {
-- rcu_assign_pointer(*nptr, DEREF(
-- &node->bit[!REF(node->bit[0])]));
-- kfree_rcu(node, rcu);
-- node = DEREF(nptr);
-- }
-- }
-- --len;
-- }
-- }
--
--#undef REF
--#undef DEREF
--#undef PUSH
--}
--
- static unsigned int fls128(u64 a, u64 b)
- {
- return a ? fls64(a) + 64U : fls64(b);
-@@ -224,6 +170,7 @@ static int add(struct allowedips_node __
- RCU_INIT_POINTER(node->peer, peer);
- list_add_tail(&node->peer_list, &peer->allowedips_list);
- copy_and_assign_cidr(node, key, cidr, bits);
-+ rcu_assign_pointer(node->parent_bit, trie);
- rcu_assign_pointer(*trie, node);
- return 0;
- }
-@@ -243,9 +190,9 @@ static int add(struct allowedips_node __
- if (!node) {
- down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
- } else {
-- down = rcu_dereference_protected(CHOOSE_NODE(node, key),
-- lockdep_is_held(lock));
-+ down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
- if (!down) {
-+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
- rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
- return 0;
- }
-@@ -254,29 +201,37 @@ static int add(struct allowedips_node __
- parent = node;
-
- if (newnode->cidr == cidr) {
-+ rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
- rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
-- if (!parent)
-+ if (!parent) {
-+ rcu_assign_pointer(newnode->parent_bit, trie);
- rcu_assign_pointer(*trie, newnode);
-- else
-- rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
-- newnode);
-- } else {
-- node = kzalloc(sizeof(*node), GFP_KERNEL);
-- if (unlikely(!node)) {
-- list_del(&newnode->peer_list);
-- kfree(newnode);
-- return -ENOMEM;
-+ } else {
-+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
-+ rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
- }
-- INIT_LIST_HEAD(&node->peer_list);
-- copy_and_assign_cidr(node, newnode->bits, cidr, bits);
-+ return 0;
-+ }
-+
-+ node = kzalloc(sizeof(*node), GFP_KERNEL);
-+ if (unlikely(!node)) {
-+ list_del(&newnode->peer_list);
-+ kfree(newnode);
-+ return -ENOMEM;
-+ }
-+ INIT_LIST_HEAD(&node->peer_list);
-+ copy_and_assign_cidr(node, newnode->bits, cidr, bits);
-
-- rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-- rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-- if (!parent)
-- rcu_assign_pointer(*trie, node);
-- else
-- rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
-- node);
-+ rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
-+ rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-+ rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
-+ rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-+ if (!parent) {
-+ rcu_assign_pointer(node->parent_bit, trie);
-+ rcu_assign_pointer(*trie, node);
-+ } else {
-+ rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
-+ rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
- }
- return 0;
- }
-@@ -335,9 +290,30 @@ int wg_allowedips_insert_v6(struct allow
- void wg_allowedips_remove_by_peer(struct allowedips *table,
- struct wg_peer *peer, struct mutex *lock)
- {
-+ struct allowedips_node *node, *child, *tmp;
-+
-+ if (list_empty(&peer->allowedips_list))
-+ return;
- ++table->seq;
-- walk_remove_by_peer(&table->root4, peer, lock);
-- walk_remove_by_peer(&table->root6, peer, lock);
-+ list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) {
-+ list_del_init(&node->peer_list);
-+ RCU_INIT_POINTER(node->peer, NULL);
-+ if (node->bit[0] && node->bit[1])
-+ continue;
-+ child = rcu_dereference_protected(
-+ node->bit[!rcu_access_pointer(node->bit[0])],
-+ lockdep_is_held(lock));
-+ if (child)
-+ child->parent_bit = node->parent_bit;
-+ *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
-+ kfree_rcu(node, rcu);
-+
-+ /* TODO: Note that we currently don't walk up and down in order to
-+ * free any potential filler nodes. This means that this function
-+ * doesn't free up as much as it could, which could be revisited
-+ * at some point.
-+ */
-+ }
- }
-
- int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
---- a/drivers/net/wireguard/allowedips.h
-+++ b/drivers/net/wireguard/allowedips.h
-@@ -15,14 +15,11 @@ struct wg_peer;
- struct allowedips_node {
- struct wg_peer __rcu *peer;
- struct allowedips_node __rcu *bit[2];
-- /* While it may seem scandalous that we waste space for v4,
-- * we're alloc'ing to the nearest power of 2 anyway, so this
-- * doesn't actually make a difference.
-- */
-- u8 bits[16] __aligned(__alignof(u64));
- u8 cidr, bit_at_a, bit_at_b, bitlen;
-+ u8 bits[16] __aligned(__alignof(u64));
-
-- /* Keep rarely used list at bottom to be beyond cache line. */
-+ /* Keep rarely used members at bottom to be beyond cache line. */
-+ struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
- union {
- struct list_head peer_list;
- struct rcu_head rcu;