diff options
-rw-r--r-- | target/linux/generic/backport-5.4/398-v5.9-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.4/398-v5.9-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch b/target/linux/generic/backport-5.4/398-v5.9-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch new file mode 100644 index 0000000000..7b3396c6c3 --- /dev/null +++ b/target/linux/generic/backport-5.4/398-v5.9-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch @@ -0,0 +1,170 @@ +From b0c19ed6088ab41dd2a727b60594b7297c15d6ce Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com> +Date: Fri, 29 May 2020 14:43:44 +0200 +Subject: [PATCH] sch_cake: Take advantage of skb->hash where appropriate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +While the other fq-based qdiscs take advantage of skb->hash and doesn't +recompute it if it is already set, sch_cake does not. + +This was a deliberate choice because sch_cake hashes various parts of the +packet header to support its advanced flow isolation modes. However, +foregoing the use of skb->hash entirely loses a few important benefits: + +- When skb->hash is set by hardware, a few CPU cycles can be saved by not + hashing again in software. + +- Tunnel encapsulations will generally preserve the value of skb->hash from + before the encapsulation, which allows flow-based qdiscs to distinguish + between flows even though the outer packet header no longer has flow + information. + +It turns out that we can preserve these desirable properties in many cases, +while still supporting the advanced flow isolation properties of sch_cake. +This patch does so by reusing the skb->hash value as the flow_hash part of +the hashing procedure in cake_hash() only in the following conditions: + +- If the skb->hash is marked as covering the flow headers (skb->l4_hash is + set) + +AND + +- NAT header rewriting is either disabled, or did not change any values + used for hashing. The latter is important to match local-origin packets + such as those of a tunnel endpoint. + +The immediate motivation for fixing this was the recent patch to WireGuard +to preserve the skb->hash on encapsulation. As such, this is also what I +tested against; with this patch, added latency under load for competing +flows drops from ~8 ms to sub-1ms on an RRUL test over a WireGuard tunnel +going through a virtual link shaped to 1Gbps using sch_cake. This matches +the results we saw with a similar setup using sch_fq_codel when testing the +WireGuard patch. + +Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") +Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> +--- + net/sched/sch_cake.c | 65 ++++++++++++++++++++++++++++++++++---------- + 1 file changed, 51 insertions(+), 14 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -585,26 +585,48 @@ static bool cobalt_should_drop(struct co + return drop; + } + +-static void cake_update_flowkeys(struct flow_keys *keys, ++static bool cake_update_flowkeys(struct flow_keys *keys, + const struct sk_buff *skb) + { + #if IS_ENABLED(CONFIG_NF_CONNTRACK) + struct nf_conntrack_tuple tuple = {}; +- bool rev = !skb->_nfct; ++ bool rev = !skb->_nfct, upd = false; ++ __be32 ip; + + if (tc_skb_protocol(skb) != htons(ETH_P_IP)) +- return; ++ return false; + + if (!nf_ct_get_tuple_skb(&tuple, skb)) +- return; ++ return false; + +- keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; +- keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip; ++ ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; ++ if (ip != keys->addrs.v4addrs.src) { ++ keys->addrs.v4addrs.src = ip; ++ upd = true; ++ } ++ ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip; ++ if (ip != keys->addrs.v4addrs.dst) { ++ keys->addrs.v4addrs.dst = ip; ++ upd = true; ++ } + + if (keys->ports.ports) { +- keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all; +- keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all; ++ __be16 port; ++ ++ port = rev ? tuple.dst.u.all : tuple.src.u.all; ++ if (port != keys->ports.src) { ++ keys->ports.src = port; ++ upd = true; ++ } ++ port = rev ? tuple.src.u.all : tuple.dst.u.all; ++ if (port != keys->ports.dst) { ++ port = keys->ports.dst; ++ upd = true; ++ } + } ++ return upd; ++#else ++ return false; + #endif + } + +@@ -625,23 +647,36 @@ static bool cake_ddst(int flow_mode) + static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, + int flow_mode, u16 flow_override, u16 host_override) + { ++ bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS)); ++ bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS)); ++ bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG); + u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0; + u16 reduced_hash, srchost_idx, dsthost_idx; + struct flow_keys keys, host_keys; ++ bool use_skbhash = skb->l4_hash; + + if (unlikely(flow_mode == CAKE_FLOW_NONE)) + return 0; + +- /* If both overrides are set we can skip packet dissection entirely */ +- if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) && +- (host_override || !(flow_mode & CAKE_FLOW_HOSTS))) ++ /* If both overrides are set, or we can use the SKB hash and nat mode is ++ * disabled, we can skip packet dissection entirely. If nat mode is ++ * enabled there's another check below after doing the conntrack lookup. ++ */ ++ if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts) + goto skip_hash; + + skb_flow_dissect_flow_keys(skb, &keys, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + +- if (flow_mode & CAKE_FLOW_NAT_FLAG) +- cake_update_flowkeys(&keys, skb); ++ /* Don't use the SKB hash if we change the lookup keys from conntrack */ ++ if (nat_enabled && cake_update_flowkeys(&keys, skb)) ++ use_skbhash = false; ++ ++ /* If we can still use the SKB hash and don't need the host hash, we can ++ * skip the rest of the hashing procedure ++ */ ++ if (use_skbhash && !hash_hosts) ++ goto skip_hash; + + /* flow_hash_from_keys() sorts the addresses by value, so we have + * to preserve their order in a separate data structure to treat +@@ -680,12 +715,14 @@ static u32 cake_hash(struct cake_tin_dat + /* This *must* be after the above switch, since as a + * side-effect it sorts the src and dst addresses. + */ +- if (flow_mode & CAKE_FLOW_FLOWS) ++ if (hash_flows && !use_skbhash) + flow_hash = flow_hash_from_keys(&keys); + + skip_hash: + if (flow_override) + flow_hash = flow_override - 1; ++ else if (use_skbhash) ++ flow_hash = skb->hash; + if (host_override) { + dsthost_hash = host_override - 1; + srchost_hash = host_override - 1; |