aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
diff options
context:
space:
mode:
authorHauke Mehrtens <hauke@hauke-m.de>2022-03-05 19:08:27 +0100
committerHauke Mehrtens <hauke@hauke-m.de>2022-08-14 00:29:20 +0200
commit6c901ec97d73a7835c2bb7525e51cc3d3614f344 (patch)
treef6c40f3a1f677c49855e17a4b28a53690fb48672 /target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
parent6a638c134d790413ea2c6976caaa89f73375310c (diff)
downloadupstream-6c901ec97d73a7835c2bb7525e51cc3d3614f344.tar.gz
upstream-6c901ec97d73a7835c2bb7525e51cc3d3614f344.tar.bz2
upstream-6c901ec97d73a7835c2bb7525e51cc3d3614f344.zip
kernel: Backport upstream flowtable patches from 5.15
This backports some patches from kernel 5.15 to fix issues with flowtable offloading in kernel 5.10. OpenWrt backports most of the patches related to flowtable offloading from kernel 5.15 already, but we are missing some of the extra fixes. This fixes some connection tracking problems when a flow gets removed from the offload and added to the normal SW path again. The patch 614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch was extended manually with the nf_conntrack_tcp_established() function. All changes are already included in kernel 5.15. Fixes: #8776 Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de> (cherry picked from commit 96ef2dabce1a5f102d53a15f33383193b47fd297)
Diffstat (limited to 'target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch')
-rw-r--r--target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch166
1 files changed, 166 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch b/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
new file mode 100644
index 0000000000..1b422ca4af
--- /dev/null
+++ b/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
@@ -0,0 +1,166 @@
+From b8835ba8c029b5c9ada5666754526c2b00f7ea80 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 17 May 2022 10:44:14 +0200
+Subject: netfilter: flowtable: fix TCP flow teardown
+
+[ Upstream commit e5eaac2beb54f0a16ff851125082d9faeb475572 ]
+
+This patch addresses three possible problems:
+
+1. ct gc may race to undo the timeout adjustment of the packet path, leaving
+ the conntrack entry in place with the internal offload timeout (one day).
+
+2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE
+ timeout is reached before the flow offload del.
+
+3. tcp ct is always set to ESTABLISHED with a very long timeout
+ in flow offload teardown/delete even though the state might be already
+ CLOSED. Also as a remark we cannot assume that the FIN or RST packet
+ is hitting flow table teardown as the packet might get bumped to the
+ slow path in nftables.
+
+This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so
+conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN
+state transition.
+
+Moreover, teturn the connection's ownership to conntrack upon teardown
+by clearing the offload flag and fixing the established timeout value.
+The flow table GC thread will asynchonrnously free the flow table and
+hardware offload entries.
+
+Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows on
+which is also misleading since the flow is back to classic conntrack
+path.
+
+If nf_ct_delete() removes the entry from the conntrack table, then it
+calls nf_ct_put() which decrements the refcnt. This is not a problem
+because the flowtable holds a reference to the conntrack object from
+flow_offload_alloc() path which is released via flow_offload_free().
+
+This patch also updates nft_flow_offload to skip packets in SYN_RECV
+state. Since we might miss or bump packets to slow path, we do not know
+what will happen there while we are still in SYN_RECV, this patch
+postpones offload up to the next packet which also aligns to the
+existing behaviour in tc-ct.
+
+flow_offload_teardown() does not reset the existing tcp state from
+flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bump to slow
+path might have already update the state to CLOSE/FIN.
+
+Joint work with Oz and Sven.
+
+Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race")
+Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
+Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_flow_table_core.c | 33 +++++++-----------------------
+ net/netfilter/nft_flow_offload.c | 3 ++-
+ 2 files changed, 9 insertions(+), 27 deletions(-)
+
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -173,12 +173,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_ini
+
+ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+ {
+- tcp->state = TCP_CONNTRACK_ESTABLISHED;
+ tcp->seen[0].td_maxwin = 0;
+ tcp->seen[1].td_maxwin = 0;
+ }
+
+-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
++static void flow_offload_fixup_ct(struct nf_conn *ct)
+ {
+ struct net *net = nf_ct_net(ct);
+ int l4num = nf_ct_protonum(ct);
+@@ -187,7 +186,9 @@ static void flow_offload_fixup_ct_timeou
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+- timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++ flow_offload_fixup_tcp(&ct->proto.tcp);
++
++ timeout = tn->timeouts[ct->proto.tcp.state];
+ timeout -= tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+@@ -205,18 +206,6 @@ static void flow_offload_fixup_ct_timeou
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
+
+-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+-{
+- if (nf_ct_protonum(ct) == IPPROTO_TCP)
+- flow_offload_fixup_tcp(&ct->proto.tcp);
+-}
+-
+-static void flow_offload_fixup_ct(struct nf_conn *ct)
+-{
+- flow_offload_fixup_ct_state(ct);
+- flow_offload_fixup_ct_timeout(ct);
+-}
+-
+ static void flow_offload_route_release(struct flow_offload *flow)
+ {
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+@@ -353,22 +342,14 @@ static void flow_offload_del(struct nf_f
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ nf_flow_offload_rhash_params);
+-
+- clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+-
+- if (nf_flow_has_expired(flow))
+- flow_offload_fixup_ct(flow->ct);
+- else
+- flow_offload_fixup_ct_timeout(flow->ct);
+-
+ flow_offload_free(flow);
+ }
+
+ void flow_offload_teardown(struct flow_offload *flow)
+ {
++ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+-
+- flow_offload_fixup_ct_state(flow->ct);
++ flow_offload_fixup_ct(flow->ct);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
+@@ -437,7 +418,7 @@ static void nf_flow_offload_gc_step(stru
+
+ if (nf_flow_has_expired(flow) ||
+ nf_ct_is_dying(flow->ct))
+- set_bit(NF_FLOW_TEARDOWN, &flow->flags);
++ flow_offload_teardown(flow);
+
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -268,6 +268,12 @@ static bool nft_flow_offload_skip(struct
+ return false;
+ }
+
++static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
++{
++ return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
++ test_bit(IPS_ASSURED_BIT, &ct->status);
++}
++
+ static void nft_flow_offload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+@@ -293,7 +299,8 @@ static void nft_flow_offload_eval(const
+ case IPPROTO_TCP:
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
+ sizeof(_tcph), &_tcph);
+- if (unlikely(!tcph || tcph->fin || tcph->rst))
++ if (unlikely(!tcph || tcph->fin || tcph->rst ||
++ !nf_conntrack_tcp_established(ct)))
+ goto out;
+ break;
+ case IPPROTO_UDP: