aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch')
-rw-r--r--target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch166
1 files changed, 166 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch b/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
new file mode 100644
index 0000000000..1b422ca4af
--- /dev/null
+++ b/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch
@@ -0,0 +1,166 @@
+From b8835ba8c029b5c9ada5666754526c2b00f7ea80 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 17 May 2022 10:44:14 +0200
+Subject: netfilter: flowtable: fix TCP flow teardown
+
+[ Upstream commit e5eaac2beb54f0a16ff851125082d9faeb475572 ]
+
+This patch addresses three possible problems:
+
+1. ct gc may race to undo the timeout adjustment of the packet path, leaving
+ the conntrack entry in place with the internal offload timeout (one day).
+
+2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE
+ timeout is reached before the flow offload del.
+
+3. tcp ct is always set to ESTABLISHED with a very long timeout
+ in flow offload teardown/delete even though the state might be already
+ CLOSED. Also as a remark we cannot assume that the FIN or RST packet
+ is hitting flow table teardown as the packet might get bumped to the
+ slow path in nftables.
+
+This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so
+conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN
+state transition.
+
+Moreover, teturn the connection's ownership to conntrack upon teardown
+by clearing the offload flag and fixing the established timeout value.
+The flow table GC thread will asynchonrnously free the flow table and
+hardware offload entries.
+
+Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows on
+which is also misleading since the flow is back to classic conntrack
+path.
+
+If nf_ct_delete() removes the entry from the conntrack table, then it
+calls nf_ct_put() which decrements the refcnt. This is not a problem
+because the flowtable holds a reference to the conntrack object from
+flow_offload_alloc() path which is released via flow_offload_free().
+
+This patch also updates nft_flow_offload to skip packets in SYN_RECV
+state. Since we might miss or bump packets to slow path, we do not know
+what will happen there while we are still in SYN_RECV, this patch
+postpones offload up to the next packet which also aligns to the
+existing behaviour in tc-ct.
+
+flow_offload_teardown() does not reset the existing tcp state from
+flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bump to slow
+path might have already update the state to CLOSE/FIN.
+
+Joint work with Oz and Sven.
+
+Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race")
+Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
+Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_flow_table_core.c | 33 +++++++-----------------------
+ net/netfilter/nft_flow_offload.c | 3 ++-
+ 2 files changed, 9 insertions(+), 27 deletions(-)
+
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -173,12 +173,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_ini
+
+ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+ {
+- tcp->state = TCP_CONNTRACK_ESTABLISHED;
+ tcp->seen[0].td_maxwin = 0;
+ tcp->seen[1].td_maxwin = 0;
+ }
+
+-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
++static void flow_offload_fixup_ct(struct nf_conn *ct)
+ {
+ struct net *net = nf_ct_net(ct);
+ int l4num = nf_ct_protonum(ct);
+@@ -187,7 +186,9 @@ static void flow_offload_fixup_ct_timeou
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+- timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++ flow_offload_fixup_tcp(&ct->proto.tcp);
++
++ timeout = tn->timeouts[ct->proto.tcp.state];
+ timeout -= tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+@@ -205,18 +206,6 @@ static void flow_offload_fixup_ct_timeou
+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
+
+-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+-{
+- if (nf_ct_protonum(ct) == IPPROTO_TCP)
+- flow_offload_fixup_tcp(&ct->proto.tcp);
+-}
+-
+-static void flow_offload_fixup_ct(struct nf_conn *ct)
+-{
+- flow_offload_fixup_ct_state(ct);
+- flow_offload_fixup_ct_timeout(ct);
+-}
+-
+ static void flow_offload_route_release(struct flow_offload *flow)
+ {
+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+@@ -353,22 +342,14 @@ static void flow_offload_del(struct nf_f
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ nf_flow_offload_rhash_params);
+-
+- clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+-
+- if (nf_flow_has_expired(flow))
+- flow_offload_fixup_ct(flow->ct);
+- else
+- flow_offload_fixup_ct_timeout(flow->ct);
+-
+ flow_offload_free(flow);
+ }
+
+ void flow_offload_teardown(struct flow_offload *flow)
+ {
++ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+-
+- flow_offload_fixup_ct_state(flow->ct);
++ flow_offload_fixup_ct(flow->ct);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_teardown);
+
+@@ -437,7 +418,7 @@ static void nf_flow_offload_gc_step(stru
+
+ if (nf_flow_has_expired(flow) ||
+ nf_ct_is_dying(flow->ct))
+- set_bit(NF_FLOW_TEARDOWN, &flow->flags);
++ flow_offload_teardown(flow);
+
+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
+--- a/net/netfilter/nft_flow_offload.c
++++ b/net/netfilter/nft_flow_offload.c
+@@ -268,6 +268,12 @@ static bool nft_flow_offload_skip(struct
+ return false;
+ }
+
++static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
++{
++ return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
++ test_bit(IPS_ASSURED_BIT, &ct->status);
++}
++
+ static void nft_flow_offload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+@@ -293,7 +299,8 @@ static void nft_flow_offload_eval(const
+ case IPPROTO_TCP:
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
+ sizeof(_tcph), &_tcph);
+- if (unlikely(!tcph || tcph->fin || tcph->rst))
++ if (unlikely(!tcph || tcph->fin || tcph->rst ||
++ !nf_conntrack_tcp_established(ct)))
+ goto out;
+ break;
+ case IPPROTO_UDP: