diff options
Diffstat (limited to 'target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch')
-rw-r--r-- | target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch | 166 |
1 files changed, 166 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch b/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch new file mode 100644 index 0000000000..1b422ca4af --- /dev/null +++ b/target/linux/generic/backport-5.10/614-v5.18-netfilter-flowtable-fix-TCP-flow-teardown.patch @@ -0,0 +1,166 @@ +From b8835ba8c029b5c9ada5666754526c2b00f7ea80 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 17 May 2022 10:44:14 +0200 +Subject: netfilter: flowtable: fix TCP flow teardown + +[ Upstream commit e5eaac2beb54f0a16ff851125082d9faeb475572 ] + +This patch addresses three possible problems: + +1. ct gc may race to undo the timeout adjustment of the packet path, leaving + the conntrack entry in place with the internal offload timeout (one day). + +2. ct gc removes the ct because the IPS_OFFLOAD_BIT is not set and the CLOSE + timeout is reached before the flow offload del. + +3. tcp ct is always set to ESTABLISHED with a very long timeout + in flow offload teardown/delete even though the state might be already + CLOSED. Also as a remark we cannot assume that the FIN or RST packet + is hitting flow table teardown as the packet might get bumped to the + slow path in nftables. + +This patch resets IPS_OFFLOAD_BIT from flow_offload_teardown(), so +conntrack handles the tcp rst/fin packet which triggers the CLOSE/FIN +state transition. + +Moreover, return the connection's ownership to conntrack upon teardown +by clearing the offload flag and fixing the established timeout value. +The flow table GC thread will asynchronously free the flow table and +hardware offload entries. + +Before this patch, the IPS_OFFLOAD_BIT remained set for expired flows, +which is also misleading since the flow is back to classic conntrack +path. + +If nf_ct_delete() removes the entry from the conntrack table, then it +calls nf_ct_put() which decrements the refcnt. 
This is not a problem +because the flowtable holds a reference to the conntrack object from +flow_offload_alloc() path which is released via flow_offload_free(). + +This patch also updates nft_flow_offload to skip packets in SYN_RECV +state. Since we might miss or bump packets to slow path, we do not know +what will happen there while we are still in SYN_RECV, this patch +postpones offload up to the next packet which also aligns to the +existing behaviour in tc-ct. + +flow_offload_teardown() does not reset the existing tcp state from +flow_offload_fixup_tcp() to ESTABLISHED anymore, packets bumped to slow +path might have already updated the state to CLOSE/FIN. + +Joint work with Oz and Sven. + +Fixes: 1e5b2471bcc4 ("netfilter: nf_flow_table: teardown flow timeout race") +Signed-off-by: Oz Shlomo <ozsh@nvidia.com> +Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/netfilter/nf_flow_table_core.c | 33 +++++++----------------------- + net/netfilter/nft_flow_offload.c | 3 ++- + 2 files changed, 9 insertions(+), 27 deletions(-) + +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -173,12 +173,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_ini + + static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) + { +- tcp->state = TCP_CONNTRACK_ESTABLISHED; + tcp->seen[0].td_maxwin = 0; + tcp->seen[1].td_maxwin = 0; + } + +-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) ++static void flow_offload_fixup_ct(struct nf_conn *ct) + { + struct net *net = nf_ct_net(ct); + int l4num = nf_ct_protonum(ct); +@@ -187,7 +186,9 @@ static void flow_offload_fixup_ct_timeou + if (l4num == IPPROTO_TCP) { + struct nf_tcp_net *tn = nf_tcp_pernet(net); + +- timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; ++ flow_offload_fixup_tcp(&ct->proto.tcp); ++ ++ timeout = tn->timeouts[ct->proto.tcp.state]; + timeout -= tn->offload_timeout; + 
} else if (l4num == IPPROTO_UDP) { + struct nf_udp_net *tn = nf_udp_pernet(net); +@@ -205,18 +206,6 @@ static void flow_offload_fixup_ct_timeou + WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); + } + +-static void flow_offload_fixup_ct_state(struct nf_conn *ct) +-{ +- if (nf_ct_protonum(ct) == IPPROTO_TCP) +- flow_offload_fixup_tcp(&ct->proto.tcp); +-} +- +-static void flow_offload_fixup_ct(struct nf_conn *ct) +-{ +- flow_offload_fixup_ct_state(ct); +- flow_offload_fixup_ct_timeout(ct); +-} +- + static void flow_offload_route_release(struct flow_offload *flow) + { + nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); +@@ -353,22 +342,14 @@ static void flow_offload_del(struct nf_f + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, + nf_flow_offload_rhash_params); +- +- clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); +- +- if (nf_flow_has_expired(flow)) +- flow_offload_fixup_ct(flow->ct); +- else +- flow_offload_fixup_ct_timeout(flow->ct); +- + flow_offload_free(flow); + } + + void flow_offload_teardown(struct flow_offload *flow) + { ++ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); + set_bit(NF_FLOW_TEARDOWN, &flow->flags); +- +- flow_offload_fixup_ct_state(flow->ct); ++ flow_offload_fixup_ct(flow->ct); + } + EXPORT_SYMBOL_GPL(flow_offload_teardown); + +@@ -437,7 +418,7 @@ static void nf_flow_offload_gc_step(stru + + if (nf_flow_has_expired(flow) || + nf_ct_is_dying(flow->ct)) +- set_bit(NF_FLOW_TEARDOWN, &flow->flags); ++ flow_offload_teardown(flow); + + if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { + if (test_bit(NF_FLOW_HW, &flow->flags)) { +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -268,6 +268,12 @@ static bool nft_flow_offload_skip(struct + return false; + } + ++static bool nf_conntrack_tcp_established(const struct nf_conn *ct) ++{ ++ return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED && ++ test_bit(IPS_ASSURED_BIT, &ct->status); ++} ++ + static void 
nft_flow_offload_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +@@ -293,7 +299,8 @@ static void nft_flow_offload_eval(const + case IPPROTO_TCP: + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, + sizeof(_tcph), &_tcph); +- if (unlikely(!tcph || tcph->fin || tcph->rst)) ++ if (unlikely(!tcph || tcph->fin || tcph->rst || ++ !nf_conntrack_tcp_established(ct))) + goto out; + break; + case IPPROTO_UDP: |