diff options
Diffstat (limited to 'target/linux/generic/backport-4.19/320-v4.16-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch')
-rw-r--r-- | target/linux/generic/backport-4.19/320-v4.16-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/target/linux/generic/backport-4.19/320-v4.16-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch b/target/linux/generic/backport-4.19/320-v4.16-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch new file mode 100644 index 0000000000..7788dd1434 --- /dev/null +++ b/target/linux/generic/backport-4.19/320-v4.16-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch @@ -0,0 +1,169 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:03:56 +0100 +Subject: [PATCH] netfilter: nf_conntrack: add IPS_OFFLOAD status bit + +This new bit tells us that the conntrack entry is owned by the flow +table offload infrastructure. + + # cat /proc/net/nf_conntrack + ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2 + +Note the [OFFLOAD] tag in the listing. + +The timer of such conntrack entries look like stopped from userspace. +In practise, to make sure the conntrack entry does not go away, the +conntrack timer is periodically set to an arbitrary large value that +gets refreshed on every iteration from the garbage collector, so it +never expires- and they display no internal state in the case of TCP +flows. This allows us to save a bitcheck from the packet path via +nf_ct_is_expired(). + +Conntrack entries that have been offloaded to the flow table +infrastructure cannot be deleted/flushed via ctnetlink. The flow table +infrastructure is also responsible for releasing this conntrack entry. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/uapi/linux/netfilter/nf_conntrack_common.h ++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h +@@ -101,12 +101,16 @@ enum ip_conntrack_status { + IPS_HELPER_BIT = 13, + IPS_HELPER = (1 << IPS_HELPER_BIT), + ++ /* Conntrack has been offloaded to flow table. */ ++ IPS_OFFLOAD_BIT = 14, ++ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), ++ + /* Be careful here, modifying these bits can make things messy, + * so don't let users modify them directly. + */ + IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | + IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | +- IPS_SEQ_ADJUST | IPS_TEMPLATE), ++ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), + + __IPS_MAX_BIT = 14, + }; +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -901,6 +901,9 @@ static unsigned int early_drop_list(stru + hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); + ++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) ++ continue; ++ + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + continue; +@@ -978,6 +981,18 @@ static bool gc_worker_can_early_drop(con + return false; + } + ++#define DAY (86400 * HZ) ++ ++/* Set an arbitrary timeout large enough not to ever expire, this save ++ * us a check for the IPS_OFFLOAD_BIT from the packet path via ++ * nf_ct_is_expired(). ++ */ ++static void nf_ct_offload_timeout(struct nf_conn *ct) ++{ ++ if (nf_ct_expires(ct) < DAY / 2) ++ ct->timeout = nfct_time_stamp + DAY; ++} ++ + static void gc_worker(struct work_struct *work) + { + unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); +@@ -1014,6 +1029,11 @@ static void gc_worker(struct work_struct + tmp = nf_ct_tuplehash_to_ctrack(h); + + scanned++; ++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { ++ nf_ct_offload_timeout(tmp); ++ continue; ++ } ++ + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + expired_count++; +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -1120,6 +1120,14 @@ static const struct nla_policy ct_nla_po + .len = NF_CT_LABELS_MAX_SIZE }, + }; + ++static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) ++{ ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ return 0; ++ ++ return ctnetlink_filter_match(ct, data); ++} ++ + static int ctnetlink_flush_conntrack(struct net *net, + const struct nlattr * const cda[], + u32 portid, int report) +@@ -1132,7 +1140,7 @@ static int ctnetlink_flush_conntrack(str + return PTR_ERR(filter); + } + +- nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, ++ nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, + portid, report); + kfree(filter); + +@@ -1178,6 +1186,11 @@ static int ctnetlink_del_conntrack(struc + + ct = nf_ct_tuplehash_to_ctrack(h); + ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { ++ nf_ct_put(ct); ++ return -EBUSY; ++ } ++ + if (cda[CTA_ID]) { + u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); + if (id != (u32)(unsigned long)ct) { +--- a/net/netfilter/nf_conntrack_proto_tcp.c ++++ b/net/netfilter/nf_conntrack_proto_tcp.c +@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_c + /* Print out the private part of the conntrack. */ + static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) + { ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ return; ++ + seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); + } + #endif +--- a/net/netfilter/nf_conntrack_standalone.c ++++ b/net/netfilter/nf_conntrack_standalone.c +@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file * + WARN_ON(!l4proto); + + ret = -ENOSPC; +- seq_printf(s, "%-8s %u %-8s %u %ld ", ++ seq_printf(s, "%-8s %u %-8s %u ", + l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), +- l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), +- nf_ct_expires(ct) / HZ); ++ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); ++ ++ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ); + + if (l4proto->print_conntrack) + l4proto->print_conntrack(s, ct); +@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file * + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) + goto release; + +- if (test_bit(IPS_ASSURED_BIT, &ct->status)) ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ seq_puts(s, "[OFFLOAD] "); ++ else if (test_bit(IPS_ASSURED_BIT, &ct->status)) + seq_puts(s, "[ASSURED] "); + + if (seq_has_overflowed(s)) |