aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch')
-rw-r--r--target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch169
1 files changed, 169 insertions, 0 deletions
diff --git a/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch b/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch
new file mode 100644
index 0000000000..9d6ce98080
--- /dev/null
+++ b/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch
@@ -0,0 +1,169 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:03:56 +0100
+Subject: [PATCH] netfilter: nf_conntrack: add IPS_OFFLOAD status bit
+
+This new bit tells us that the conntrack entry is owned by the flow
+table offload infrastructure.
+
+ # cat /proc/net/nf_conntrack
+ ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2
+
+Note the [OFFLOAD] tag in the listing.
+
+The timer of such conntrack entries look like stopped from userspace.
+In practise, to make sure the conntrack entry does not go away, the
+conntrack timer is periodically set to an arbitrary large value that
+gets refreshed on every iteration from the garbage collector, so it
+never expires- and they display no internal state in the case of TCP
+flows. This allows us to save a bitcheck from the packet path via
+nf_ct_is_expired().
+
+Conntrack entries that have been offloaded to the flow table
+infrastructure cannot be deleted/flushed via ctnetlink. The flow table
+infrastructure is also responsible for releasing this conntrack entry.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
+@@ -101,12 +101,16 @@ enum ip_conntrack_status {
+ IPS_HELPER_BIT = 13,
+ IPS_HELPER = (1 << IPS_HELPER_BIT),
+
++ /* Conntrack has been offloaded to flow table. */
++ IPS_OFFLOAD_BIT = 14,
++ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
++
+ /* Be careful here, modifying these bits can make things messy,
+ * so don't let users modify them directly.
+ */
+ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+ IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+- IPS_SEQ_ADJUST | IPS_TEMPLATE),
++ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
+
+ __IPS_MAX_BIT = 14,
+ };
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -901,6 +901,9 @@ static unsigned int early_drop_list(stru
+ hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
++ continue;
++
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ continue;
+@@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(con
+ return false;
+ }
+
++#define DAY (86400 * HZ)
++
++/* Set an arbitrary timeout large enough not to ever expire, this save
++ * us a check for the IPS_OFFLOAD_BIT from the packet path via
++ * nf_ct_is_expired().
++ */
++static void nf_ct_offload_timeout(struct nf_conn *ct)
++{
++ if (nf_ct_expires(ct) < DAY / 2)
++ ct->timeout = nfct_time_stamp + DAY;
++}
++
+ static void gc_worker(struct work_struct *work)
+ {
+ unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
+@@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ scanned++;
++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
++ nf_ct_offload_timeout(tmp);
++ continue;
++ }
++
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ expired_count++;
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -1105,6 +1105,14 @@ static const struct nla_policy ct_nla_po
+ .len = NF_CT_LABELS_MAX_SIZE },
+ };
+
++static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
++{
++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++ return 0;
++
++ return ctnetlink_filter_match(ct, data);
++}
++
+ static int ctnetlink_flush_conntrack(struct net *net,
+ const struct nlattr * const cda[],
+ u32 portid, int report)
+@@ -1117,7 +1125,7 @@ static int ctnetlink_flush_conntrack(str
+ return PTR_ERR(filter);
+ }
+
+- nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
++ nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
+ portid, report);
+ kfree(filter);
+
+@@ -1163,6 +1171,11 @@ static int ctnetlink_del_conntrack(struc
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+
++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
++ nf_ct_put(ct);
++ return -EBUSY;
++ }
++
+ if (cda[CTA_ID]) {
+ u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
+ if (id != (u32)(unsigned long)ct) {
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_c
+ /* Print out the private part of the conntrack. */
+ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
+ {
++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++ return;
++
+ seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
+ }
+ #endif
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *
+ WARN_ON(!l4proto);
+
+ ret = -ENOSPC;
+- seq_printf(s, "%-8s %u %-8s %u %ld ",
++ seq_printf(s, "%-8s %u %-8s %u ",
+ l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+- l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
+- nf_ct_expires(ct) / HZ);
++ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
++
++ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
++ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
+
+ if (l4proto->print_conntrack)
+ l4proto->print_conntrack(s, ct);
+@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *
+ if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+ goto release;
+
+- if (test_bit(IPS_ASSURED_BIT, &ct->status))
++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++ seq_puts(s, "[OFFLOAD] ");
++ else if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ seq_puts(s, "[ASSURED] ");
+
+ if (seq_has_overflowed(s))