diff options
author | Felix Fietkau <nbd@nbd.name> | 2018-02-05 13:35:24 +0100 |
---|---|---|
committer | Felix Fietkau <nbd@nbd.name> | 2018-02-21 20:12:42 +0100 |
commit | 103335644265d96c656a7de3d5994fbd11246300 (patch) | |
tree | 2b19dea75e812b8240d6a458f0ed6dd22a8148b2 /target/linux/generic/backport-4.14 | |
parent | b7265c59ab7dd0ec5dccb96e7b0dc1432404feb7 (diff) | |
download | upstream-103335644265d96c656a7de3d5994fbd11246300.tar.gz upstream-103335644265d96c656a7de3d5994fbd11246300.tar.bz2 upstream-103335644265d96c656a7de3d5994fbd11246300.zip |
kernel: backport netfilter NAT offload support to 4.14
This only works with nftables for now, iptables support will be added
later. Includes a number of related upstream nftables improvements to
simplify backporting follow-up changes
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Diffstat (limited to 'target/linux/generic/backport-4.14')
41 files changed, 11304 insertions, 0 deletions
diff --git a/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch b/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch new file mode 100644 index 0000000000..9d6ce98080 --- /dev/null +++ b/target/linux/generic/backport-4.14/320-netfilter-nf_conntrack-add-IPS_OFFLOAD-status-bit.patch @@ -0,0 +1,169 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:03:56 +0100 +Subject: [PATCH] netfilter: nf_conntrack: add IPS_OFFLOAD status bit + +This new bit tells us that the conntrack entry is owned by the flow +table offload infrastructure. + + # cat /proc/net/nf_conntrack + ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2 + +Note the [OFFLOAD] tag in the listing. + +The timer of such conntrack entries look like stopped from userspace. +In practise, to make sure the conntrack entry does not go away, the +conntrack timer is periodically set to an arbitrary large value that +gets refreshed on every iteration from the garbage collector, so it +never expires- and they display no internal state in the case of TCP +flows. This allows us to save a bitcheck from the packet path via +nf_ct_is_expired(). + +Conntrack entries that have been offloaded to the flow table +infrastructure cannot be deleted/flushed via ctnetlink. The flow table +infrastructure is also responsible for releasing this conntrack entry. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/uapi/linux/netfilter/nf_conntrack_common.h ++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h +@@ -101,12 +101,16 @@ enum ip_conntrack_status { + IPS_HELPER_BIT = 13, + IPS_HELPER = (1 << IPS_HELPER_BIT), + ++ /* Conntrack has been offloaded to flow table. */ ++ IPS_OFFLOAD_BIT = 14, ++ IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), ++ + /* Be careful here, modifying these bits can make things messy, + * so don't let users modify them directly. + */ + IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | + IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | +- IPS_SEQ_ADJUST | IPS_TEMPLATE), ++ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), + + __IPS_MAX_BIT = 14, + }; +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -901,6 +901,9 @@ static unsigned int early_drop_list(stru + hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); + ++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) ++ continue; ++ + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + continue; +@@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(con + return false; + } + ++#define DAY (86400 * HZ) ++ ++/* Set an arbitrary timeout large enough not to ever expire, this save ++ * us a check for the IPS_OFFLOAD_BIT from the packet path via ++ * nf_ct_is_expired(). ++ */ ++static void nf_ct_offload_timeout(struct nf_conn *ct) ++{ ++ if (nf_ct_expires(ct) < DAY / 2) ++ ct->timeout = nfct_time_stamp + DAY; ++} ++ + static void gc_worker(struct work_struct *work) + { + unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); +@@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct + tmp = nf_ct_tuplehash_to_ctrack(h); + + scanned++; ++ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { ++ nf_ct_offload_timeout(tmp); ++ continue; ++ } ++ + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + expired_count++; +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -1105,6 +1105,14 @@ static const struct nla_policy ct_nla_po + .len = NF_CT_LABELS_MAX_SIZE }, + }; + ++static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) ++{ ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ return 0; ++ ++ return ctnetlink_filter_match(ct, data); ++} ++ + static int ctnetlink_flush_conntrack(struct net *net, + const struct nlattr * const cda[], + u32 portid, int report) +@@ -1117,7 +1125,7 @@ static int ctnetlink_flush_conntrack(str + return PTR_ERR(filter); + } + +- nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, ++ nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, + portid, report); + kfree(filter); + +@@ -1163,6 +1171,11 @@ static int ctnetlink_del_conntrack(struc + + ct = nf_ct_tuplehash_to_ctrack(h); + ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { ++ nf_ct_put(ct); ++ return -EBUSY; ++ } ++ + if (cda[CTA_ID]) { + u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); + if (id != (u32)(unsigned long)ct) { +--- a/net/netfilter/nf_conntrack_proto_tcp.c ++++ b/net/netfilter/nf_conntrack_proto_tcp.c +@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_c + /* Print out the private part of the conntrack. */ + static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) + { ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ return; ++ + seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); + } + #endif +--- a/net/netfilter/nf_conntrack_standalone.c ++++ b/net/netfilter/nf_conntrack_standalone.c +@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file * + WARN_ON(!l4proto); + + ret = -ENOSPC; +- seq_printf(s, "%-8s %u %-8s %u %ld ", ++ seq_printf(s, "%-8s %u %-8s %u ", + l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), +- l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), +- nf_ct_expires(ct) / HZ); ++ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); ++ ++ if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ); + + if (l4proto->print_conntrack) + l4proto->print_conntrack(s, ct); +@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file * + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) + goto release; + +- if (test_bit(IPS_ASSURED_BIT, &ct->status)) ++ if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ seq_puts(s, "[OFFLOAD] "); ++ else if (test_bit(IPS_ASSURED_BIT, &ct->status)) + seq_puts(s, "[ASSURED] "); + + if (seq_has_overflowed(s)) diff --git a/target/linux/generic/backport-4.14/321-netfilter-nf_tables-add-flow-table-netlink-frontend.patch b/target/linux/generic/backport-4.14/321-netfilter-nf_tables-add-flow-table-netlink-frontend.patch new file mode 100644 index 0000000000..8a0d2f0fb7 --- /dev/null +++ b/target/linux/generic/backport-4.14/321-netfilter-nf_tables-add-flow-table-netlink-frontend.patch @@ -0,0 +1,1079 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:04:07 +0100 +Subject: [PATCH] netfilter: nf_tables: add flow table netlink frontend + +This patch introduces a netlink control plane to create, delete and dump +flow tables. Flow tables are identified by name, this name is used from +rules to refer to an specific flow table. Flow tables use the rhashtable +class and a generic garbage collector to remove expired entries. + +This also adds the infrastructure to add different flow table types, so +we can add one for each layer 3 protocol family. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + create mode 100644 include/net/netfilter/nf_flow_table.h + +--- /dev/null ++++ b/include/net/netfilter/nf_flow_table.h +@@ -0,0 +1,23 @@ ++#ifndef _NF_FLOW_TABLE_H ++#define _NF_FLOW_TABLE_H ++ ++#include <linux/rhashtable.h> ++ ++struct nf_flowtable; ++ ++struct nf_flowtable_type { ++ struct list_head list; ++ int family; ++ void (*gc)(struct work_struct *work); ++ const struct rhashtable_params *params; ++ nf_hookfn *hook; ++ struct module *owner; ++}; ++ ++struct nf_flowtable { ++ struct rhashtable rhashtable; ++ const struct nf_flowtable_type *type; ++ struct delayed_work gc_work; ++}; ++ ++#endif /* _FLOW_OFFLOAD_H */ +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -9,6 +9,7 @@ + #include <linux/netfilter/x_tables.h> + #include <linux/netfilter/nf_tables.h> + #include <linux/u64_stats_sync.h> ++#include <net/netfilter/nf_flow_table.h> + #include <net/netlink.h> + + #define NFT_JUMP_STACK_SIZE 16 +@@ -933,6 +934,7 @@ unsigned int nft_do_chain(struct nft_pkt + * @chains: chains in the table + * @sets: sets in the table + * @objects: stateful objects in the table ++ * @flowtables: flow tables in the table + * @hgenerator: handle generator state + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) +@@ -944,6 +946,7 @@ struct nft_table { + struct list_head chains; + struct list_head sets; + struct list_head objects; ++ struct list_head flowtables; + u64 hgenerator; + u32 use; + u16 flags:14, +@@ -1075,6 +1078,44 @@ int nft_register_obj(struct nft_object_t + void nft_unregister_obj(struct nft_object_type *obj_type); + + /** ++ * struct nft_flowtable - nf_tables flow table ++ * ++ * @list: flow table list node in table list ++ * @table: the table the flow table is contained in ++ * @name: name of this flow table ++ * @hooknum: hook number ++ * @priority: hook priority ++ * @ops_len: number of hooks in array ++ * @genmask: generation mask ++ * @use: number of references to this flow table ++ * @data: rhashtable and garbage collector ++ * @ops: array of hooks ++ */ ++struct nft_flowtable { ++ struct list_head list; ++ struct nft_table *table; ++ char *name; ++ int hooknum; ++ int priority; ++ int ops_len; ++ u32 genmask:2, ++ use:30; ++ /* runtime data below here */ ++ struct nf_hook_ops *ops ____cacheline_aligned; ++ struct nf_flowtable data; ++}; ++ ++struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, ++ const struct nlattr *nla, ++ u8 genmask); ++void nft_flow_table_iterate(struct net *net, ++ void (*iter)(struct nf_flowtable *flowtable, void *data), ++ void *data); ++ ++void nft_register_flowtable_type(struct nf_flowtable_type *type); ++void nft_unregister_flowtable_type(struct nf_flowtable_type *type); ++ ++/** + * struct nft_traceinfo - nft tracing information and state + * + * @pkt: pktinfo currently processed +@@ -1310,4 +1351,11 @@ struct nft_trans_obj { + #define nft_trans_obj(trans) \ + (((struct nft_trans_obj *)trans->data)->obj) + ++struct nft_trans_flowtable { ++ struct nft_flowtable *flowtable; ++}; ++ ++#define nft_trans_flowtable(trans) \ ++ (((struct nft_trans_flowtable *)trans->data)->flowtable) ++ + #endif /* _NET_NF_TABLES_H */ +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -92,6 +92,9 @@ enum nft_verdicts { + * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) + * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) ++ * @NFT_MSG_NEWFLOWTABLE: add new flow table (enum nft_flowtable_attributes) ++ * @NFT_MSG_GETFLOWTABLE: get flow table (enum nft_flowtable_attributes) ++ * @NFT_MSG_DELFLOWTABLE: delete flow table (enum nft_flowtable_attributes) + */ + enum nf_tables_msg_types { + NFT_MSG_NEWTABLE, +@@ -116,6 +119,9 @@ enum nf_tables_msg_types { + NFT_MSG_GETOBJ, + NFT_MSG_DELOBJ, + NFT_MSG_GETOBJ_RESET, ++ NFT_MSG_NEWFLOWTABLE, ++ NFT_MSG_GETFLOWTABLE, ++ NFT_MSG_DELFLOWTABLE, + NFT_MSG_MAX, + }; + +@@ -1310,6 +1316,53 @@ enum nft_object_attributes { + #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) + + /** ++ * enum nft_flowtable_attributes - nf_tables flow table netlink attributes ++ * ++ * @NFTA_FLOWTABLE_TABLE: name of the table containing the expression (NLA_STRING) ++ * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) ++ * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) ++ * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) ++ */ ++enum nft_flowtable_attributes { ++ NFTA_FLOWTABLE_UNSPEC, ++ NFTA_FLOWTABLE_TABLE, ++ NFTA_FLOWTABLE_NAME, ++ NFTA_FLOWTABLE_HOOK, ++ NFTA_FLOWTABLE_USE, ++ __NFTA_FLOWTABLE_MAX ++}; ++#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) ++ ++/** ++ * enum nft_flowtable_hook_attributes - nf_tables flow table hook netlink attributes ++ * ++ * @NFTA_FLOWTABLE_HOOK_NUM: netfilter hook number (NLA_U32) ++ * @NFTA_FLOWTABLE_HOOK_PRIORITY: netfilter hook priority (NLA_U32) ++ * @NFTA_FLOWTABLE_HOOK_DEVS: input devices this flow table is bound to (NLA_NESTED) ++ */ ++enum nft_flowtable_hook_attributes { ++ NFTA_FLOWTABLE_HOOK_UNSPEC, ++ NFTA_FLOWTABLE_HOOK_NUM, ++ NFTA_FLOWTABLE_HOOK_PRIORITY, ++ NFTA_FLOWTABLE_HOOK_DEVS, ++ __NFTA_FLOWTABLE_HOOK_MAX ++}; ++#define NFTA_FLOWTABLE_HOOK_MAX (__NFTA_FLOWTABLE_HOOK_MAX - 1) ++ ++/** ++ * enum nft_device_attributes - nf_tables device netlink attributes ++ * ++ * @NFTA_DEVICE_NAME: name of this device (NLA_STRING) ++ */ ++enum nft_devices_attributes { ++ NFTA_DEVICE_UNSPEC, ++ NFTA_DEVICE_NAME, ++ __NFTA_DEVICE_MAX ++}; ++#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1) ++ ++ ++/** + * enum nft_trace_attributes - nf_tables trace netlink attributes + * + * @NFTA_TRACE_TABLE: name of the table (NLA_STRING) +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -17,6 +17,7 @@ + #include <linux/netfilter.h> + #include <linux/netfilter/nfnetlink.h> + #include <linux/netfilter/nf_tables.h> ++#include <net/netfilter/nf_flow_table.h> + #include <net/netfilter/nf_tables_core.h> + #include <net/netfilter/nf_tables.h> + #include <net/net_namespace.h> +@@ -24,6 +25,7 @@ + + static LIST_HEAD(nf_tables_expressions); + static LIST_HEAD(nf_tables_objects); ++static LIST_HEAD(nf_tables_flowtables); + + /** + * nft_register_afinfo - register nf_tables address family info +@@ -345,6 +347,40 @@ static int nft_delobj(struct nft_ctx *ct + return err; + } + ++static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, ++ struct nft_flowtable *flowtable) ++{ ++ struct nft_trans *trans; ++ ++ trans = nft_trans_alloc(ctx, msg_type, ++ sizeof(struct nft_trans_flowtable)); ++ if (trans == NULL) ++ return -ENOMEM; ++ ++ if (msg_type == NFT_MSG_NEWFLOWTABLE) ++ nft_activate_next(ctx->net, flowtable); ++ ++ nft_trans_flowtable(trans) = flowtable; ++ list_add_tail(&trans->list, &ctx->net->nft.commit_list); ++ ++ return 0; ++} ++ ++static int nft_delflowtable(struct nft_ctx *ctx, ++ struct nft_flowtable *flowtable) ++{ ++ int err; ++ ++ err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); ++ if (err < 0) ++ return err; ++ ++ nft_deactivate_next(ctx->net, flowtable); ++ ctx->table->use--; ++ ++ return err; ++} ++ + /* + * Tables + */ +@@ -728,6 +764,7 @@ static int nf_tables_newtable(struct net + INIT_LIST_HEAD(&table->chains); + INIT_LIST_HEAD(&table->sets); + INIT_LIST_HEAD(&table->objects); ++ INIT_LIST_HEAD(&table->flowtables); + table->flags = flags; + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); +@@ -749,10 +786,11 @@ err1: + + static int nft_flush_table(struct nft_ctx *ctx) + { +- int err; ++ struct nft_flowtable *flowtable, *nft; + struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; + struct nft_set *set, *ns; ++ int err; + + list_for_each_entry(chain, &ctx->table->chains, list) { + if (!nft_is_active_next(ctx->net, chain)) +@@ -778,6 +816,12 @@ static int nft_flush_table(struct nft_ct + goto out; + } + ++ list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) { ++ err = nft_delflowtable(ctx, flowtable); ++ if (err < 0) ++ goto out; ++ } ++ + list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { + err = nft_delobj(ctx, obj); + if (err < 0) +@@ -4765,6 +4809,605 @@ static void nf_tables_obj_notify(const s + ctx->afi->family, ctx->report, GFP_KERNEL); + } + ++/* ++ * Flow tables ++ */ ++void nft_register_flowtable_type(struct nf_flowtable_type *type) ++{ ++ nfnl_lock(NFNL_SUBSYS_NFTABLES); ++ list_add_tail_rcu(&type->list, &nf_tables_flowtables); ++ nfnl_unlock(NFNL_SUBSYS_NFTABLES); ++} ++EXPORT_SYMBOL_GPL(nft_register_flowtable_type); ++ ++void nft_unregister_flowtable_type(struct nf_flowtable_type *type) ++{ ++ nfnl_lock(NFNL_SUBSYS_NFTABLES); ++ list_del_rcu(&type->list); ++ nfnl_unlock(NFNL_SUBSYS_NFTABLES); ++} ++EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type); ++ ++static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { ++ [NFTA_FLOWTABLE_TABLE] = { .type = NLA_STRING, ++ .len = NFT_NAME_MAXLEN - 1 }, ++ [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING, ++ .len = NFT_NAME_MAXLEN - 1 }, ++ [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, ++}; ++ ++struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, ++ const struct nlattr *nla, ++ u8 genmask) ++{ ++ struct nft_flowtable *flowtable; ++ ++ list_for_each_entry(flowtable, &table->flowtables, list) { ++ if (!nla_strcmp(nla, flowtable->name) && ++ nft_active_genmask(flowtable, genmask)) ++ return flowtable; ++ } ++ return ERR_PTR(-ENOENT); ++} ++EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup); ++ ++#define NFT_FLOWTABLE_DEVICE_MAX 8 ++ ++static int nf_tables_parse_devices(const struct nft_ctx *ctx, ++ const struct nlattr *attr, ++ struct net_device *dev_array[], int *len) ++{ ++ const struct nlattr *tmp; ++ struct net_device *dev; ++ char ifname[IFNAMSIZ]; ++ int rem, n = 0, err; ++ ++ nla_for_each_nested(tmp, attr, rem) { ++ if (nla_type(tmp) != NFTA_DEVICE_NAME) { ++ err = -EINVAL; ++ goto err1; ++ } ++ ++ nla_strlcpy(ifname, tmp, IFNAMSIZ); ++ dev = dev_get_by_name(ctx->net, ifname); ++ if (!dev) { ++ err = -ENOENT; ++ goto err1; ++ } ++ ++ dev_array[n++] = dev; ++ if (n == NFT_FLOWTABLE_DEVICE_MAX) { ++ err = -EFBIG; ++ goto err1; ++ } ++ } ++ if (!len) ++ return -EINVAL; ++ ++ err = 0; ++err1: ++ *len = n; ++ return err; ++} ++ ++static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = { ++ [NFTA_FLOWTABLE_HOOK_NUM] = { .type = NLA_U32 }, ++ [NFTA_FLOWTABLE_HOOK_PRIORITY] = { .type = NLA_U32 }, ++ [NFTA_FLOWTABLE_HOOK_DEVS] = { .type = NLA_NESTED }, ++}; ++ ++static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx, ++ const struct nlattr *attr, ++ struct nft_flowtable *flowtable) ++{ ++ struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX]; ++ struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1]; ++ struct nf_hook_ops *ops; ++ int hooknum, priority; ++ int err, n = 0, i; ++ ++ err = nla_parse_nested(tb, NFTA_FLOWTABLE_HOOK_MAX, attr, ++ nft_flowtable_hook_policy, NULL); ++ if (err < 0) ++ return err; ++ ++ if (!tb[NFTA_FLOWTABLE_HOOK_NUM] || ++ !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] || ++ !tb[NFTA_FLOWTABLE_HOOK_DEVS]) ++ return -EINVAL; ++ ++ hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM])); ++ if (hooknum >= ctx->afi->nhooks) ++ return -EINVAL; ++ ++ priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY])); ++ ++ err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS], ++ dev_array, &n); ++ if (err < 0) ++ goto err1; ++ ++ ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL); ++ if (!ops) { ++ err = -ENOMEM; ++ goto err1; ++ } ++ ++ flowtable->ops = ops; ++ flowtable->ops_len = n; ++ ++ for (i = 0; i < n; i++) { ++ flowtable->ops[i].pf = NFPROTO_NETDEV; ++ flowtable->ops[i].hooknum = hooknum; ++ flowtable->ops[i].priority = priority; ++ flowtable->ops[i].priv = &flowtable->data.rhashtable; ++ flowtable->ops[i].hook = flowtable->data.type->hook; ++ flowtable->ops[i].dev = dev_array[i]; ++ } ++ ++ err = 0; ++err1: ++ for (i = 0; i < n; i++) ++ dev_put(dev_array[i]); ++ ++ return err; ++} ++ ++static const struct nf_flowtable_type * ++__nft_flowtable_type_get(const struct nft_af_info *afi) ++{ ++ const struct nf_flowtable_type *type; ++ ++ list_for_each_entry(type, &nf_tables_flowtables, list) { ++ if (afi->family == type->family) ++ return type; ++ } ++ return NULL; ++} ++ ++static const struct nf_flowtable_type * ++nft_flowtable_type_get(const struct nft_af_info *afi) ++{ ++ const struct nf_flowtable_type *type; ++ ++ type = __nft_flowtable_type_get(afi); ++ if (type != NULL && try_module_get(type->owner)) ++ return type; ++ ++#ifdef CONFIG_MODULES ++ if (type == NULL) { ++ nfnl_unlock(NFNL_SUBSYS_NFTABLES); ++ request_module("nf-flowtable-%u", afi->family); ++ nfnl_lock(NFNL_SUBSYS_NFTABLES); ++ if (__nft_flowtable_type_get(afi)) ++ return ERR_PTR(-EAGAIN); ++ } ++#endif ++ return ERR_PTR(-ENOENT); ++} ++ ++void nft_flow_table_iterate(struct net *net, ++ void (*iter)(struct nf_flowtable *flowtable, void *data), ++ void *data) ++{ ++ struct nft_flowtable *flowtable; ++ const struct nft_af_info *afi; ++ const struct nft_table *table; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(afi, &net->nft.af_info, list) { ++ list_for_each_entry_rcu(table, &afi->tables, list) { ++ list_for_each_entry_rcu(flowtable, &table->flowtables, list) { ++ iter(&flowtable->data, data); ++ } ++ } ++ } ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nft_flow_table_iterate); ++ ++static void nft_unregister_flowtable_net_hooks(struct net *net, ++ struct nft_flowtable *flowtable) ++{ ++ int i; ++ ++ for (i = 0; i < flowtable->ops_len; i++) { ++ if (!flowtable->ops[i].dev) ++ continue; ++ ++ nf_unregister_net_hook(net, &flowtable->ops[i]); ++ } ++} ++ ++static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, ++ struct sk_buff *skb, ++ const struct nlmsghdr *nlh, ++ const struct nlattr * const nla[], ++ struct netlink_ext_ack *extack) ++{ ++ const struct nfgenmsg *nfmsg = nlmsg_data(nlh); ++ const struct nf_flowtable_type *type; ++ u8 genmask = nft_genmask_next(net); ++ int family = nfmsg->nfgen_family; ++ struct nft_flowtable *flowtable; ++ struct nft_af_info *afi; ++ struct nft_table *table; ++ struct nft_ctx ctx; ++ int err, i, k; ++ ++ if (!nla[NFTA_FLOWTABLE_TABLE] || ++ !nla[NFTA_FLOWTABLE_NAME] || ++ !nla[NFTA_FLOWTABLE_HOOK]) ++ return -EINVAL; ++ ++ afi = nf_tables_afinfo_lookup(net, family, true); ++ if (IS_ERR(afi)) ++ return PTR_ERR(afi); ++ ++ table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); ++ if (IS_ERR(table)) ++ return PTR_ERR(table); ++ ++ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], ++ genmask); ++ if (IS_ERR(flowtable)) { ++ err = PTR_ERR(flowtable); ++ if (err != -ENOENT) ++ return err; ++ } else { ++ if (nlh->nlmsg_flags & NLM_F_EXCL) ++ return -EEXIST; ++ ++ return 0; ++ } ++ ++ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ ++ flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); ++ if (!flowtable) ++ return -ENOMEM; ++ ++ flowtable->table = table; ++ flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL); ++ if (!flowtable->name) { ++ err = -ENOMEM; ++ goto err1; ++ } ++ ++ type = nft_flowtable_type_get(afi); ++ if (IS_ERR(type)) { ++ err = PTR_ERR(type); ++ goto err2; ++ } ++ ++ flowtable->data.type = type; ++ err = rhashtable_init(&flowtable->data.rhashtable, type->params); ++ if (err < 0) ++ goto err3; ++ ++ err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], ++ flowtable); ++ if (err < 0) ++ goto err3; ++ ++ for (i = 0; i < flowtable->ops_len; i++) { ++ err = nf_register_net_hook(net, &flowtable->ops[i]); ++ if (err < 0) ++ goto err4; ++ } ++ ++ err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); ++ if (err < 0) ++ goto err5; ++ ++ INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc); ++ queue_delayed_work(system_power_efficient_wq, ++ &flowtable->data.gc_work, HZ); ++ ++ list_add_tail_rcu(&flowtable->list, &table->flowtables); ++ table->use++; ++ ++ return 0; ++err5: ++ i = flowtable->ops_len; ++err4: ++ for (k = i - 1; k >= 0; k--) ++ nf_unregister_net_hook(net, &flowtable->ops[i]); ++ ++ kfree(flowtable->ops); ++err3: ++ module_put(type->owner); ++err2: ++ kfree(flowtable->name); ++err1: ++ kfree(flowtable); ++ return err; ++} ++ ++static int nf_tables_delflowtable(struct net *net, struct sock *nlsk, ++ struct sk_buff *skb, ++ const struct nlmsghdr *nlh, ++ const struct nlattr * const nla[], ++ struct netlink_ext_ack *extack) ++{ ++ const struct nfgenmsg *nfmsg = nlmsg_data(nlh); ++ u8 genmask = nft_genmask_next(net); ++ int family = nfmsg->nfgen_family; ++ struct nft_flowtable *flowtable; ++ struct nft_af_info *afi; ++ struct nft_table *table; ++ struct nft_ctx ctx; ++ ++ afi = nf_tables_afinfo_lookup(net, family, true); ++ if (IS_ERR(afi)) ++ return PTR_ERR(afi); ++ ++ table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); ++ if (IS_ERR(table)) ++ return PTR_ERR(table); ++ ++ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], ++ genmask); ++ if (IS_ERR(flowtable)) ++ return PTR_ERR(flowtable); ++ if (flowtable->use > 0) ++ return -EBUSY; ++ ++ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ ++ return nft_delflowtable(&ctx, flowtable); ++} ++ ++static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, ++ u32 portid, u32 seq, int event, ++ u32 flags, int family, ++ struct nft_flowtable *flowtable) ++{ ++ struct nlattr *nest, *nest_devs; ++ struct nfgenmsg *nfmsg; ++ struct nlmsghdr *nlh; ++ int i; ++ ++ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); ++ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); ++ if (nlh == NULL) ++ goto nla_put_failure; ++ ++ nfmsg = nlmsg_data(nlh); ++ nfmsg->nfgen_family = family; ++ nfmsg->version = NFNETLINK_V0; ++ nfmsg->res_id = htons(net->nft.base_seq & 0xffff); ++ ++ if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || ++ nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || ++ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use))) ++ goto nla_put_failure; ++ ++ nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); ++ if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || ++ nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) ++ goto nla_put_failure; ++ ++ nest_devs = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK_DEVS); ++ if (!nest_devs) ++ goto nla_put_failure; ++ ++ for (i = 0; i < flowtable->ops_len; i++) { ++ if (flowtable->ops[i].dev && ++ nla_put_string(skb, NFTA_DEVICE_NAME, ++ flowtable->ops[i].dev->name)) ++ goto nla_put_failure; ++ } ++ nla_nest_end(skb, nest_devs); ++ nla_nest_end(skb, nest); ++ ++ nlmsg_end(skb, nlh); ++ return 0; ++ ++nla_put_failure: ++ nlmsg_trim(skb, nlh); ++ return -1; ++} ++ ++struct nft_flowtable_filter { ++ char *table; ++}; ++ ++static int nf_tables_dump_flowtable(struct sk_buff *skb, ++ struct netlink_callback *cb) ++{ ++ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); ++ struct nft_flowtable_filter *filter = cb->data; ++ unsigned int idx = 0, s_idx = cb->args[0]; ++ struct net *net = sock_net(skb->sk); ++ int family = nfmsg->nfgen_family; ++ struct nft_flowtable *flowtable; ++ const struct nft_af_info *afi; ++ const struct nft_table *table; ++ ++ rcu_read_lock(); ++ cb->seq = net->nft.base_seq; ++ ++ list_for_each_entry_rcu(afi, &net->nft.af_info, list) { ++ if (family != NFPROTO_UNSPEC && family != afi->family) ++ continue; ++ ++ list_for_each_entry_rcu(table, &afi->tables, list) { ++ list_for_each_entry_rcu(flowtable, &table->flowtables, list) { ++ if (!nft_is_active(net, flowtable)) ++ goto cont; ++ if (idx < s_idx) ++ goto cont; ++ if (idx > s_idx) ++ memset(&cb->args[1], 0, ++ sizeof(cb->args) - sizeof(cb->args[0])); ++ if (filter && filter->table[0] && ++ strcmp(filter->table, table->name)) ++ goto cont; ++ ++ if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ NFT_MSG_NEWFLOWTABLE, ++ NLM_F_MULTI | NLM_F_APPEND, ++ afi->family, flowtable) < 0) ++ goto done; ++ ++ nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++cont: ++ idx++; ++ } ++ } ++ } ++done: ++ rcu_read_unlock(); ++ ++ cb->args[0] = idx; ++ return skb->len; ++} ++ ++static int nf_tables_dump_flowtable_done(struct netlink_callback *cb) ++{ ++ struct nft_flowtable_filter *filter = cb->data; ++ ++ if (!filter) ++ return 0; ++ ++ kfree(filter->table); ++ kfree(filter); ++ ++ return 0; ++} ++ ++static struct nft_flowtable_filter * ++nft_flowtable_filter_alloc(const struct nlattr * const nla[]) ++{ ++ struct nft_flowtable_filter *filter; ++ ++ filter = kzalloc(sizeof(*filter), GFP_KERNEL); ++ if (!filter) ++ return ERR_PTR(-ENOMEM); ++ ++ if (nla[NFTA_FLOWTABLE_TABLE]) { ++ filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], ++ GFP_KERNEL); ++ if (!filter->table) { ++ kfree(filter); ++ return ERR_PTR(-ENOMEM); ++ } ++ } ++ return filter; ++} ++ ++static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, ++ struct sk_buff *skb, ++ const struct nlmsghdr *nlh, ++ const struct nlattr * const nla[], ++ struct netlink_ext_ack *extack) ++{ ++ const struct nfgenmsg *nfmsg = nlmsg_data(nlh); ++ u8 genmask = nft_genmask_cur(net); ++ int family = nfmsg->nfgen_family; ++ struct nft_flowtable *flowtable; ++ const struct nft_af_info *afi; ++ const struct nft_table *table; ++ struct sk_buff *skb2; ++ int err; ++ ++ if (nlh->nlmsg_flags & NLM_F_DUMP) { ++ struct netlink_dump_control c = { ++ .dump = nf_tables_dump_flowtable, ++ .done = nf_tables_dump_flowtable_done, ++ }; ++ ++ if (nla[NFTA_FLOWTABLE_TABLE]) { ++ struct nft_flowtable_filter *filter; ++ ++ filter = nft_flowtable_filter_alloc(nla); ++ if (IS_ERR(filter)) ++ return -ENOMEM; ++ ++ c.data = filter; ++ } ++ return netlink_dump_start(nlsk, skb, nlh, &c); ++ } ++ ++ if (!nla[NFTA_FLOWTABLE_NAME]) ++ return -EINVAL; ++ ++ afi = nf_tables_afinfo_lookup(net, family, false); ++ if (IS_ERR(afi)) ++ return PTR_ERR(afi); ++ ++ table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); ++ if (IS_ERR(table)) ++ return PTR_ERR(table); ++ ++ flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], ++ genmask); ++ if (IS_ERR(table)) ++ return PTR_ERR(flowtable); ++ ++ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); ++ if (!skb2) ++ return -ENOMEM; ++ ++ err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid, ++ nlh->nlmsg_seq, ++ NFT_MSG_NEWFLOWTABLE, 0, family, ++ flowtable); ++ if (err < 0) ++ goto err; ++ ++ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); ++err: ++ kfree_skb(skb2); ++ return err; ++} ++ ++static void nf_tables_flowtable_notify(struct nft_ctx *ctx, ++ struct nft_flowtable *flowtable, ++ int event) ++{ ++ struct sk_buff *skb; ++ int err; ++ ++ if (ctx->report && ++ !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) ++ return; ++ ++ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); ++ if (skb == NULL) ++ goto err; ++ ++ err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, ++ ctx->seq, event, 0, ++ ctx->afi->family, flowtable); ++ if (err < 0) { ++ kfree_skb(skb); ++ goto err; ++ } ++ ++ nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, ++ ctx->report, GFP_KERNEL); ++ return; ++err: ++ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); ++} ++ ++static void nft_flowtable_destroy(void *ptr, void *arg) ++{ ++ kfree(ptr); ++} ++ ++static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) ++{ ++ cancel_delayed_work_sync(&flowtable->data.gc_work); ++ kfree(flowtable->name); ++ rhashtable_free_and_destroy(&flowtable->data.rhashtable, ++ nft_flowtable_destroy, NULL); ++ module_put(flowtable->data.type->owner); ++} ++ + static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq) + { +@@ -4795,6 +5438,49 @@ nla_put_failure: + return -EMSGSIZE; + } + ++static void nft_flowtable_event(unsigned long event, struct net_device *dev, ++ struct nft_flowtable *flowtable) ++{ ++ int i; ++ ++ for (i = 0; i < flowtable->ops_len; i++) { ++ if (flowtable->ops[i].dev != dev) ++ continue; ++ ++ nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]); ++ flowtable->ops[i].dev = NULL; ++ break; ++ } ++} ++ ++static int nf_tables_flowtable_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ struct nft_flowtable *flowtable; ++ struct nft_table *table; ++ struct nft_af_info *afi; ++ ++ if (event != NETDEV_UNREGISTER) ++ return 0; ++ ++ nfnl_lock(NFNL_SUBSYS_NFTABLES); ++ list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { ++ list_for_each_entry(table, &afi->tables, list) { ++ list_for_each_entry(flowtable, &table->flowtables, list) { ++ nft_flowtable_event(event, dev, flowtable); ++ } ++ } ++ } ++ nfnl_unlock(NFNL_SUBSYS_NFTABLES); ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block nf_tables_flowtable_notifier = { ++ .notifier_call = nf_tables_flowtable_event, ++}; ++ + static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, + int event) + { +@@ -4947,6 +5633,21 @@ static const struct nfnl_callback nf_tab + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, ++ [NFT_MSG_NEWFLOWTABLE] = { ++ .call_batch = nf_tables_newflowtable, ++ .attr_count = NFTA_FLOWTABLE_MAX, ++ .policy = nft_flowtable_policy, ++ }, ++ [NFT_MSG_GETFLOWTABLE] = { ++ .call = nf_tables_getflowtable, ++ .attr_count = NFTA_FLOWTABLE_MAX, ++ .policy = nft_flowtable_policy, ++ }, ++ [NFT_MSG_DELFLOWTABLE] = { ++ .call_batch = nf_tables_delflowtable, ++ .attr_count = NFTA_FLOWTABLE_MAX, ++ .policy = nft_flowtable_policy, ++ }, + }; + + static void nft_chain_commit_update(struct nft_trans *trans) +@@ -4992,6 +5693,9 @@ static void nf_tables_commit_release(str + case NFT_MSG_DELOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; ++ case NFT_MSG_DELFLOWTABLE: ++ nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); ++ break; + } + kfree(trans); + } +@@ -5109,6 +5813,21 @@ static int nf_tables_commit(struct net * + nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + NFT_MSG_DELOBJ); + break; ++ case NFT_MSG_NEWFLOWTABLE: ++ nft_clear(net, nft_trans_flowtable(trans)); ++ nf_tables_flowtable_notify(&trans->ctx, ++ nft_trans_flowtable(trans), ++ NFT_MSG_NEWFLOWTABLE); ++ nft_trans_destroy(trans); ++ break; ++ case NFT_MSG_DELFLOWTABLE: ++ list_del_rcu(&nft_trans_flowtable(trans)->list); ++ nf_tables_flowtable_notify(&trans->ctx, ++ nft_trans_flowtable(trans), ++ NFT_MSG_DELFLOWTABLE); ++ nft_unregister_flowtable_net_hooks(net, ++ nft_trans_flowtable(trans)); ++ break; + } + } + +@@ -5146,6 +5865,9 @@ static void nf_tables_abort_release(stru + case NFT_MSG_NEWOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; ++ case NFT_MSG_NEWFLOWTABLE: ++ nf_tables_flowtable_destroy(nft_trans_flowtable(trans)); ++ break; + } + kfree(trans); + } +@@ -5235,6 +5957,17 @@ static int nf_tables_abort(struct net *n + nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_trans_destroy(trans); + break; ++ case NFT_MSG_NEWFLOWTABLE: ++ trans->ctx.table->use--; ++ list_del_rcu(&nft_trans_flowtable(trans)->list); ++ nft_unregister_flowtable_net_hooks(net, ++ nft_trans_flowtable(trans)); ++ break; ++ case NFT_MSG_DELFLOWTABLE: ++ trans->ctx.table->use++; ++ nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); ++ nft_trans_destroy(trans); ++ break; + } + } + +@@ -5785,6 +6518,7 @@ EXPORT_SYMBOL_GPL(__nft_release_basechai + /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ + static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) + { ++ struct nft_flowtable *flowtable, *nf; + struct nft_table *table, *nt; + struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; +@@ -5798,6 +6532,9 @@ static void __nft_release_afinfo(struct + list_for_each_entry_safe(table, nt, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); ++ list_for_each_entry(flowtable, &table->flowtables, list) ++ nf_unregister_net_hooks(net, flowtable->ops, ++ flowtable->ops_len); + /* No packets are walking on these chains anymore. */ + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { +@@ -5808,6 +6545,11 @@ static void __nft_release_afinfo(struct + nf_tables_rule_destroy(&ctx, rule); + } + } ++ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { ++ list_del(&flowtable->list); ++ table->use--; ++ nf_tables_flowtable_destroy(flowtable); ++ } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + table->use--; +@@ -5851,6 +6593,8 @@ static int __init nf_tables_module_init( + if (err < 0) + goto err3; + ++ register_netdevice_notifier(&nf_tables_flowtable_notifier); ++ + pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n"); + return register_pernet_subsys(&nf_tables_net_ops); + err3: +@@ -5865,6 +6609,7 @@ static void __exit nf_tables_module_exit + { + unregister_pernet_subsys(&nf_tables_net_ops); + nfnetlink_subsys_unregister(&nf_tables_subsys); ++ unregister_netdevice_notifier(&nf_tables_flowtable_notifier); + rcu_barrier(); + nf_tables_core_module_exit(); + kfree(info); diff --git a/target/linux/generic/backport-4.14/322-netfilter-add-generic-flow-table-infrastructure.patch b/target/linux/generic/backport-4.14/322-netfilter-add-generic-flow-table-infrastructure.patch new file mode 100644 index 0000000000..d811ef006c --- /dev/null +++ b/target/linux/generic/backport-4.14/322-netfilter-add-generic-flow-table-infrastructure.patch @@ -0,0 +1,586 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:04:11 +0100 +Subject: [PATCH] netfilter: add generic flow table infrastructure + +This patch defines the API to interact with flow tables, this allows to +add, delete and lookup for entries in the flow table. This also adds the +generic garbage code that removes entries that have expired, ie. no +traffic has been seen for a while. + +Users of the flow table infrastructure can delete entries via +flow_offload_dead(), which sets the dying bit, this signals the garbage +collector to release an entry from user context. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + create mode 100644 net/netfilter/nf_flow_table.c + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -1,7 +1,12 @@ + #ifndef _NF_FLOW_TABLE_H + #define _NF_FLOW_TABLE_H + ++#include <linux/in.h> ++#include <linux/in6.h> ++#include <linux/netdevice.h> + #include <linux/rhashtable.h> ++#include <linux/rcupdate.h> ++#include <net/dst.h> + + struct nf_flowtable; + +@@ -20,4 +25,93 @@ struct nf_flowtable { + struct delayed_work gc_work; + }; + ++enum flow_offload_tuple_dir { ++ FLOW_OFFLOAD_DIR_ORIGINAL, ++ FLOW_OFFLOAD_DIR_REPLY, ++ __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, ++}; ++#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1) ++ ++struct flow_offload_tuple { ++ union { ++ struct in_addr src_v4; ++ struct in6_addr src_v6; ++ }; ++ union { ++ struct in_addr dst_v4; ++ struct in6_addr dst_v6; ++ }; ++ struct { ++ __be16 src_port; ++ __be16 dst_port; ++ }; ++ ++ int iifidx; ++ ++ u8 l3proto; ++ u8 l4proto; ++ u8 dir; ++ ++ int oifidx; ++ ++ struct dst_entry *dst_cache; ++}; ++ ++struct flow_offload_tuple_rhash { ++ struct rhash_head node; ++ struct flow_offload_tuple tuple; ++}; ++ ++#define FLOW_OFFLOAD_SNAT 0x1 ++#define FLOW_OFFLOAD_DNAT 0x2 ++#define FLOW_OFFLOAD_DYING 0x4 ++ ++struct flow_offload { ++ struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; ++ u32 flags; ++ union { ++ /* Your private driver data here. */ ++ u32 timeout; ++ }; ++}; ++ ++#define NF_FLOW_TIMEOUT (30 * HZ) ++ ++struct nf_flow_route { ++ struct { ++ struct dst_entry *dst; ++ int ifindex; ++ } tuple[FLOW_OFFLOAD_DIR_MAX]; ++}; ++ ++struct flow_offload *flow_offload_alloc(struct nf_conn *ct, ++ struct nf_flow_route *route); ++void flow_offload_free(struct flow_offload *flow); ++ ++int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); ++void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow); ++struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, ++ struct flow_offload_tuple *tuple); ++int nf_flow_table_iterate(struct nf_flowtable *flow_table, ++ void (*iter)(struct flow_offload *flow, void *data), ++ void *data); ++void nf_flow_offload_work_gc(struct work_struct *work); ++extern const struct rhashtable_params nf_flow_offload_rhash_params; ++ ++void flow_offload_dead(struct flow_offload *flow); ++ ++int nf_flow_snat_port(const struct flow_offload *flow, ++ struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, enum flow_offload_tuple_dir dir); ++int nf_flow_dnat_port(const struct flow_offload *flow, ++ struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, enum flow_offload_tuple_dir dir); ++ ++struct flow_ports { ++ __be16 source, dest; ++}; ++ ++#define MODULE_ALIAS_NF_FLOWTABLE(family) \ ++ MODULE_ALIAS("nf-flowtable-" __stringify(family)) ++ + #endif /* _FLOW_OFFLOAD_H */ +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -661,6 +661,13 @@ endif # NF_TABLES_NETDEV + + endif # NF_TABLES + ++config NF_FLOW_TABLE ++ tristate "Netfilter flow table module" ++ help ++ This option adds the flow table core infrastructure. ++ ++ To compile it as a module, choose M here. ++ + config NETFILTER_XTABLES + tristate "Netfilter Xtables support (required for ip_tables)" + default m if NETFILTER_ADVANCED=n +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -110,6 +110,9 @@ obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_ + obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o + obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o + ++# flow table infrastructure ++obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o ++ + # generic X tables + obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o + +--- /dev/null ++++ b/net/netfilter/nf_flow_table.c +@@ -0,0 +1,429 @@ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/rhashtable.h> ++#include <linux/netdevice.h> ++#include <net/netfilter/nf_flow_table.h> ++#include <net/netfilter/nf_conntrack.h> ++#include <net/netfilter/nf_conntrack_core.h> ++#include <net/netfilter/nf_conntrack_tuple.h> ++ ++struct flow_offload_entry { ++ struct flow_offload flow; ++ struct nf_conn *ct; ++ struct rcu_head rcu_head; ++}; ++ ++struct flow_offload * ++flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) ++{ ++ struct flow_offload_entry *entry; ++ struct flow_offload *flow; ++ ++ if (unlikely(nf_ct_is_dying(ct) || ++ !atomic_inc_not_zero(&ct->ct_general.use))) ++ return NULL; ++ ++ entry = kzalloc(sizeof(*entry), GFP_ATOMIC); ++ if (!entry) ++ goto err_ct_refcnt; ++ ++ flow = &entry->flow; ++ ++ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) ++ goto err_dst_cache_original; ++ ++ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) ++ goto err_dst_cache_reply; ++ ++ entry->ct = ct; ++ ++ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) { ++ case NFPROTO_IPV4: ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 = ++ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 = ++ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in; ++ break; ++ case NFPROTO_IPV6: ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 = ++ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 = ++ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6; ++ break; ++ } ++ ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; ++ ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache = ++ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache = ++ route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst; ++ ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port = ++ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port = ++ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port = ++ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; ++ ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir = ++ FLOW_OFFLOAD_DIR_ORIGINAL; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir = ++ FLOW_OFFLOAD_DIR_REPLY; ++ ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = ++ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx = ++ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = ++ route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; ++ flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx = ++ route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; ++ ++ if (ct->status & IPS_SRC_NAT) ++ flow->flags |= FLOW_OFFLOAD_SNAT; ++ else if (ct->status & IPS_DST_NAT) ++ flow->flags |= FLOW_OFFLOAD_DNAT; ++ ++ return flow; ++ ++err_dst_cache_reply: ++ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); ++err_dst_cache_original: ++ kfree(entry); ++err_ct_refcnt: ++ nf_ct_put(ct); ++ ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(flow_offload_alloc); ++ ++void flow_offload_free(struct flow_offload *flow) ++{ ++ struct flow_offload_entry *e; ++ ++ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); ++ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); ++ e = container_of(flow, struct flow_offload_entry, flow); ++ kfree(e); ++} ++EXPORT_SYMBOL_GPL(flow_offload_free); ++ ++void flow_offload_dead(struct flow_offload *flow) ++{ ++ flow->flags |= FLOW_OFFLOAD_DYING; ++} ++EXPORT_SYMBOL_GPL(flow_offload_dead); ++ ++int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) ++{ ++ flow->timeout = (u32)jiffies; ++ ++ rhashtable_insert_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, ++ *flow_table->type->params); ++ rhashtable_insert_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, ++ *flow_table->type->params); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(flow_offload_add); ++ ++void flow_offload_del(struct nf_flowtable *flow_table, ++ struct flow_offload *flow) ++{ ++ struct flow_offload_entry *e; ++ ++ rhashtable_remove_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, ++ *flow_table->type->params); ++ rhashtable_remove_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, ++ *flow_table->type->params); ++ ++ e = container_of(flow, struct flow_offload_entry, flow); ++ kfree_rcu(e, rcu_head); ++} ++EXPORT_SYMBOL_GPL(flow_offload_del); ++ ++struct flow_offload_tuple_rhash * ++flow_offload_lookup(struct nf_flowtable *flow_table, ++ struct flow_offload_tuple *tuple) ++{ ++ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, ++ *flow_table->type->params); ++} ++EXPORT_SYMBOL_GPL(flow_offload_lookup); ++ ++static void nf_flow_release_ct(const struct flow_offload *flow) ++{ ++ struct flow_offload_entry *e; ++ ++ e = container_of(flow, struct flow_offload_entry, flow); ++ nf_ct_delete(e->ct, 0, 0); ++ nf_ct_put(e->ct); ++} ++ ++int nf_flow_table_iterate(struct nf_flowtable *flow_table, ++ void (*iter)(struct flow_offload *flow, void *data), ++ void *data) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct rhashtable_iter hti; ++ struct flow_offload *flow; ++ int err; ++ ++ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); ++ if (err) ++ return err; ++ ++ rhashtable_walk_start(&hti); ++ ++ while ((tuplehash = rhashtable_walk_next(&hti))) { ++ if (IS_ERR(tuplehash)) { ++ err = PTR_ERR(tuplehash); ++ if (err != -EAGAIN) ++ goto out; ++ ++ continue; ++ } ++ if (tuplehash->tuple.dir) ++ continue; ++ ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); ++ ++ iter(flow, data); ++ } ++out: ++ rhashtable_walk_stop(&hti); ++ rhashtable_walk_exit(&hti); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_iterate); ++ ++static inline bool nf_flow_has_expired(const struct flow_offload *flow) ++{ ++ return (__s32)(flow->timeout - (u32)jiffies) <= 0; ++} ++ ++static inline bool nf_flow_is_dying(const struct flow_offload *flow) ++{ ++ return flow->flags & FLOW_OFFLOAD_DYING; ++} ++ ++void nf_flow_offload_work_gc(struct work_struct *work) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct nf_flowtable *flow_table; ++ struct rhashtable_iter hti; ++ struct flow_offload *flow; ++ int err; ++ ++ flow_table = container_of(work, struct nf_flowtable, gc_work.work); ++ ++ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); ++ if (err) ++ goto schedule; ++ ++ rhashtable_walk_start(&hti); ++ ++ while ((tuplehash = rhashtable_walk_next(&hti))) { ++ if (IS_ERR(tuplehash)) { ++ err = PTR_ERR(tuplehash); ++ if (err != -EAGAIN) ++ goto out; ++ ++ continue; ++ } ++ if (tuplehash->tuple.dir) ++ continue; ++ ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); ++ ++ if (nf_flow_has_expired(flow) || ++ nf_flow_is_dying(flow)) { ++ flow_offload_del(flow_table, flow); ++ nf_flow_release_ct(flow); ++ } ++ } ++out: ++ rhashtable_walk_stop(&hti); ++ rhashtable_walk_exit(&hti); ++schedule: ++ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); ++ ++static u32 flow_offload_hash(const void *data, u32 len, u32 seed) ++{ ++ const struct flow_offload_tuple *tuple = data; ++ ++ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); ++} ++ ++static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) ++{ ++ const struct flow_offload_tuple_rhash *tuplehash = data; ++ ++ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); ++} ++ ++static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, ++ const void *ptr) ++{ ++ const struct flow_offload_tuple *tuple = arg->key; ++ const struct flow_offload_tuple_rhash *x = ptr; ++ ++ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) ++ return 1; ++ ++ return 0; ++} ++ ++const struct rhashtable_params nf_flow_offload_rhash_params = { ++ .head_offset = offsetof(struct flow_offload_tuple_rhash, node), ++ .hashfn = flow_offload_hash, ++ .obj_hashfn = flow_offload_hash_obj, ++ .obj_cmpfn = flow_offload_hash_cmp, ++ .automatic_shrinking = true, ++}; ++EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params); ++ ++static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, ++ __be16 port, __be16 new_port) ++{ ++ struct tcphdr *tcph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*tcph))) ++ return -1; ++ ++ tcph = (void *)(skb_network_header(skb) + thoff); ++ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); ++ ++ return 0; ++} ++ ++static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, ++ __be16 port, __be16 new_port) ++{ ++ struct udphdr *udph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*udph))) ++ return -1; ++ ++ udph = (void *)(skb_network_header(skb) + thoff); ++ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { ++ inet_proto_csum_replace2(&udph->check, skb, port, ++ new_port, true); ++ if (!udph->check) ++ udph->check = CSUM_MANGLED_0; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, __be16 port, __be16 new_port) ++{ ++ switch (protocol) { ++ case IPPROTO_TCP: ++ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) ++ return NF_DROP; ++ break; ++ case IPPROTO_UDP: ++ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) ++ return NF_DROP; ++ break; ++ } ++ ++ return 0; ++} ++ ++int nf_flow_snat_port(const struct flow_offload *flow, ++ struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, enum flow_offload_tuple_dir dir) ++{ ++ struct flow_ports *hdr; ++ __be16 port, new_port; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || ++ skb_try_make_writable(skb, thoff + sizeof(*hdr))) ++ return -1; ++ ++ hdr = (void *)(skb_network_header(skb) + thoff); ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ port = hdr->source; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; ++ hdr->source = new_port; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ port = hdr->dest; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; ++ hdr->dest = new_port; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_port(skb, thoff, protocol, port, new_port); ++} ++EXPORT_SYMBOL_GPL(nf_flow_snat_port); ++ ++int nf_flow_dnat_port(const struct flow_offload *flow, ++ struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, enum flow_offload_tuple_dir dir) ++{ ++ struct flow_ports *hdr; ++ __be16 port, new_port; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || ++ skb_try_make_writable(skb, thoff + sizeof(*hdr))) ++ return -1; ++ ++ hdr = (void *)(skb_network_header(skb) + thoff); ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ port = hdr->dest; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; ++ hdr->dest = new_port; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ port = hdr->source; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; ++ hdr->source = new_port; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_port(skb, thoff, protocol, port, new_port); ++} ++EXPORT_SYMBOL_GPL(nf_flow_dnat_port); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); diff --git a/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch b/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch new file mode 100644 index 0000000000..6f36171605 --- /dev/null +++ b/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch @@ -0,0 +1,334 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:04:15 +0100 +Subject: [PATCH] netfilter: flow table support for IPv4 + +This patch adds the IPv4 flow table type, that implements the datapath +flow table to forward IPv4 traffic. Rationale is: + +1) Look up for the packet in the flow table, from the ingress hook. +2) If there's a hit, decrement ttl and pass it on to the neighbour layer + for transmission. +3) If there's a miss, packet is passed up to the classic forwarding + path. + +This patch also supports layer 3 source and destination NAT. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + create mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c + +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -77,6 +77,14 @@ config NF_TABLES_ARP + + endif # NF_TABLES + ++config NF_FLOW_TABLE_IPV4 ++ select NF_FLOW_TABLE ++ tristate "Netfilter flow table IPv4 module" ++ help ++ This option adds the flow table IPv4 support. ++ ++ To compile it as a module, choose M here. ++ + config NF_DUP_IPV4 + tristate "Netfilter IPv4 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK +--- a/net/ipv4/netfilter/Makefile ++++ b/net/ipv4/netfilter/Makefile +@@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redi + obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o + obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o + ++# flow table support ++obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o ++ + # generic IP tables + obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o + +--- /dev/null ++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c +@@ -0,0 +1,283 @@ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/rhashtable.h> ++#include <linux/ip.h> ++#include <linux/netdevice.h> ++#include <net/ip.h> ++#include <net/neighbour.h> ++#include <net/netfilter/nf_flow_table.h> ++#include <net/netfilter/nf_tables.h> ++/* For layer 4 checksum field offset. */ ++#include <linux/tcp.h> ++#include <linux/udp.h> ++ ++static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, ++ __be32 addr, __be32 new_addr) ++{ ++ struct tcphdr *tcph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*tcph))) ++ return -1; ++ ++ tcph = (void *)(skb_network_header(skb) + thoff); ++ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, ++ __be32 addr, __be32 new_addr) ++{ ++ struct udphdr *udph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*udph))) ++ return -1; ++ ++ udph = (void *)(skb_network_header(skb) + thoff); ++ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { ++ inet_proto_csum_replace4(&udph->check, skb, addr, ++ new_addr, true); ++ if (!udph->check) ++ udph->check = CSUM_MANGLED_0; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, ++ unsigned int thoff, __be32 addr, ++ __be32 new_addr) ++{ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ case IPPROTO_UDP: ++ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, ++ struct iphdr *iph, unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ __be32 addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = iph->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; ++ iph->saddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = iph->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; ++ iph->daddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ csum_replace4(&iph->check, addr, new_addr); ++ ++ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); ++} ++ ++static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, ++ struct iphdr *iph, unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ __be32 addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = iph->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; ++ iph->daddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = iph->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; ++ iph->saddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); ++} ++ ++static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct iphdr *iph = ip_hdr(skb); ++ unsigned int thoff = iph->ihl * 4; ++ ++ if (flow->flags & FLOW_OFFLOAD_SNAT && ++ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || ++ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) ++ return -1; ++ if (flow->flags & FLOW_OFFLOAD_DNAT && ++ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || ++ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) ++ return -1; ++ ++ return 0; ++} ++ ++static bool ip_has_options(unsigned int thoff) ++{ ++ return thoff != sizeof(struct iphdr); ++} ++ ++static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, ++ struct flow_offload_tuple *tuple) ++{ ++ struct flow_ports *ports; ++ unsigned int thoff; ++ struct iphdr *iph; ++ ++ if (!pskb_may_pull(skb, sizeof(*iph))) ++ return -1; ++ ++ iph = ip_hdr(skb); ++ thoff = iph->ihl * 4; ++ ++ if (ip_is_fragment(iph) || ++ unlikely(ip_has_options(thoff))) ++ return -1; ++ ++ if (iph->protocol != IPPROTO_TCP && ++ iph->protocol != IPPROTO_UDP) ++ return -1; ++ ++ thoff = iph->ihl * 4; ++ if (!pskb_may_pull(skb, thoff + sizeof(*ports))) ++ return -1; ++ ++ ports = (struct flow_ports *)(skb_network_header(skb) + thoff); ++ ++ tuple->src_v4.s_addr = iph->saddr; ++ tuple->dst_v4.s_addr = iph->daddr; ++ tuple->src_port = ports->source; ++ tuple->dst_port = ports->dest; ++ tuple->l3proto = AF_INET; ++ tuple->l4proto = iph->protocol; ++ tuple->iifidx = dev->ifindex; ++ ++ return 0; ++} ++ ++/* Based on ip_exceeds_mtu(). */ ++static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ++{ ++ if (skb->len <= mtu) ++ return false; ++ ++ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ++ return false; ++ ++ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) ++ return false; ++ ++ return true; ++} ++ ++static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt) ++{ ++ u32 mtu; ++ ++ mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); ++ if (__nf_flow_exceeds_mtu(skb, mtu)) ++ return true; ++ ++ return false; ++} ++ ++static unsigned int ++nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct nf_flowtable *flow_table = priv; ++ struct flow_offload_tuple tuple = {}; ++ enum flow_offload_tuple_dir dir; ++ struct flow_offload *flow; ++ struct net_device *outdev; ++ const struct rtable *rt; ++ struct iphdr *iph; ++ __be32 nexthop; ++ ++ if (skb->protocol != htons(ETH_P_IP)) ++ return NF_ACCEPT; ++ ++ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) ++ return NF_ACCEPT; ++ ++ tuplehash = flow_offload_lookup(flow_table, &tuple); ++ if (tuplehash == NULL) ++ return NF_ACCEPT; ++ ++ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); ++ if (!outdev) ++ return NF_ACCEPT; ++ ++ dir = tuplehash->tuple.dir; ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); ++ ++ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; ++ if (unlikely(nf_flow_exceeds_mtu(skb, rt))) ++ return NF_ACCEPT; ++ ++ if (skb_try_make_writable(skb, sizeof(*iph))) ++ return NF_DROP; ++ ++ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && ++ nf_flow_nat_ip(flow, skb, dir) < 0) ++ return NF_DROP; ++ ++ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; ++ iph = ip_hdr(skb); ++ ip_decrease_ttl(iph); ++ ++ skb->dev = outdev; ++ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); ++ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); ++ ++ return NF_STOLEN; ++} ++ ++static struct nf_flowtable_type flowtable_ipv4 = { ++ .family = NFPROTO_IPV4, ++ .params = &nf_flow_offload_rhash_params, ++ .gc = nf_flow_offload_work_gc, ++ .hook = nf_flow_offload_ip_hook, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init nf_flow_ipv4_module_init(void) ++{ ++ nft_register_flowtable_type(&flowtable_ipv4); ++ ++ return 0; ++} ++ ++static void __exit nf_flow_ipv4_module_exit(void) ++{ ++ nft_unregister_flowtable_type(&flowtable_ipv4); ++} ++ ++module_init(nf_flow_ipv4_module_init); ++module_exit(nf_flow_ipv4_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); ++MODULE_ALIAS_NF_FLOWTABLE(AF_INET); diff --git a/target/linux/generic/backport-4.14/324-netfilter-flow-table-support-for-IPv6.patch b/target/linux/generic/backport-4.14/324-netfilter-flow-table-support-for-IPv6.patch new file mode 100644 index 0000000000..a5bbac44f3 --- /dev/null +++ b/target/linux/generic/backport-4.14/324-netfilter-flow-table-support-for-IPv6.patch @@ -0,0 +1,354 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:04:19 +0100 +Subject: [PATCH] netfilter: flow table support for IPv6 + +This patch adds the IPv6 flow table type, that implements the datapath +flow table to forward IPv6 traffic. + +This patch exports ip6_dst_mtu_forward() that is required to check for +mtu to pass up packets that need PMTUD handling to the classic +forwarding path. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c + +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -913,6 +913,8 @@ static inline struct sk_buff *ip6_finish + &inet6_sk(sk)->cork); + } + ++unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst); ++ + int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, + struct flowi6 *fl6); + struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -370,7 +370,7 @@ static inline int ip6_forward_finish(str + return dst_output(net, sk, skb); + } + +-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) ++unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) + { + unsigned int mtu; + struct inet6_dev *idev; +@@ -390,6 +390,7 @@ static unsigned int ip6_dst_mtu_forward( + + return mtu; + } ++EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward); + + static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) + { +--- a/net/ipv6/netfilter/Kconfig ++++ b/net/ipv6/netfilter/Kconfig +@@ -71,6 +71,14 @@ config NFT_FIB_IPV6 + endif # NF_TABLES_IPV6 + endif # NF_TABLES + ++config NF_FLOW_TABLE_IPV6 ++ select NF_FLOW_TABLE ++ tristate "Netfilter flow table IPv6 module" ++ help ++ This option adds the flow table IPv6 support. ++ ++ To compile it as a module, choose M here. ++ + config NF_DUP_IPV6 + tristate "Netfilter IPv6 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK +--- a/net/ipv6/netfilter/Makefile ++++ b/net/ipv6/netfilter/Makefile +@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redi + obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o + obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o + ++# flow table support ++obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o ++ + # matches + obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o + obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o +--- /dev/null ++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c +@@ -0,0 +1,277 @@ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/rhashtable.h> ++#include <linux/ipv6.h> ++#include <linux/netdevice.h> ++#include <linux/ipv6.h> ++#include <net/ipv6.h> ++#include <net/ip6_route.h> ++#include <net/neighbour.h> ++#include <net/netfilter/nf_flow_table.h> ++#include <net/netfilter/nf_tables.h> ++/* For layer 4 checksum field offset. */ ++#include <linux/tcp.h> ++#include <linux/udp.h> ++ ++static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, ++ struct in6_addr *addr, ++ struct in6_addr *new_addr) ++{ ++ struct tcphdr *tcph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*tcph))) ++ return -1; ++ ++ tcph = (void *)(skb_network_header(skb) + thoff); ++ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, ++ new_addr->s6_addr32, true); ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, ++ struct in6_addr *addr, ++ struct in6_addr *new_addr) ++{ ++ struct udphdr *udph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*udph))) ++ return -1; ++ ++ udph = (void *)(skb_network_header(skb) + thoff); ++ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { ++ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, ++ new_addr->s6_addr32, true); ++ if (!udph->check) ++ udph->check = CSUM_MANGLED_0; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, ++ unsigned int thoff, struct in6_addr *addr, ++ struct in6_addr *new_addr) ++{ ++ switch (ip6h->nexthdr) { ++ case IPPROTO_TCP: ++ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ case IPPROTO_UDP: ++ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_snat_ipv6(const struct flow_offload *flow, ++ struct sk_buff *skb, struct ipv6hdr *ip6h, ++ unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct in6_addr addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = ip6h->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; ++ ip6h->saddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = ip6h->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; ++ ip6h->daddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); ++} ++ ++static int nf_flow_dnat_ipv6(const struct flow_offload *flow, ++ struct sk_buff *skb, struct ipv6hdr *ip6h, ++ unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct in6_addr addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = ip6h->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; ++ ip6h->daddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = ip6h->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; ++ ip6h->saddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); ++} ++ ++static int nf_flow_nat_ipv6(const struct flow_offload *flow, ++ struct sk_buff *skb, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct ipv6hdr *ip6h = ipv6_hdr(skb); ++ unsigned int thoff = sizeof(*ip6h); ++ ++ if (flow->flags & FLOW_OFFLOAD_SNAT && ++ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || ++ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) ++ return -1; ++ if (flow->flags & FLOW_OFFLOAD_DNAT && ++ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || ++ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) ++ return -1; ++ ++ return 0; ++} ++ ++static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, ++ struct flow_offload_tuple *tuple) ++{ ++ struct flow_ports *ports; ++ struct ipv6hdr *ip6h; ++ unsigned int thoff; ++ ++ if (!pskb_may_pull(skb, sizeof(*ip6h))) ++ return -1; ++ ++ ip6h = ipv6_hdr(skb); ++ ++ if (ip6h->nexthdr != IPPROTO_TCP && ++ ip6h->nexthdr != IPPROTO_UDP) ++ return -1; ++ ++ thoff = sizeof(*ip6h); ++ if (!pskb_may_pull(skb, thoff + sizeof(*ports))) ++ return -1; ++ ++ ports = (struct flow_ports *)(skb_network_header(skb) + thoff); ++ ++ tuple->src_v6 = ip6h->saddr; ++ tuple->dst_v6 = ip6h->daddr; ++ tuple->src_port = ports->source; ++ tuple->dst_port = ports->dest; ++ tuple->l3proto = AF_INET6; ++ tuple->l4proto = ip6h->nexthdr; ++ tuple->iifidx = dev->ifindex; ++ ++ return 0; ++} ++ ++/* Based on ip_exceeds_mtu(). */ ++static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ++{ ++ if (skb->len <= mtu) ++ return false; ++ ++ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) ++ return false; ++ ++ return true; ++} ++ ++static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt) ++{ ++ u32 mtu; ++ ++ mtu = ip6_dst_mtu_forward(&rt->dst); ++ if (__nf_flow_exceeds_mtu(skb, mtu)) ++ return true; ++ ++ return false; ++} ++ ++static unsigned int ++nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct nf_flowtable *flow_table = priv; ++ struct flow_offload_tuple tuple = {}; ++ enum flow_offload_tuple_dir dir; ++ struct flow_offload *flow; ++ struct net_device *outdev; ++ struct in6_addr *nexthop; ++ struct ipv6hdr *ip6h; ++ struct rt6_info *rt; ++ ++ if (skb->protocol != htons(ETH_P_IPV6)) ++ return NF_ACCEPT; ++ ++ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) ++ return NF_ACCEPT; ++ ++ tuplehash = flow_offload_lookup(flow_table, &tuple); ++ if (tuplehash == NULL) ++ return NF_ACCEPT; ++ ++ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); ++ if (!outdev) ++ return NF_ACCEPT; ++ ++ dir = tuplehash->tuple.dir; ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); ++ ++ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; ++ if (unlikely(nf_flow_exceeds_mtu(skb, rt))) ++ return NF_ACCEPT; ++ ++ if (skb_try_make_writable(skb, sizeof(*ip6h))) ++ return NF_DROP; ++ ++ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && ++ nf_flow_nat_ipv6(flow, skb, dir) < 0) ++ return NF_DROP; ++ ++ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; ++ ip6h = ipv6_hdr(skb); ++ ip6h->hop_limit--; ++ ++ skb->dev = outdev; ++ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); ++ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); ++ ++ return NF_STOLEN; ++} ++ ++static struct nf_flowtable_type flowtable_ipv6 = { ++ .family = NFPROTO_IPV6, ++ .params = &nf_flow_offload_rhash_params, ++ .gc = nf_flow_offload_work_gc, ++ .hook = nf_flow_offload_ipv6_hook, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init nf_flow_ipv6_module_init(void) ++{ ++ nft_register_flowtable_type(&flowtable_ipv6); ++ ++ return 0; ++} ++ ++static void __exit nf_flow_ipv6_module_exit(void) ++{ ++ nft_unregister_flowtable_type(&flowtable_ipv6); ++} ++ ++module_init(nf_flow_ipv6_module_init); ++module_exit(nf_flow_ipv6_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); ++MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); diff --git a/target/linux/generic/backport-4.14/325-netfilter-flow-table-support-for-the-mixed-IPv4-IPv6.patch b/target/linux/generic/backport-4.14/325-netfilter-flow-table-support-for-the-mixed-IPv4-IPv6.patch new file mode 100644 index 0000000000..9fcb1be982 --- /dev/null +++ b/target/linux/generic/backport-4.14/325-netfilter-flow-table-support-for-the-mixed-IPv4-IPv6.patch @@ -0,0 +1,141 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:04:22 +0100 +Subject: [PATCH] netfilter: flow table support for the mixed IPv4/IPv6 family + +This patch adds the IPv6 flow table type, that implements the datapath +flow table to forward IPv6 traffic. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + create mode 100644 net/netfilter/nf_flow_table_inet.c + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -111,6 +111,11 @@ struct flow_ports { + __be16 source, dest; + }; + ++unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state); ++unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state); ++ + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + +--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c ++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c +@@ -202,7 +202,7 @@ static bool nf_flow_exceeds_mtu(struct s + return false; + } + +-static unsigned int ++unsigned int + nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) + { +@@ -254,6 +254,7 @@ nf_flow_offload_ip_hook(void *priv, stru + + return NF_STOLEN; + } ++EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); + + static struct nf_flowtable_type flowtable_ipv4 = { + .family = NFPROTO_IPV4, +--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c ++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c +@@ -196,7 +196,7 @@ static bool nf_flow_exceeds_mtu(struct s + return false; + } + +-static unsigned int ++unsigned int + nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) + { +@@ -248,6 +248,7 @@ nf_flow_offload_ipv6_hook(void *priv, st + + return NF_STOLEN; + } ++EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); + + static struct nf_flowtable_type flowtable_ipv6 = { + .family = NFPROTO_IPV6, +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -661,6 +661,14 @@ endif # NF_TABLES_NETDEV + + endif # NF_TABLES + ++config NF_FLOW_TABLE_INET ++ select NF_FLOW_TABLE ++ tristate "Netfilter flow table mixed IPv4/IPv6 module" ++ help ++ This option adds the flow table mixed IPv4/IPv6 support. ++ ++ To compile it as a module, choose M here. ++ + config NF_FLOW_TABLE + tristate "Netfilter flow table module" + help +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -112,6 +112,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_ + + # flow table infrastructure + obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o ++obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o + + # generic X tables + obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o +--- /dev/null ++++ b/net/netfilter/nf_flow_table_inet.c +@@ -0,0 +1,48 @@ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/rhashtable.h> ++#include <net/netfilter/nf_flow_table.h> ++#include <net/netfilter/nf_tables.h> ++ ++static unsigned int ++nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ switch (skb->protocol) { ++ case htons(ETH_P_IP): ++ return nf_flow_offload_ip_hook(priv, skb, state); ++ case htons(ETH_P_IPV6): ++ return nf_flow_offload_ipv6_hook(priv, skb, state); ++ } ++ ++ return NF_ACCEPT; ++} ++ ++static struct nf_flowtable_type flowtable_inet = { ++ .family = NFPROTO_INET, ++ .params = &nf_flow_offload_rhash_params, ++ .gc = nf_flow_offload_work_gc, ++ .hook = nf_flow_offload_inet_hook, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init nf_flow_inet_module_init(void) ++{ ++ nft_register_flowtable_type(&flowtable_inet); ++ ++ return 0; ++} ++ ++static void __exit nf_flow_inet_module_exit(void) ++{ ++ nft_unregister_flowtable_type(&flowtable_inet); ++} ++ ++module_init(nf_flow_inet_module_init); ++module_exit(nf_flow_inet_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); ++MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */ diff --git a/target/linux/generic/backport-4.14/326-netfilter-nf_tables-flow-offload-expression.patch b/target/linux/generic/backport-4.14/326-netfilter-nf_tables-flow-offload-expression.patch new file mode 100644 index 0000000000..86f1f8a098 --- /dev/null +++ b/target/linux/generic/backport-4.14/326-netfilter-nf_tables-flow-offload-expression.patch @@ -0,0 +1,332 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Sun, 7 Jan 2018 01:04:26 +0100 +Subject: [PATCH] netfilter: nf_tables: flow offload expression + +Add new instruction for the nf_tables VM that allows us to specify what +flows are offloaded into a given flow table via name. This new +instruction creates the flow entry and adds it to the flow table. + +Only established flows, ie. we have seen traffic in both directions, are +added to the flow table. You can still decide to offload entries at a +later stage via packet counting or checking the ct status in case you +want to offload assured conntracks. + +This new extension depends on the conntrack subsystem. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + create mode 100644 net/netfilter/nft_flow_offload.c + +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -957,6 +957,17 @@ enum nft_ct_attributes { + }; + #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) + ++/** ++ * enum nft_flow_attributes - ct offload expression attributes ++ * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) ++ */ ++enum nft_offload_attributes { ++ NFTA_FLOW_UNSPEC, ++ NFTA_FLOW_TABLE_NAME, ++ __NFTA_FLOW_MAX, ++}; ++#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1) ++ + enum nft_limit_type { + NFT_LIMIT_PKTS, + NFT_LIMIT_PKT_BYTES +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -509,6 +509,13 @@ config NFT_CT + This option adds the "ct" expression that you can use to match + connection tracking information such as the flow state. + ++config NFT_FLOW_OFFLOAD ++ depends on NF_CONNTRACK ++ tristate "Netfilter nf_tables hardware flow offload module" ++ help ++ This option adds the "flow_offload" expression that you can use to ++ choose what flows are placed into the hardware. ++ + config NFT_SET_RBTREE + tristate "Netfilter nf_tables rbtree set module" + help +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o + obj-$(CONFIG_NFT_RT) += nft_rt.o + obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o + obj-$(CONFIG_NFT_CT) += nft_ct.o ++obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o + obj-$(CONFIG_NFT_LIMIT) += nft_limit.o + obj-$(CONFIG_NFT_NAT) += nft_nat.o + obj-$(CONFIG_NFT_OBJREF) += nft_objref.o +--- /dev/null ++++ b/net/netfilter/nft_flow_offload.c +@@ -0,0 +1,264 @@ ++#include <linux/kernel.h> ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/netlink.h> ++#include <linux/netfilter.h> ++#include <linux/workqueue.h> ++#include <linux/spinlock.h> ++#include <linux/netfilter/nf_tables.h> ++#include <net/ip.h> /* for ipv4 options. */ ++#include <net/netfilter/nf_tables.h> ++#include <net/netfilter/nf_tables_core.h> ++#include <net/netfilter/nf_conntrack_core.h> ++#include <linux/netfilter/nf_conntrack_common.h> ++#include <net/netfilter/nf_flow_table.h> ++ ++struct nft_flow_offload { ++ struct nft_flowtable *flowtable; ++}; ++ ++static int nft_flow_route(const struct nft_pktinfo *pkt, ++ const struct nf_conn *ct, ++ struct nf_flow_route *route, ++ enum ip_conntrack_dir dir) ++{ ++ struct dst_entry *this_dst = skb_dst(pkt->skb); ++ struct dst_entry *other_dst = NULL; ++ struct flowi fl; ++ ++ memset(&fl, 0, sizeof(fl)); ++ switch (nft_pf(pkt)) { ++ case NFPROTO_IPV4: ++ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; ++ break; ++ case NFPROTO_IPV6: ++ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; ++ break; ++ } ++ ++ nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); ++ if (!other_dst) ++ return -ENOENT; ++ ++ route->tuple[dir].dst = this_dst; ++ route->tuple[dir].ifindex = nft_in(pkt)->ifindex; ++ route->tuple[!dir].dst = other_dst; ++ route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; ++ ++ return 0; ++} ++ ++static bool nft_flow_offload_skip(struct sk_buff *skb) ++{ ++ struct ip_options *opt = &(IPCB(skb)->opt); ++ ++ if (unlikely(opt->optlen)) ++ return true; ++ if (skb_sec_path(skb)) ++ return true; ++ ++ return false; ++} ++ ++static void nft_flow_offload_eval(const struct nft_expr *expr, ++ struct nft_regs *regs, ++ const struct nft_pktinfo *pkt) ++{ ++ struct nft_flow_offload *priv = nft_expr_priv(expr); ++ struct nf_flowtable *flowtable = &priv->flowtable->data; ++ enum ip_conntrack_info ctinfo; ++ struct nf_flow_route route; ++ struct flow_offload *flow; ++ enum ip_conntrack_dir dir; ++ struct nf_conn *ct; ++ int ret; ++ ++ if (nft_flow_offload_skip(pkt->skb)) ++ goto out; ++ ++ ct = nf_ct_get(pkt->skb, &ctinfo); ++ if (!ct) ++ goto out; ++ ++ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { ++ case IPPROTO_TCP: ++ case IPPROTO_UDP: ++ break; ++ default: ++ goto out; ++ } ++ ++ if (test_bit(IPS_HELPER_BIT, &ct->status)) ++ goto out; ++ ++ if (ctinfo == IP_CT_NEW || ++ ctinfo == IP_CT_RELATED) ++ goto out; ++ ++ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ goto out; ++ ++ dir = CTINFO2DIR(ctinfo); ++ if (nft_flow_route(pkt, ct, &route, dir) < 0) ++ goto err_flow_route; ++ ++ flow = flow_offload_alloc(ct, &route); ++ if (!flow) ++ goto err_flow_alloc; ++ ++ ret = flow_offload_add(flowtable, flow); ++ if (ret < 0) ++ goto err_flow_add; ++ ++ return; ++ ++err_flow_add: ++ flow_offload_free(flow); ++err_flow_alloc: ++ dst_release(route.tuple[!dir].dst); ++err_flow_route: ++ clear_bit(IPS_OFFLOAD_BIT, &ct->status); ++out: ++ regs->verdict.code = NFT_BREAK; ++} ++ ++static int nft_flow_offload_validate(const struct nft_ctx *ctx, ++ const struct nft_expr *expr, ++ const struct nft_data **data) ++{ ++ unsigned int hook_mask = (1 << NF_INET_FORWARD); ++ ++ return nft_chain_validate_hooks(ctx->chain, hook_mask); ++} ++ ++static int nft_flow_offload_init(const struct nft_ctx *ctx, ++ const struct nft_expr *expr, ++ const struct nlattr * const tb[]) ++{ ++ struct nft_flow_offload *priv = nft_expr_priv(expr); ++ u8 genmask = nft_genmask_next(ctx->net); ++ struct nft_flowtable *flowtable; ++ ++ if (!tb[NFTA_FLOW_TABLE_NAME]) ++ return -EINVAL; ++ ++ flowtable = nf_tables_flowtable_lookup(ctx->table, ++ tb[NFTA_FLOW_TABLE_NAME], ++ genmask); ++ if (IS_ERR(flowtable)) ++ return PTR_ERR(flowtable); ++ ++ priv->flowtable = flowtable; ++ flowtable->use++; ++ ++ return nf_ct_netns_get(ctx->net, ctx->afi->family); ++} ++ ++static void nft_flow_offload_destroy(const struct nft_ctx *ctx, ++ const struct nft_expr *expr) ++{ ++ struct nft_flow_offload *priv = nft_expr_priv(expr); ++ ++ priv->flowtable->use--; ++ nf_ct_netns_put(ctx->net, ctx->afi->family); ++} ++ ++static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) ++{ ++ struct nft_flow_offload *priv = nft_expr_priv(expr); ++ ++ if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) ++ goto nla_put_failure; ++ ++ return 0; ++ ++nla_put_failure: ++ return -1; ++} ++ ++static struct nft_expr_type nft_flow_offload_type; ++static const struct nft_expr_ops nft_flow_offload_ops = { ++ .type = &nft_flow_offload_type, ++ .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), ++ .eval = nft_flow_offload_eval, ++ .init = nft_flow_offload_init, ++ .destroy = nft_flow_offload_destroy, ++ .validate = nft_flow_offload_validate, ++ .dump = nft_flow_offload_dump, ++}; ++ ++static struct nft_expr_type nft_flow_offload_type __read_mostly = { ++ .name = "flow_offload", ++ .ops = &nft_flow_offload_ops, ++ .maxattr = NFTA_FLOW_MAX, ++ .owner = THIS_MODULE, ++}; ++ ++static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data) ++{ ++ struct net_device *dev = data; ++ ++ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) ++ return; ++ ++ flow_offload_dead(flow); ++} ++ ++static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable, ++ void *data) ++{ ++ nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); ++} ++ ++static int flow_offload_netdev_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ ++ if (event != NETDEV_DOWN) ++ return NOTIFY_DONE; ++ ++ nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev); ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block flow_offload_netdev_notifier = { ++ .notifier_call = flow_offload_netdev_event, ++}; ++ ++static int __init nft_flow_offload_module_init(void) ++{ ++ int err; ++ ++ register_netdevice_notifier(&flow_offload_netdev_notifier); ++ ++ err = nft_register_expr(&nft_flow_offload_type); ++ if (err < 0) ++ goto register_expr; ++ ++ return 0; ++ ++register_expr: ++ unregister_netdevice_notifier(&flow_offload_netdev_notifier); ++ return err; ++} ++ ++static void __exit nft_flow_offload_module_exit(void) ++{ ++ struct net *net; ++ ++ nft_unregister_expr(&nft_flow_offload_type); ++ unregister_netdevice_notifier(&flow_offload_netdev_notifier); ++ rtnl_lock(); ++ for_each_net(net) ++ nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL); ++ rtnl_unlock(); ++} ++ ++module_init(nft_flow_offload_module_init); ++module_exit(nft_flow_offload_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); ++MODULE_ALIAS_NFT_EXPR("flow_offload"); diff --git a/target/linux/generic/backport-4.14/327-netfilter-nf_tables-remove-nhooks-field-from-struct-.patch b/target/linux/generic/backport-4.14/327-netfilter-nf_tables-remove-nhooks-field-from-struct-.patch new file mode 100644 index 0000000000..344671961a --- /dev/null +++ b/target/linux/generic/backport-4.14/327-netfilter-nf_tables-remove-nhooks-field-from-struct-.patch @@ -0,0 +1,113 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 19 Dec 2017 13:53:45 +0100 +Subject: [PATCH] netfilter: nf_tables: remove nhooks field from struct + nft_af_info + +We already validate the hook through bitmask, so this check is +superfluous. When removing this, this patch is also fixing a bug in the +new flowtable codebase, since ctx->afi points to the table family +instead of the netdev family which is where the flowtable is really +hooked in. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -963,7 +963,6 @@ enum nft_af_flags { + * + * @list: used internally + * @family: address family +- * @nhooks: number of hooks in this family + * @owner: module owner + * @tables: used internally + * @flags: family flags +@@ -971,7 +970,6 @@ enum nft_af_flags { + struct nft_af_info { + struct list_head list; + int family; +- unsigned int nhooks; + struct module *owner; + struct list_head tables; + u32 flags; +--- a/net/bridge/netfilter/nf_tables_bridge.c ++++ b/net/bridge/netfilter/nf_tables_bridge.c +@@ -44,7 +44,6 @@ nft_do_chain_bridge(void *priv, + + static struct nft_af_info nft_af_bridge __read_mostly = { + .family = NFPROTO_BRIDGE, +- .nhooks = NF_BR_NUMHOOKS, + .owner = THIS_MODULE, + }; + +--- a/net/ipv4/netfilter/nf_tables_arp.c ++++ b/net/ipv4/netfilter/nf_tables_arp.c +@@ -29,7 +29,6 @@ nft_do_chain_arp(void *priv, + + static struct nft_af_info nft_af_arp __read_mostly = { + .family = NFPROTO_ARP, +- .nhooks = NF_ARP_NUMHOOKS, + .owner = THIS_MODULE, + }; + +--- a/net/ipv4/netfilter/nf_tables_ipv4.c ++++ b/net/ipv4/netfilter/nf_tables_ipv4.c +@@ -32,7 +32,6 @@ static unsigned int nft_do_chain_ipv4(vo + + static struct nft_af_info nft_af_ipv4 __read_mostly = { + .family = NFPROTO_IPV4, +- .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + }; + +--- a/net/ipv6/netfilter/nf_tables_ipv6.c ++++ b/net/ipv6/netfilter/nf_tables_ipv6.c +@@ -30,7 +30,6 @@ static unsigned int nft_do_chain_ipv6(vo + + static struct nft_af_info nft_af_ipv6 __read_mostly = { + .family = NFPROTO_IPV6, +- .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + }; + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1328,9 +1328,6 @@ static int nft_chain_parse_hook(struct n + return -EINVAL; + + hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); +- if (hook->num >= afi->nhooks) +- return -EINVAL; +- + hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + + type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT]; +@@ -4917,7 +4914,7 @@ static int nf_tables_flowtable_parse_hoo + return -EINVAL; + + hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM])); +- if (hooknum >= ctx->afi->nhooks) ++ if (hooknum != NF_NETDEV_INGRESS) + return -EINVAL; + + priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY])); +--- a/net/netfilter/nf_tables_inet.c ++++ b/net/netfilter/nf_tables_inet.c +@@ -40,7 +40,6 @@ static unsigned int nft_do_chain_inet(vo + + static struct nft_af_info nft_af_inet __read_mostly = { + .family = NFPROTO_INET, +- .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + }; + +--- a/net/netfilter/nf_tables_netdev.c ++++ b/net/netfilter/nf_tables_netdev.c +@@ -40,7 +40,6 @@ nft_do_chain_netdev(void *priv, struct s + + static struct nft_af_info nft_af_netdev __read_mostly = { + .family = NFPROTO_NETDEV, +- .nhooks = NF_NETDEV_NUMHOOKS, + .owner = THIS_MODULE, + .flags = NFT_AF_NEEDS_DEV, + }; diff --git a/target/linux/generic/backport-4.14/328-netfilter-nf_tables-fix-a-typo-in-nf_tables_getflowt.patch b/target/linux/generic/backport-4.14/328-netfilter-nf_tables-fix-a-typo-in-nf_tables_getflowt.patch new file mode 100644 index 0000000000..b5c1b19a59 --- /dev/null +++ b/target/linux/generic/backport-4.14/328-netfilter-nf_tables-fix-a-typo-in-nf_tables_getflowt.patch @@ -0,0 +1,22 @@ +From: Wei Yongjun <weiyongjun1@huawei.com> +Date: Wed, 10 Jan 2018 07:04:54 +0000 +Subject: [PATCH] netfilter: nf_tables: fix a typo in nf_tables_getflowtable() + +Fix a typo, we should check 'flowtable' instead of 'table'. + +Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") +Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -5341,7 +5341,7 @@ static int nf_tables_getflowtable(struct + + flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], + genmask); +- if (IS_ERR(table)) ++ if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); diff --git a/target/linux/generic/backport-4.14/329-netfilter-improve-flow-table-Kconfig-dependencies.patch b/target/linux/generic/backport-4.14/329-netfilter-improve-flow-table-Kconfig-dependencies.patch new file mode 100644 index 0000000000..c897c36724 --- /dev/null +++ b/target/linux/generic/backport-4.14/329-netfilter-improve-flow-table-Kconfig-dependencies.patch @@ -0,0 +1,106 @@ +From: Arnd Bergmann <arnd@arndb.de> +Date: Wed, 10 Jan 2018 18:10:59 +0100 +Subject: [PATCH] netfilter: improve flow table Kconfig dependencies + +The newly added NF_FLOW_TABLE options cause some build failures in +randconfig kernels: + +- when CONFIG_NF_CONNTRACK is disabled, or is a loadable module but + NF_FLOW_TABLE is built-in: + + In file included from net/netfilter/nf_flow_table.c:8:0: + include/net/netfilter/nf_conntrack.h:59:22: error: field 'ct_general' has incomplete type + struct nf_conntrack ct_general; + include/net/netfilter/nf_conntrack.h: In function 'nf_ct_get': + include/net/netfilter/nf_conntrack.h:148:15: error: 'const struct sk_buff' has no member named '_nfct' + include/net/netfilter/nf_conntrack.h: In function 'nf_ct_put': + include/net/netfilter/nf_conntrack.h:157:2: error: implicit declaration of function 'nf_conntrack_put'; did you mean 'nf_ct_put'? [-Werror=implicit-function-declaration] + + net/netfilter/nf_flow_table.o: In function `nf_flow_offload_work_gc': + (.text+0x1540): undefined reference to `nf_ct_delete' + +- when CONFIG_NF_TABLES is disabled: + + In file included from net/ipv6/netfilter/nf_flow_table_ipv6.c:13:0: + include/net/netfilter/nf_tables.h: In function 'nft_gencursor_next': + include/net/netfilter/nf_tables.h:1189:14: error: 'const struct net' has no member named 'nft'; did you mean 'nf'? + + - when CONFIG_NF_FLOW_TABLE_INET is enabled, but NF_FLOW_TABLE_IPV4 + or NF_FLOW_TABLE_IPV6 are not, or are loadable modules + + net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook': + nf_flow_table_inet.c:(.text+0x94): undefined reference to `nf_flow_offload_ipv6_hook' + nf_flow_table_inet.c:(.text+0x40): undefined reference to `nf_flow_offload_ip_hook' + +- when CONFIG_NF_FLOW_TABLES is disabled, but the other options are + enabled: + + net/netfilter/nf_flow_table_inet.o: In function `nf_flow_offload_inet_hook': + nf_flow_table_inet.c:(.text+0x6c): undefined reference to `nf_flow_offload_ipv6_hook' + net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_exit': + nf_flow_table_inet.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type' + net/netfilter/nf_flow_table_inet.o: In function `nf_flow_inet_module_init': + nf_flow_table_inet.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type' + net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_exit': + nf_flow_table_ipv4.c:(.exit.text+0x8): undefined reference to `nft_unregister_flowtable_type' + net/ipv4/netfilter/nf_flow_table_ipv4.o: In function `nf_flow_ipv4_module_init': + nf_flow_table_ipv4.c:(.init.text+0x8): undefined reference to `nft_register_flowtable_type' + +This adds additional Kconfig dependencies to ensure that NF_CONNTRACK and NF_TABLES +are always visible from NF_FLOW_TABLE, and that the internal dependencies between +the four new modules are met. + +Fixes: 7c23b629a808 ("netfilter: flow table support for the mixed IPv4/IPv6 family") +Fixes: 0995210753a2 ("netfilter: flow table support for IPv6") +Fixes: 97add9f0d66d ("netfilter: flow table support for IPv4") +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -78,8 +78,9 @@ config NF_TABLES_ARP + endif # NF_TABLES + + config NF_FLOW_TABLE_IPV4 +- select NF_FLOW_TABLE + tristate "Netfilter flow table IPv4 module" ++ depends on NF_CONNTRACK && NF_TABLES ++ select NF_FLOW_TABLE + help + This option adds the flow table IPv4 support. + +--- a/net/ipv6/netfilter/Kconfig ++++ b/net/ipv6/netfilter/Kconfig +@@ -72,8 +72,9 @@ endif # NF_TABLES_IPV6 + endif # NF_TABLES + + config NF_FLOW_TABLE_IPV6 +- select NF_FLOW_TABLE + tristate "Netfilter flow table IPv6 module" ++ depends on NF_CONNTRACK && NF_TABLES ++ select NF_FLOW_TABLE + help + This option adds the flow table IPv6 support. + +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -669,8 +669,9 @@ endif # NF_TABLES_NETDEV + endif # NF_TABLES + + config NF_FLOW_TABLE_INET +- select NF_FLOW_TABLE + tristate "Netfilter flow table mixed IPv4/IPv6 module" ++ depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6 ++ select NF_FLOW_TABLE + help + This option adds the flow table mixed IPv4/IPv6 support. + +@@ -678,6 +679,7 @@ config NF_FLOW_TABLE_INET + + config NF_FLOW_TABLE + tristate "Netfilter flow table module" ++ depends on NF_CONNTRACK && NF_TABLES + help + This option adds the flow table core infrastructure. + diff --git a/target/linux/generic/backport-4.14/330-netfilter-nf_tables-remove-flag-field-from-struct-nf.patch b/target/linux/generic/backport-4.14/330-netfilter-nf_tables-remove-flag-field-from-struct-nf.patch new file mode 100644 index 0000000000..42aa7b1b0d --- /dev/null +++ b/target/linux/generic/backport-4.14/330-netfilter-nf_tables-remove-flag-field-from-struct-nf.patch @@ -0,0 +1,59 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 19 Dec 2017 14:07:52 +0100 +Subject: [PATCH] netfilter: nf_tables: remove flag field from struct + nft_af_info + +Replace it by a direct check for the netdev protocol family. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -954,10 +954,6 @@ struct nft_table { + char *name; + }; + +-enum nft_af_flags { +- NFT_AF_NEEDS_DEV = (1 << 0), +-}; +- + /** + * struct nft_af_info - nf_tables address family info + * +@@ -965,14 +961,12 @@ enum nft_af_flags { + * @family: address family + * @owner: module owner + * @tables: used internally +- * @flags: family flags + */ + struct nft_af_info { + struct list_head list; + int family; + struct module *owner; + struct list_head tables; +- u32 flags; + }; + + int nft_register_afinfo(struct net *, struct nft_af_info *); +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1345,7 +1345,7 @@ static int nft_chain_parse_hook(struct n + hook->type = type; + + hook->dev = NULL; +- if (afi->flags & NFT_AF_NEEDS_DEV) { ++ if (afi->family == NFPROTO_NETDEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { +--- a/net/netfilter/nf_tables_netdev.c ++++ b/net/netfilter/nf_tables_netdev.c +@@ -41,7 +41,6 @@ nft_do_chain_netdev(void *priv, struct s + static struct nft_af_info nft_af_netdev __read_mostly = { + .family = NFPROTO_NETDEV, + .owner = THIS_MODULE, +- .flags = NFT_AF_NEEDS_DEV, + }; + + static int nf_tables_netdev_init_net(struct net *net) diff --git a/target/linux/generic/backport-4.14/331-netfilter-nf_tables-no-need-for-struct-nft_af_info-t.patch b/target/linux/generic/backport-4.14/331-netfilter-nf_tables-no-need-for-struct-nft_af_info-t.patch new file mode 100644 index 0000000000..a9f13c45df --- /dev/null +++ b/target/linux/generic/backport-4.14/331-netfilter-nf_tables-no-need-for-struct-nft_af_info-t.patch @@ -0,0 +1,80 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 19 Dec 2017 12:17:52 +0100 +Subject: [PATCH] netfilter: nf_tables: no need for struct nft_af_info to + enable/disable table + +nf_tables_table_enable() and nf_tables_table_disable() take a pointer to +struct nft_af_info that is never used, remove it. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -611,10 +611,7 @@ err: + return err; + } + +-static void _nf_tables_table_disable(struct net *net, +- const struct nft_af_info *afi, +- struct nft_table *table, +- u32 cnt) ++static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) + { + struct nft_chain *chain; + u32 i = 0; +@@ -632,9 +629,7 @@ static void _nf_tables_table_disable(str + } + } + +-static int nf_tables_table_enable(struct net *net, +- const struct nft_af_info *afi, +- struct nft_table *table) ++static int nf_tables_table_enable(struct net *net, struct nft_table *table) + { + struct nft_chain *chain; + int err, i = 0; +@@ -654,15 +649,13 @@ static int nf_tables_table_enable(struct + return 0; + err: + if (i) +- _nf_tables_table_disable(net, afi, table, i); ++ nft_table_disable(net, table, i); + return err; + } + +-static void nf_tables_table_disable(struct net *net, +- const struct nft_af_info *afi, +- struct nft_table *table) ++static void nf_tables_table_disable(struct net *net, struct nft_table *table) + { +- _nf_tables_table_disable(net, afi, table, 0); ++ nft_table_disable(net, table, 0); + } + + static int nf_tables_updtable(struct nft_ctx *ctx) +@@ -691,7 +684,7 @@ static int nf_tables_updtable(struct nft + nft_trans_table_enable(trans) = false; + } else if (!(flags & NFT_TABLE_F_DORMANT) && + ctx->table->flags & NFT_TABLE_F_DORMANT) { +- ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table); ++ ret = nf_tables_table_enable(ctx->net, ctx->table); + if (ret >= 0) { + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + nft_trans_table_enable(trans) = true; +@@ -5719,7 +5712,6 @@ static int nf_tables_commit(struct net * + if (nft_trans_table_update(trans)) { + if (!nft_trans_table_enable(trans)) { + nf_tables_table_disable(net, +- trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } +@@ -5881,7 +5873,6 @@ static int nf_tables_abort(struct net *n + if (nft_trans_table_update(trans)) { + if (nft_trans_table_enable(trans)) { + nf_tables_table_disable(net, +- trans->ctx.afi, + trans->ctx.table); + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } diff --git a/target/linux/generic/backport-4.14/332-netfilter-nf_tables-remove-struct-nft_af_info-parame.patch b/target/linux/generic/backport-4.14/332-netfilter-nf_tables-remove-struct-nft_af_info-parame.patch new file mode 100644 index 0000000000..158f987fef --- /dev/null +++ b/target/linux/generic/backport-4.14/332-netfilter-nf_tables-remove-struct-nft_af_info-parame.patch @@ -0,0 +1,60 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 19 Dec 2017 13:40:22 +0100 +Subject: [PATCH] netfilter: nf_tables: remove struct nft_af_info parameter in + nf_tables_chain_type_lookup() + +Pass family number instead, this comes in preparation for the removal of +struct nft_af_info. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -423,7 +423,7 @@ static inline u64 nf_tables_alloc_handle + static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX]; + + static const struct nf_chain_type * +-__nf_tables_chain_type_lookup(int family, const struct nlattr *nla) ++__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) + { + int i; + +@@ -436,22 +436,20 @@ __nf_tables_chain_type_lookup(int family + } + + static const struct nf_chain_type * +-nf_tables_chain_type_lookup(const struct nft_af_info *afi, +- const struct nlattr *nla, +- bool autoload) ++nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload) + { + const struct nf_chain_type *type; + +- type = __nf_tables_chain_type_lookup(afi->family, nla); ++ type = __nf_tables_chain_type_lookup(nla, family); + if (type != NULL) + return type; + #ifdef CONFIG_MODULES + if (autoload) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +- request_module("nft-chain-%u-%.*s", afi->family, ++ request_module("nft-chain-%u-%.*s", family, + nla_len(nla), (const char *)nla_data(nla)); + nfnl_lock(NFNL_SUBSYS_NFTABLES); +- type = __nf_tables_chain_type_lookup(afi->family, nla); ++ type = __nf_tables_chain_type_lookup(nla, family); + if (type != NULL) + return ERR_PTR(-EAGAIN); + } +@@ -1325,8 +1323,8 @@ static int nft_chain_parse_hook(struct n + + type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT]; + if (nla[NFTA_CHAIN_TYPE]) { +- type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE], +- create); ++ type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], ++ afi->family, create); + if (IS_ERR(type)) + return PTR_ERR(type); + } diff --git a/target/linux/generic/backport-4.14/333-netfilter-nf_tables-fix-chain-filter-in-nf_tables_du.patch b/target/linux/generic/backport-4.14/333-netfilter-nf_tables-fix-chain-filter-in-nf_tables_du.patch new file mode 100644 index 0000000000..a123d236ab --- /dev/null +++ b/target/linux/generic/backport-4.14/333-netfilter-nf_tables-fix-chain-filter-in-nf_tables_du.patch @@ -0,0 +1,24 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 19 Dec 2017 12:01:21 +0100 +Subject: [PATCH] netfilter: nf_tables: fix chain filter in + nf_tables_dump_rules() + +ctx->chain may be null now that we have very large object names, +so we cannot check for ctx->chain[0] here. + +Fixes: b7263e071aba7 ("netfilter: nf_tables: Allow table names of up to 255 chars") +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +Acked-by: Phil Sutter <phil@nwl.cc> +--- + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -2090,7 +2090,7 @@ static int nf_tables_dump_rules(struct s + continue; + + list_for_each_entry_rcu(chain, &table->chains, list) { +- if (ctx && ctx->chain[0] && ++ if (ctx && ctx->chain && + strcmp(ctx->chain, chain->name) != 0) + continue; + diff --git a/target/linux/generic/backport-4.14/334-netfilter-nf_tables-fix-potential-NULL-ptr-deref-in-.patch b/target/linux/generic/backport-4.14/334-netfilter-nf_tables-fix-potential-NULL-ptr-deref-in-.patch new file mode 100644 index 0000000000..57e9b53301 --- /dev/null +++ b/target/linux/generic/backport-4.14/334-netfilter-nf_tables-fix-potential-NULL-ptr-deref-in-.patch @@ -0,0 +1,30 @@ +From: Hangbin Liu <liuhangbin@gmail.com> +Date: Mon, 25 Dec 2017 11:34:54 +0800 +Subject: [PATCH] netfilter: nf_tables: fix potential NULL-ptr deref in + nf_tables_dump_obj_done() + +If there is no NFTA_OBJ_TABLE and NFTA_OBJ_TYPE, the c.data will be NULL in +nf_tables_getobj(). So before free filter->table in nf_tables_dump_obj_done(), +we need to check if filter is NULL first. + +Fixes: e46abbcc05aa ("netfilter: nf_tables: Allow table names of up to 255 chars") +Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> +Acked-by: Phil Sutter <phil@nwl.cc> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4614,8 +4614,10 @@ static int nf_tables_dump_obj_done(struc + { + struct nft_obj_filter *filter = cb->data; + +- kfree(filter->table); +- kfree(filter); ++ if (filter) { ++ kfree(filter->table); ++ kfree(filter); ++ } + + return 0; + } diff --git a/target/linux/generic/backport-4.14/335-netfilter-nf_tables-add-single-table-list-for-all-fa.patch b/target/linux/generic/backport-4.14/335-netfilter-nf_tables-add-single-table-list-for-all-fa.patch new file mode 100644 index 0000000000..bae2e2879e --- /dev/null +++ b/target/linux/generic/backport-4.14/335-netfilter-nf_tables-add-single-table-list-for-all-fa.patch @@ -0,0 +1,1449 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 9 Jan 2018 02:38:03 +0100 +Subject: [PATCH] netfilter: nf_tables: add single table list for all families + +Place all existing user defined tables in struct net *, instead of +having one list per family. This saves us from one level of indentation +in netlink dump functions. + +Place pointer to struct nft_af_info in struct nft_table temporarily, as +we still need this to put back reference module reference counter on +table removal. + +This patch comes in preparation for the removal of struct nft_af_info. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -143,22 +143,22 @@ static inline void nft_data_debug(const + * struct nft_ctx - nf_tables rule/set context + * + * @net: net namespace +- * @afi: address family info + * @table: the table the chain is contained in + * @chain: the chain the rule is contained in + * @nla: netlink attributes + * @portid: netlink portID of the original message + * @seq: netlink sequence number ++ * @family: protocol family + * @report: notify via unicast netlink message + */ + struct nft_ctx { + struct net *net; +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + const struct nlattr * const *nla; + u32 portid; + u32 seq; ++ u8 family; + bool report; + }; + +@@ -939,6 +939,7 @@ unsigned int nft_do_chain(struct nft_pkt + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask ++ * @afinfo: address family info + * @name: name of the table + */ + struct nft_table { +@@ -951,6 +952,7 @@ struct nft_table { + u32 use; + u16 flags:14, + genmask:2; ++ struct nft_af_info *afi; + char *name; + }; + +@@ -960,13 +962,11 @@ struct nft_table { + * @list: used internally + * @family: address family + * @owner: module owner +- * @tables: used internally + */ + struct nft_af_info { + struct list_head list; + int family; + struct module *owner; +- struct list_head tables; + }; + + int nft_register_afinfo(struct net *, struct nft_af_info *); +--- a/include/net/netns/nftables.h ++++ b/include/net/netns/nftables.h +@@ -8,6 +8,7 @@ struct nft_af_info; + + struct netns_nftables { + struct list_head af_info; ++ struct list_head tables; + struct list_head commit_list; + struct nft_af_info *ipv4; + struct nft_af_info *ipv6; +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -37,7 +37,6 @@ static LIST_HEAD(nf_tables_flowtables); + */ + int nft_register_afinfo(struct net *net, struct nft_af_info *afi) + { +- INIT_LIST_HEAD(&afi->tables); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_tail_rcu(&afi->list, &net->nft.af_info); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +@@ -99,13 +98,13 @@ static void nft_ctx_init(struct nft_ctx + struct net *net, + const struct sk_buff *skb, + const struct nlmsghdr *nlh, +- struct nft_af_info *afi, ++ u8 family, + struct nft_table *table, + struct nft_chain *chain, + const struct nlattr * const *nla) + { + ctx->net = net; +- ctx->afi = afi; ++ ctx->family = family; + ctx->table = table; + ctx->chain = chain; + ctx->nla = nla; +@@ -385,30 +384,31 @@ static int nft_delflowtable(struct nft_c + * Tables + */ + +-static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, ++static struct nft_table *nft_table_lookup(const struct net *net, + const struct nlattr *nla, +- u8 genmask) ++ u8 family, u8 genmask) + { + struct nft_table *table; + +- list_for_each_entry(table, &afi->tables, list) { ++ list_for_each_entry(table, &net->nft.tables, list) { + if (!nla_strcmp(nla, table->name) && ++ table->afi->family == family && + nft_active_genmask(table, genmask)) + return table; + } + return NULL; + } + +-static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, ++static struct nft_table *nf_tables_table_lookup(const struct net *net, + const struct nlattr *nla, +- u8 genmask) ++ u8 family, u8 genmask) + { + struct nft_table *table; + + if (nla == NULL) + return ERR_PTR(-EINVAL); + +- table = nft_table_lookup(afi, nla, genmask); ++ table = nft_table_lookup(net, nla, family, genmask); + if (table != NULL) + return table; + +@@ -507,7 +507,7 @@ static void nf_tables_table_notify(const + goto err; + + err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq, +- event, 0, ctx->afi->family, ctx->table); ++ event, 0, ctx->family, ctx->table); + if (err < 0) { + kfree_skb(skb); + goto err; +@@ -524,7 +524,6 @@ static int nf_tables_dump_tables(struct + struct netlink_callback *cb) + { + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); +- const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); +@@ -533,30 +532,27 @@ static int nf_tables_dump_tables(struct + rcu_read_lock(); + cb->seq = net->nft.base_seq; + +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- if (family != NFPROTO_UNSPEC && family != afi->family) ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ if (family != NFPROTO_UNSPEC && family != table->afi->family) + continue; + +- list_for_each_entry_rcu(table, &afi->tables, list) { +- if (idx < s_idx) +- goto cont; +- if (idx > s_idx) +- memset(&cb->args[1], 0, +- sizeof(cb->args) - sizeof(cb->args[0])); +- if (!nft_is_active(net, table)) +- continue; +- if (nf_tables_fill_table_info(skb, net, +- NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- NFT_MSG_NEWTABLE, +- NLM_F_MULTI, +- afi->family, table) < 0) +- goto done; ++ if (idx < s_idx) ++ goto cont; ++ if (idx > s_idx) ++ memset(&cb->args[1], 0, ++ sizeof(cb->args) - sizeof(cb->args[0])); ++ if (!nft_is_active(net, table)) ++ continue; ++ if (nf_tables_fill_table_info(skb, net, ++ NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ NFT_MSG_NEWTABLE, NLM_F_MULTI, ++ table->afi->family, table) < 0) ++ goto done; + +- nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++ nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + cont: +- idx++; +- } ++ idx++; + } + done: + rcu_read_unlock(); +@@ -588,7 +584,8 @@ static int nf_tables_gettable(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -719,7 +716,7 @@ static int nf_tables_newtable(struct net + return PTR_ERR(afi); + + name = nla[NFTA_TABLE_NAME]; +- table = nf_tables_table_lookup(afi, name, genmask); ++ table = nf_tables_table_lookup(net, name, afi->family, genmask); + if (IS_ERR(table)) { + if (PTR_ERR(table) != -ENOENT) + return PTR_ERR(table); +@@ -729,7 +726,7 @@ static int nf_tables_newtable(struct net + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); + return nf_tables_updtable(&ctx); + } + +@@ -756,14 +753,15 @@ static int nf_tables_newtable(struct net + INIT_LIST_HEAD(&table->sets); + INIT_LIST_HEAD(&table->objects); + INIT_LIST_HEAD(&table->flowtables); ++ table->afi = afi; + table->flags = flags; + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); + if (err < 0) + goto err4; + +- list_add_tail_rcu(&table->list, &afi->tables); ++ list_add_tail_rcu(&table->list, &net->nft.tables); + return 0; + err4: + kfree(table->name); +@@ -837,30 +835,28 @@ out: + + static int nft_flush(struct nft_ctx *ctx, int family) + { +- struct nft_af_info *afi; + struct nft_table *table, *nt; + const struct nlattr * const *nla = ctx->nla; + int err = 0; + +- list_for_each_entry(afi, &ctx->net->nft.af_info, list) { +- if (family != AF_UNSPEC && afi->family != family) ++ list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { ++ if (family != AF_UNSPEC && table->afi->family != family) + continue; + +- ctx->afi = afi; +- list_for_each_entry_safe(table, nt, &afi->tables, list) { +- if (!nft_is_active_next(ctx->net, table)) +- continue; ++ ctx->family = table->afi->family; + +- if (nla[NFTA_TABLE_NAME] && +- nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) +- continue; ++ if (!nft_is_active_next(ctx->net, table)) ++ continue; + +- ctx->table = table; ++ if (nla[NFTA_TABLE_NAME] && ++ nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) ++ continue; + +- err = nft_flush_table(ctx); +- if (err < 0) +- goto out; +- } ++ ctx->table = table; ++ ++ err = nft_flush_table(ctx); ++ if (err < 0) ++ goto out; + } + out: + return err; +@@ -878,7 +874,7 @@ static int nf_tables_deltable(struct net + int family = nfmsg->nfgen_family; + struct nft_ctx ctx; + +- nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla); + if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL) + return nft_flush(&ctx, family); + +@@ -886,7 +882,8 @@ static int nf_tables_deltable(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -894,7 +891,7 @@ static int nf_tables_deltable(struct net + table->use > 0) + return -EBUSY; + +- ctx.afi = afi; ++ ctx.family = afi->family; + ctx.table = table; + + return nft_flush_table(&ctx); +@@ -906,7 +903,7 @@ static void nf_tables_table_destroy(stru + + kfree(ctx->table->name); + kfree(ctx->table); +- module_put(ctx->afi->owner); ++ module_put(ctx->table->afi->owner); + } + + int nft_register_chain_type(const struct nf_chain_type *ctype) +@@ -1107,7 +1104,7 @@ static void nf_tables_chain_notify(const + goto err; + + err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq, +- event, 0, ctx->afi->family, ctx->table, ++ event, 0, ctx->family, ctx->table, + ctx->chain); + if (err < 0) { + kfree_skb(skb); +@@ -1125,7 +1122,6 @@ static int nf_tables_dump_chains(struct + struct netlink_callback *cb) + { + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); +- const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + unsigned int idx = 0, s_idx = cb->args[0]; +@@ -1135,31 +1131,30 @@ static int nf_tables_dump_chains(struct + rcu_read_lock(); + cb->seq = net->nft.base_seq; + +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- if (family != NFPROTO_UNSPEC && family != afi->family) ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ if (family != NFPROTO_UNSPEC && family != table->afi->family) + continue; + +- list_for_each_entry_rcu(table, &afi->tables, list) { +- list_for_each_entry_rcu(chain, &table->chains, list) { +- if (idx < s_idx) +- goto cont; +- if (idx > s_idx) +- memset(&cb->args[1], 0, +- sizeof(cb->args) - sizeof(cb->args[0])); +- if (!nft_is_active(net, chain)) +- continue; +- if (nf_tables_fill_chain_info(skb, net, +- NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- NFT_MSG_NEWCHAIN, +- NLM_F_MULTI, +- afi->family, table, chain) < 0) +- goto done; ++ list_for_each_entry_rcu(chain, &table->chains, list) { ++ if (idx < s_idx) ++ goto cont; ++ if (idx > s_idx) ++ memset(&cb->args[1], 0, ++ sizeof(cb->args) - sizeof(cb->args[0])); ++ if (!nft_is_active(net, chain)) ++ continue; ++ if (nf_tables_fill_chain_info(skb, net, ++ NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ NFT_MSG_NEWCHAIN, ++ NLM_F_MULTI, ++ table->afi->family, table, ++ chain) < 0) ++ goto done; + +- nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++ nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + cont: +- idx++; +- } ++ idx++; + } + } + done: +@@ -1193,7 +1188,8 @@ static int nf_tables_getchain(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -1301,8 +1297,8 @@ struct nft_chain_hook { + + static int nft_chain_parse_hook(struct net *net, + const struct nlattr * const nla[], +- struct nft_af_info *afi, +- struct nft_chain_hook *hook, bool create) ++ struct nft_chain_hook *hook, u8 family, ++ bool create) + { + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + const struct nf_chain_type *type; +@@ -1321,10 +1317,10 @@ static int nft_chain_parse_hook(struct n + hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + +- type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT]; ++ type = chain_type[family][NFT_CHAIN_T_DEFAULT]; + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], +- afi->family, create); ++ family, create); + if (IS_ERR(type)) + return PTR_ERR(type); + } +@@ -1336,7 +1332,7 @@ static int nft_chain_parse_hook(struct n + hook->type = type; + + hook->dev = NULL; +- if (afi->family == NFPROTO_NETDEV) { ++ if (family == NFPROTO_NETDEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { +@@ -1371,7 +1367,6 @@ static int nf_tables_addchain(struct nft + { + const struct nlattr * const *nla = ctx->nla; + struct nft_table *table = ctx->table; +- struct nft_af_info *afi = ctx->afi; + struct nft_base_chain *basechain; + struct nft_stats __percpu *stats; + struct net *net = ctx->net; +@@ -1385,7 +1380,7 @@ static int nf_tables_addchain(struct nft + struct nft_chain_hook hook; + struct nf_hook_ops *ops; + +- err = nft_chain_parse_hook(net, nla, afi, &hook, create); ++ err = nft_chain_parse_hook(net, nla, &hook, family, create); + if (err < 0) + return err; + +@@ -1478,7 +1473,7 @@ static int nf_tables_updchain(struct nft + if (!nft_is_base_chain(chain)) + return -EBUSY; + +- err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook, ++ err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, + create); + if (err < 0) + return err; +@@ -1571,7 +1566,8 @@ static int nf_tables_newchain(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -1611,7 +1607,7 @@ static int nf_tables_newchain(struct net + } + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); + + if (chain != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) +@@ -1645,7 +1641,8 @@ static int nf_tables_delchain(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -1657,7 +1654,7 @@ static int nf_tables_delchain(struct net + chain->use > 0) + return -EBUSY; + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); + + use = chain->use; + list_for_each_entry(rule, &chain->rules, list) { +@@ -1822,7 +1819,7 @@ static int nf_tables_expr_parse(const st + if (err < 0) + return err; + +- type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]); ++ type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); + if (IS_ERR(type)) + return PTR_ERR(type); + +@@ -2045,7 +2042,7 @@ static void nf_tables_rule_notify(const + goto err; + + err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, +- event, 0, ctx->afi->family, ctx->table, ++ event, 0, ctx->family, ctx->table, + ctx->chain, rule); + if (err < 0) { + kfree_skb(skb); +@@ -2069,7 +2066,6 @@ static int nf_tables_dump_rules(struct s + { + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_rule_dump_ctx *ctx = cb->data; +- const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; +@@ -2080,39 +2076,37 @@ static int nf_tables_dump_rules(struct s + rcu_read_lock(); + cb->seq = net->nft.base_seq; + +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- if (family != NFPROTO_UNSPEC && family != afi->family) ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ if (family != NFPROTO_UNSPEC && family != table->afi->family) + continue; + +- list_for_each_entry_rcu(table, &afi->tables, list) { +- if (ctx && ctx->table && +- strcmp(ctx->table, table->name) != 0) +- continue; ++ if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) ++ continue; + +- list_for_each_entry_rcu(chain, &table->chains, list) { +- if (ctx && ctx->chain && +- strcmp(ctx->chain, chain->name) != 0) +- continue; ++ list_for_each_entry_rcu(chain, &table->chains, list) { ++ if (ctx && ctx->chain && ++ strcmp(ctx->chain, chain->name) != 0) ++ continue; + +- list_for_each_entry_rcu(rule, &chain->rules, list) { +- if (!nft_is_active(net, rule)) +- goto cont; +- if (idx < s_idx) +- goto cont; +- if (idx > s_idx) +- memset(&cb->args[1], 0, +- sizeof(cb->args) - sizeof(cb->args[0])); +- if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- NFT_MSG_NEWRULE, +- NLM_F_MULTI | NLM_F_APPEND, +- afi->family, table, chain, rule) < 0) +- goto done; ++ list_for_each_entry_rcu(rule, &chain->rules, list) { ++ if (!nft_is_active(net, rule)) ++ goto cont; ++ if (idx < s_idx) ++ goto cont; ++ if (idx > s_idx) ++ memset(&cb->args[1], 0, ++ sizeof(cb->args) - sizeof(cb->args[0])); ++ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ NFT_MSG_NEWRULE, ++ NLM_F_MULTI | NLM_F_APPEND, ++ table->afi->family, ++ table, chain, rule) < 0) ++ goto done; + +- nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++ nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + cont: +- idx++; +- } ++ idx++; + } + } + } +@@ -2190,7 +2184,8 @@ static int nf_tables_getrule(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -2267,7 +2262,8 @@ static int nf_tables_newrule(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -2306,7 +2302,7 @@ static int nf_tables_newrule(struct net + return PTR_ERR(old_rule); + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); + + n = 0; + size = 0; +@@ -2441,7 +2437,8 @@ static int nf_tables_delrule(struct net + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -2452,7 +2449,7 @@ static int nf_tables_delrule(struct net + return PTR_ERR(chain); + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); + + if (chain) { + if (nla[NFTA_RULE_HANDLE]) { +@@ -2650,13 +2647,13 @@ static int nft_ctx_init_from_setattr(str + if (afi == NULL) + return -EAFNOSUPPORT; + +- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], +- genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], ++ afi->family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + } + +- nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla); + return 0; + } + +@@ -2783,7 +2780,7 @@ static int nf_tables_fill_set(struct sk_ + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); +- nfmsg->nfgen_family = ctx->afi->family; ++ nfmsg->nfgen_family = ctx->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); + +@@ -2875,10 +2872,8 @@ static int nf_tables_dump_sets(struct sk + { + const struct nft_set *set; + unsigned int idx, s_idx = cb->args[0]; +- struct nft_af_info *afi; + struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; + struct net *net = sock_net(skb->sk); +- int cur_family = cb->args[3]; + struct nft_ctx *ctx = cb->data, ctx_set; + + if (cb->args[1]) +@@ -2887,51 +2882,44 @@ static int nf_tables_dump_sets(struct sk + rcu_read_lock(); + cb->seq = net->nft.base_seq; + +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- if (ctx->afi && ctx->afi != afi) ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ if (ctx->family != NFPROTO_UNSPEC && ++ ctx->family != table->afi->family) + continue; + +- if (cur_family) { +- if (afi->family != cur_family) ++ if (ctx->table && ctx->table != table) ++ continue; ++ ++ if (cur_table) { ++ if (cur_table != table) + continue; + +- cur_family = 0; ++ cur_table = NULL; + } +- list_for_each_entry_rcu(table, &afi->tables, list) { +- if (ctx->table && ctx->table != table) +- continue; ++ idx = 0; ++ list_for_each_entry_rcu(set, &table->sets, list) { ++ if (idx < s_idx) ++ goto cont; ++ if (!nft_is_active(net, set)) ++ goto cont; + +- if (cur_table) { +- if (cur_table != table) +- continue; ++ ctx_set = *ctx; ++ ctx_set.table = table; ++ ctx_set.family = table->afi->family; + +- cur_table = NULL; ++ if (nf_tables_fill_set(skb, &ctx_set, set, ++ NFT_MSG_NEWSET, ++ NLM_F_MULTI) < 0) { ++ cb->args[0] = idx; ++ cb->args[2] = (unsigned long) table; ++ goto done; + } +- idx = 0; +- list_for_each_entry_rcu(set, &table->sets, list) { +- if (idx < s_idx) +- goto cont; +- if (!nft_is_active(net, set)) +- goto cont; +- +- ctx_set = *ctx; +- ctx_set.table = table; +- ctx_set.afi = afi; +- if (nf_tables_fill_set(skb, &ctx_set, set, +- NFT_MSG_NEWSET, +- NLM_F_MULTI) < 0) { +- cb->args[0] = idx; +- cb->args[2] = (unsigned long) table; +- cb->args[3] = afi->family; +- goto done; +- } +- nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++ nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + cont: +- idx++; +- } +- if (s_idx) +- s_idx = 0; ++ idx++; + } ++ if (s_idx) ++ s_idx = 0; + } + cb->args[1] = 1; + done: +@@ -3141,11 +3129,12 @@ static int nf_tables_newset(struct net * + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); + + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); + if (IS_ERR(set)) { +@@ -3410,12 +3399,12 @@ static int nft_ctx_init_from_elemattr(st + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], +- genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], ++ afi->family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +- nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla); + return 0; + } + +@@ -3520,7 +3509,6 @@ static int nf_tables_dump_set(struct sk_ + { + struct nft_set_dump_ctx *dump_ctx = cb->data; + struct net *net = sock_net(skb->sk); +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_set *set; + struct nft_set_dump_args args; +@@ -3532,21 +3520,19 @@ static int nf_tables_dump_set(struct sk_ + int event; + + rcu_read_lock(); +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- if (afi != dump_ctx->ctx.afi) ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ if (dump_ctx->ctx.family != NFPROTO_UNSPEC && ++ dump_ctx->ctx.family != table->afi->family) + continue; + +- list_for_each_entry_rcu(table, &afi->tables, list) { +- if (table != dump_ctx->ctx.table) +- continue; ++ if (table != dump_ctx->ctx.table) ++ continue; + +- list_for_each_entry_rcu(set, &table->sets, list) { +- if (set == dump_ctx->set) { +- set_found = true; +- break; +- } ++ list_for_each_entry_rcu(set, &table->sets, list) { ++ if (set == dump_ctx->set) { ++ set_found = true; ++ break; + } +- break; + } + break; + } +@@ -3566,7 +3552,7 @@ static int nf_tables_dump_set(struct sk_ + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); +- nfmsg->nfgen_family = afi->family; ++ nfmsg->nfgen_family = table->afi->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + +@@ -3668,7 +3654,7 @@ static int nf_tables_fill_setelem_info(s + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); +- nfmsg->nfgen_family = ctx->afi->family; ++ nfmsg->nfgen_family = ctx->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff); + +@@ -3912,7 +3898,7 @@ static int nft_add_set_elem(struct nft_c + list_for_each_entry(binding, &set->bindings, list) { + struct nft_ctx bind_ctx = { + .net = ctx->net, +- .afi = ctx->afi, ++ .family = ctx->family, + .table = ctx->table, + .chain = (struct nft_chain *)binding->chain, + }; +@@ -4459,7 +4445,8 @@ static int nf_tables_newobj(struct net * + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -4477,7 +4464,7 @@ static int nf_tables_newobj(struct net * + return 0; + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); + + type = nft_obj_type_get(objtype); + if (IS_ERR(type)) +@@ -4554,7 +4541,6 @@ struct nft_obj_filter { + static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) + { + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); +- const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_obj_filter *filter = cb->data; +@@ -4569,38 +4555,37 @@ static int nf_tables_dump_obj(struct sk_ + rcu_read_lock(); + cb->seq = net->nft.base_seq; + +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- if (family != NFPROTO_UNSPEC && family != afi->family) ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ if (family != NFPROTO_UNSPEC && family != table->afi->family) + continue; + +- list_for_each_entry_rcu(table, &afi->tables, list) { +- list_for_each_entry_rcu(obj, &table->objects, list) { +- if (!nft_is_active(net, obj)) +- goto cont; +- if (idx < s_idx) +- goto cont; +- if (idx > s_idx) +- memset(&cb->args[1], 0, +- sizeof(cb->args) - sizeof(cb->args[0])); +- if (filter && filter->table[0] && +- strcmp(filter->table, table->name)) +- goto cont; +- if (filter && +- filter->type != NFT_OBJECT_UNSPEC && +- obj->ops->type->type != filter->type) +- goto cont; ++ list_for_each_entry_rcu(obj, &table->objects, list) { ++ if (!nft_is_active(net, obj)) ++ goto cont; ++ if (idx < s_idx) ++ goto cont; ++ if (idx > s_idx) ++ memset(&cb->args[1], 0, ++ sizeof(cb->args) - sizeof(cb->args[0])); ++ if (filter && filter->table[0] && ++ strcmp(filter->table, table->name)) ++ goto cont; ++ if (filter && ++ filter->type != NFT_OBJECT_UNSPEC && ++ obj->ops->type->type != filter->type) ++ goto cont; + +- if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- NFT_MSG_NEWOBJ, +- NLM_F_MULTI | NLM_F_APPEND, +- afi->family, table, obj, reset) < 0) +- goto done; ++ if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ NFT_MSG_NEWOBJ, ++ NLM_F_MULTI | NLM_F_APPEND, ++ table->afi->family, table, ++ obj, reset) < 0) ++ goto done; + +- nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++ nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + cont: +- idx++; +- } ++ idx++; + } + } + done: +@@ -4687,7 +4672,8 @@ static int nf_tables_getobj(struct net * + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -4747,7 +4733,8 @@ static int nf_tables_delobj(struct net * + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family, ++ genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -4758,7 +4745,7 @@ static int nf_tables_delobj(struct net * + if (obj->use > 0) + return -EBUSY; + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); + + return nft_delobj(&ctx, obj); + } +@@ -4796,7 +4783,7 @@ static void nf_tables_obj_notify(const s + struct nft_object *obj, int event) + { + nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, +- ctx->afi->family, ctx->report, GFP_KERNEL); ++ ctx->family, ctx->report, GFP_KERNEL); + } + + /* +@@ -4986,7 +4973,7 @@ void nft_flow_table_iterate(struct net * + + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- list_for_each_entry_rcu(table, &afi->tables, list) { ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { + iter(&flowtable->data, data); + } +@@ -5034,7 +5021,8 @@ static int nf_tables_newflowtable(struct + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], ++ afi->family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -5051,7 +5039,7 @@ static int nf_tables_newflowtable(struct + return 0; + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); + + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); + if (!flowtable) +@@ -5132,7 +5120,8 @@ static int nf_tables_delflowtable(struct + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], ++ afi->family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -5143,7 +5132,7 @@ static int nf_tables_delflowtable(struct + if (flowtable->use > 0) + return -EBUSY; + +- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); + + return nft_delflowtable(&ctx, flowtable); + } +@@ -5212,40 +5201,37 @@ static int nf_tables_dump_flowtable(stru + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; +- const struct nft_af_info *afi; + const struct nft_table *table; + + rcu_read_lock(); + cb->seq = net->nft.base_seq; + +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- if (family != NFPROTO_UNSPEC && family != afi->family) ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ if (family != NFPROTO_UNSPEC && family != table->afi->family) + continue; + +- list_for_each_entry_rcu(table, &afi->tables, list) { +- list_for_each_entry_rcu(flowtable, &table->flowtables, list) { +- if (!nft_is_active(net, flowtable)) +- goto cont; +- if (idx < s_idx) +- goto cont; +- if (idx > s_idx) +- memset(&cb->args[1], 0, +- sizeof(cb->args) - sizeof(cb->args[0])); +- if (filter && filter->table[0] && +- strcmp(filter->table, table->name)) +- goto cont; ++ list_for_each_entry_rcu(flowtable, &table->flowtables, list) { ++ if (!nft_is_active(net, flowtable)) ++ goto cont; ++ if (idx < s_idx) ++ goto cont; ++ if (idx > s_idx) ++ memset(&cb->args[1], 0, ++ sizeof(cb->args) - sizeof(cb->args[0])); ++ if (filter && filter->table[0] && ++ strcmp(filter->table, table->name)) ++ goto cont; + +- if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, +- cb->nlh->nlmsg_seq, +- NFT_MSG_NEWFLOWTABLE, +- NLM_F_MULTI | NLM_F_APPEND, +- afi->family, flowtable) < 0) +- goto done; ++ if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid, ++ cb->nlh->nlmsg_seq, ++ NFT_MSG_NEWFLOWTABLE, ++ NLM_F_MULTI | NLM_F_APPEND, ++ table->afi->family, flowtable) < 0) ++ goto done; + +- nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++ nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + cont: +- idx++; +- } ++ idx++; + } + } + done: +@@ -5328,7 +5314,8 @@ static int nf_tables_getflowtable(struct + if (IS_ERR(afi)) + return PTR_ERR(afi); + +- table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask); ++ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], ++ afi->family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -5371,7 +5358,7 @@ static void nf_tables_flowtable_notify(s + + err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid, + ctx->seq, event, 0, +- ctx->afi->family, flowtable); ++ ctx->family, flowtable); + if (err < 0) { + kfree_skb(skb); + goto err; +@@ -5449,17 +5436,14 @@ static int nf_tables_flowtable_event(str + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nft_flowtable *flowtable; + struct nft_table *table; +- struct nft_af_info *afi; + + if (event != NETDEV_UNREGISTER) + return 0; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); +- list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { +- list_for_each_entry(table, &afi->tables, list) { +- list_for_each_entry(flowtable, &table->flowtables, list) { +- nft_flowtable_event(event, dev, flowtable); +- } ++ list_for_each_entry(table, &dev_net(dev)->nft.tables, list) { ++ list_for_each_entry(flowtable, &table->flowtables, list) { ++ nft_flowtable_event(event, dev, flowtable); + } + } + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +@@ -6478,6 +6462,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump); + static int __net_init nf_tables_init_net(struct net *net) + { + INIT_LIST_HEAD(&net->nft.af_info); ++ INIT_LIST_HEAD(&net->nft.tables); + INIT_LIST_HEAD(&net->nft.commit_list); + net->nft.base_seq = 1; + return 0; +@@ -6514,10 +6499,10 @@ static void __nft_release_afinfo(struct + struct nft_set *set, *ns; + struct nft_ctx ctx = { + .net = net, +- .afi = afi, ++ .family = afi->family, + }; + +- list_for_each_entry_safe(table, nt, &afi->tables, list) { ++ list_for_each_entry_safe(table, nt, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); + list_for_each_entry(flowtable, &table->flowtables, list) +--- a/net/netfilter/nf_tables_netdev.c ++++ b/net/netfilter/nf_tables_netdev.c +@@ -107,7 +107,6 @@ static int nf_tables_netdev_event(struct + unsigned long event, void *ptr) + { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain, *nr; + struct nft_ctx ctx = { +@@ -119,20 +118,18 @@ static int nf_tables_netdev_event(struct + return NOTIFY_DONE; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); +- list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { +- ctx.afi = afi; +- if (afi->family != NFPROTO_NETDEV) ++ list_for_each_entry(table, &ctx.net->nft.tables, list) { ++ if (table->afi->family != NFPROTO_NETDEV) + continue; + +- list_for_each_entry(table, &afi->tables, list) { +- ctx.table = table; +- list_for_each_entry_safe(chain, nr, &table->chains, list) { +- if (!nft_is_base_chain(chain)) +- continue; ++ ctx.family = table->afi->family; ++ ctx.table = table; ++ list_for_each_entry_safe(chain, nr, &table->chains, list) { ++ if (!nft_is_base_chain(chain)) ++ continue; + +- ctx.chain = chain; +- nft_netdev_event(event, dev, &ctx); +- } ++ ctx.chain = chain; ++ nft_netdev_event(event, dev, &ctx); + } + } + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -144,7 +144,7 @@ nft_target_set_tgchk_param(struct xt_tgc + { + par->net = ctx->net; + par->table = ctx->table->name; +- switch (ctx->afi->family) { ++ switch (ctx->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; +@@ -175,7 +175,7 @@ nft_target_set_tgchk_param(struct xt_tgc + } else { + par->hook_mask = 0; + } +- par->family = ctx->afi->family; ++ par->family = ctx->family; + par->nft_compat = true; + } + +@@ -267,7 +267,7 @@ nft_target_destroy(const struct nft_ctx + par.net = ctx->net; + par.target = target; + par.targinfo = info; +- par.family = ctx->afi->family; ++ par.family = ctx->family; + if (par.target->destroy != NULL) + par.target->destroy(&par); + +@@ -358,7 +358,7 @@ nft_match_set_mtchk_param(struct xt_mtch + { + par->net = ctx->net; + par->table = ctx->table->name; +- switch (ctx->afi->family) { ++ switch (ctx->family) { + case AF_INET: + entry->e4.ip.proto = proto; + entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; +@@ -389,7 +389,7 @@ nft_match_set_mtchk_param(struct xt_mtch + } else { + par->hook_mask = 0; + } +- par->family = ctx->afi->family; ++ par->family = ctx->family; + par->nft_compat = true; + } + +@@ -446,7 +446,7 @@ nft_match_destroy(const struct nft_ctx * + par.net = ctx->net; + par.match = match; + par.matchinfo = info; +- par.family = ctx->afi->family; ++ par.family = ctx->family; + if (par.match->destroy != NULL) + par.match->destroy(&par); + +@@ -648,7 +648,7 @@ nft_match_select_ops(const struct nft_ct + + mt_name = nla_data(tb[NFTA_MATCH_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); +- family = ctx->afi->family; ++ family = ctx->family; + + /* Re-use the existing match if it's already loaded. */ + list_for_each_entry(nft_match, &nft_match_list, head) { +@@ -733,7 +733,7 @@ nft_target_select_ops(const struct nft_c + + tg_name = nla_data(tb[NFTA_TARGET_NAME]); + rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV])); +- family = ctx->afi->family; ++ family = ctx->family; + + /* Re-use the existing target if it's already loaded. */ + list_for_each_entry(nft_target, &nft_target_list, head) { +--- a/net/netfilter/nft_ct.c ++++ b/net/netfilter/nft_ct.c +@@ -405,7 +405,7 @@ static int nft_ct_get_init(const struct + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + +- switch (ctx->afi->family) { ++ switch (ctx->family) { + case NFPROTO_IPV4: + len = FIELD_SIZEOF(struct nf_conntrack_tuple, + src.u3.ip); +@@ -456,7 +456,7 @@ static int nft_ct_get_init(const struct + if (err < 0) + return err; + +- err = nf_ct_netns_get(ctx->net, ctx->afi->family); ++ err = nf_ct_netns_get(ctx->net, ctx->family); + if (err < 0) + return err; + +@@ -550,7 +550,7 @@ static int nft_ct_set_init(const struct + if (err < 0) + goto err1; + +- err = nf_ct_netns_get(ctx->net, ctx->afi->family); ++ err = nf_ct_netns_get(ctx->net, ctx->family); + if (err < 0) + goto err1; + +@@ -564,7 +564,7 @@ err1: + static void nft_ct_get_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) + { +- nf_ct_netns_put(ctx->net, ctx->afi->family); ++ nf_ct_netns_put(ctx->net, ctx->family); + } + + static void nft_ct_set_destroy(const struct nft_ctx *ctx, +@@ -573,7 +573,7 @@ static void nft_ct_set_destroy(const str + struct nft_ct *priv = nft_expr_priv(expr); + + __nft_ct_set_destroy(ctx, priv); +- nf_ct_netns_put(ctx->net, ctx->afi->family); ++ nf_ct_netns_put(ctx->net, ctx->family); + } + + static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) +@@ -734,7 +734,7 @@ static int nft_ct_helper_obj_init(const + struct nft_ct_helper_obj *priv = nft_obj_data(obj); + struct nf_conntrack_helper *help4, *help6; + char name[NF_CT_HELPER_NAME_LEN]; +- int family = ctx->afi->family; ++ int family = ctx->family; + + if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO]) + return -EINVAL; +@@ -753,14 +753,14 @@ static int nft_ct_helper_obj_init(const + + switch (family) { + case NFPROTO_IPV4: +- if (ctx->afi->family == NFPROTO_IPV6) ++ if (ctx->family == NFPROTO_IPV6) + return -EINVAL; + + help4 = nf_conntrack_helper_try_module_get(name, family, + priv->l4proto); + break; + case NFPROTO_IPV6: +- if (ctx->afi->family == NFPROTO_IPV4) ++ if (ctx->family == NFPROTO_IPV4) + return -EINVAL; + + help6 = nf_conntrack_helper_try_module_get(name, family, +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -151,7 +151,7 @@ static int nft_flow_offload_init(const s + priv->flowtable = flowtable; + flowtable->use++; + +- return nf_ct_netns_get(ctx->net, ctx->afi->family); ++ return nf_ct_netns_get(ctx->net, ctx->family); + } + + static void nft_flow_offload_destroy(const struct nft_ctx *ctx, +@@ -160,7 +160,7 @@ static void nft_flow_offload_destroy(con + struct nft_flow_offload *priv = nft_expr_priv(expr); + + priv->flowtable->use--; +- nf_ct_netns_put(ctx->net, ctx->afi->family); ++ nf_ct_netns_put(ctx->net, ctx->family); + } + + static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) +--- a/net/netfilter/nft_log.c ++++ b/net/netfilter/nft_log.c +@@ -112,7 +112,7 @@ static int nft_log_init(const struct nft + break; + } + +- err = nf_logger_find_get(ctx->afi->family, li->type); ++ err = nf_logger_find_get(ctx->family, li->type); + if (err < 0) + goto err1; + +@@ -133,7 +133,7 @@ static void nft_log_destroy(const struct + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); + +- nf_logger_put(ctx->afi->family, li->type); ++ nf_logger_put(ctx->family, li->type); + } + + static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) +--- a/net/netfilter/nft_masq.c ++++ b/net/netfilter/nft_masq.c +@@ -73,7 +73,7 @@ int nft_masq_init(const struct nft_ctx * + } + } + +- return nf_ct_netns_get(ctx->net, ctx->afi->family); ++ return nf_ct_netns_get(ctx->net, ctx->family); + } + EXPORT_SYMBOL_GPL(nft_masq_init); + +--- a/net/netfilter/nft_meta.c ++++ b/net/netfilter/nft_meta.c +@@ -339,7 +339,7 @@ static int nft_meta_get_validate(const s + if (priv->key != NFT_META_SECPATH) + return 0; + +- switch (ctx->afi->family) { ++ switch (ctx->family) { + case NFPROTO_NETDEV: + hooks = 1 << NF_NETDEV_INGRESS; + break; +@@ -370,7 +370,7 @@ int nft_meta_set_validate(const struct n + if (priv->key != NFT_META_PKTTYPE) + return 0; + +- switch (ctx->afi->family) { ++ switch (ctx->family) { + case NFPROTO_BRIDGE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; +--- a/net/netfilter/nft_nat.c ++++ b/net/netfilter/nft_nat.c +@@ -142,7 +142,7 @@ static int nft_nat_init(const struct nft + return -EINVAL; + + family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); +- if (family != ctx->afi->family) ++ if (family != ctx->family) + return -EOPNOTSUPP; + + switch (family) { +--- a/net/netfilter/nft_redir.c ++++ b/net/netfilter/nft_redir.c +@@ -75,7 +75,7 @@ int nft_redir_init(const struct nft_ctx + return -EINVAL; + } + +- return nf_ct_netns_get(ctx->net, ctx->afi->family); ++ return nf_ct_netns_get(ctx->net, ctx->family); + } + EXPORT_SYMBOL_GPL(nft_redir_init); + diff --git a/target/linux/generic/backport-4.14/336-netfilter-exit_net-cleanup-check-added.patch b/target/linux/generic/backport-4.14/336-netfilter-exit_net-cleanup-check-added.patch new file mode 100644 index 0000000000..5bfa5aa833 --- /dev/null +++ b/target/linux/generic/backport-4.14/336-netfilter-exit_net-cleanup-check-added.patch @@ -0,0 +1,100 @@ +From: Vasily Averin <vvs@virtuozzo.com> +Date: Sun, 12 Nov 2017 14:32:37 +0300 +Subject: [PATCH] netfilter: exit_net cleanup check added + +Be sure that lists initialized in net_init hook was return to initial +state. + +Signed-off-by: Vasily Averin <vvs@virtuozzo.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c ++++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c +@@ -819,6 +819,7 @@ static void clusterip_net_exit(struct ne + cn->procdir = NULL; + #endif + nf_unregister_net_hook(net, &cip_arp_ops); ++ WARN_ON_ONCE(!list_empty(&cn->configs)); + } + + static struct pernet_operations clusterip_net_ops = { +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -6468,6 +6468,12 @@ static int __net_init nf_tables_init_net + return 0; + } + ++static void __net_exit nf_tables_exit_net(struct net *net) ++{ ++ WARN_ON_ONCE(!list_empty(&net->nft.af_info)); ++ WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); ++} ++ + int __nft_release_basechain(struct nft_ctx *ctx) + { + struct nft_rule *rule, *nr; +@@ -6545,6 +6551,7 @@ static void __nft_release_afinfo(struct + + static struct pernet_operations nf_tables_net_ops = { + .init = nf_tables_init_net, ++ .exit = nf_tables_exit_net, + }; + + static int __init nf_tables_module_init(void) +--- a/net/netfilter/nfnetlink_log.c ++++ b/net/netfilter/nfnetlink_log.c +@@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init( + + static void __net_exit nfnl_log_net_exit(struct net *net) + { ++ struct nfnl_log_net *log = nfnl_log_pernet(net); ++ unsigned int i; ++ + #ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); + #endif + nf_log_unset(net, &nfulnl_logger); ++ for (i = 0; i < INSTANCE_BUCKETS; i++) ++ WARN_ON_ONCE(!hlist_empty(&log->instance_table[i])); + } + + static struct pernet_operations nfnl_log_net_ops = { +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_ini + + static void __net_exit nfnl_queue_net_exit(struct net *net) + { ++ struct nfnl_queue_net *q = nfnl_queue_pernet(net); ++ unsigned int i; ++ + nf_unregister_queue_handler(net); + #ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); + #endif ++ for (i = 0; i < INSTANCE_BUCKETS; i++) ++ WARN_ON_ONCE(!hlist_empty(&q->instance_table[i])); + } + + static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) +--- a/net/netfilter/x_tables.c ++++ b/net/netfilter/x_tables.c +@@ -1714,8 +1714,17 @@ static int __net_init xt_net_init(struct + return 0; + } + ++static void __net_exit xt_net_exit(struct net *net) ++{ ++ int i; ++ ++ for (i = 0; i < NFPROTO_NUMPROTO; i++) ++ WARN_ON_ONCE(!list_empty(&net->xt.tables[i])); ++} ++ + static struct pernet_operations xt_net_ops = { + .init = xt_net_init, ++ .exit = xt_net_exit, + }; + + static int __init xt_init(void) diff --git a/target/linux/generic/backport-4.14/337-netfilter-nf_tables-get-rid-of-pernet-families.patch b/target/linux/generic/backport-4.14/337-netfilter-nf_tables-get-rid-of-pernet-families.patch new file mode 100644 index 0000000000..28ce2dcf0b --- /dev/null +++ b/target/linux/generic/backport-4.14/337-netfilter-nf_tables-get-rid-of-pernet-families.patch @@ -0,0 +1,598 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 9 Jan 2018 02:42:11 +0100 +Subject: [PATCH] netfilter: nf_tables: get rid of pernet families + +Now that we have a single table list for each netns, we can get rid of +one pointer per family and the global afinfo list, thus, shrinking +struct netns for nftables that now becomes 64 bytes smaller. + +And call __nft_release_afinfo() from __net_exit path accordingly to +release netnamespace objects on removal. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -969,8 +969,8 @@ struct nft_af_info { + struct module *owner; + }; + +-int nft_register_afinfo(struct net *, struct nft_af_info *); +-void nft_unregister_afinfo(struct net *, struct nft_af_info *); ++int nft_register_afinfo(struct nft_af_info *); ++void nft_unregister_afinfo(struct nft_af_info *); + + int nft_register_chain_type(const struct nf_chain_type *); + void nft_unregister_chain_type(const struct nf_chain_type *); +--- a/include/net/netns/nftables.h ++++ b/include/net/netns/nftables.h +@@ -7,15 +7,8 @@ + struct nft_af_info; + + struct netns_nftables { +- struct list_head af_info; + struct list_head tables; + struct list_head commit_list; +- struct nft_af_info *ipv4; +- struct nft_af_info *ipv6; +- struct nft_af_info *inet; +- struct nft_af_info *arp; +- struct nft_af_info *bridge; +- struct nft_af_info *netdev; + unsigned int base_seq; + u8 gencursor; + }; +--- a/net/bridge/netfilter/nf_tables_bridge.c ++++ b/net/bridge/netfilter/nf_tables_bridge.c +@@ -47,34 +47,6 @@ static struct nft_af_info nft_af_bridge + .owner = THIS_MODULE, + }; + +-static int nf_tables_bridge_init_net(struct net *net) +-{ +- net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); +- if (net->nft.bridge == NULL) +- return -ENOMEM; +- +- memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge)); +- +- if (nft_register_afinfo(net, net->nft.bridge) < 0) +- goto err; +- +- return 0; +-err: +- kfree(net->nft.bridge); +- return -ENOMEM; +-} +- +-static void nf_tables_bridge_exit_net(struct net *net) +-{ +- nft_unregister_afinfo(net, net->nft.bridge); +- kfree(net->nft.bridge); +-} +- +-static struct pernet_operations nf_tables_bridge_net_ops = { +- .init = nf_tables_bridge_init_net, +- .exit = nf_tables_bridge_exit_net, +-}; +- + static const struct nf_chain_type filter_bridge = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -98,17 +70,17 @@ static int __init nf_tables_bridge_init( + { + int ret; + +- ret = nft_register_chain_type(&filter_bridge); ++ ret = nft_register_afinfo(&nft_af_bridge); + if (ret < 0) + return ret; + +- ret = register_pernet_subsys(&nf_tables_bridge_net_ops); ++ ret = nft_register_chain_type(&filter_bridge); + if (ret < 0) +- goto err_register_subsys; ++ goto err_register_chain; + + return ret; + +-err_register_subsys: ++err_register_chain: + nft_unregister_chain_type(&filter_bridge); + + return ret; +@@ -116,8 +88,8 @@ err_register_subsys: + + static void __exit nf_tables_bridge_exit(void) + { +- unregister_pernet_subsys(&nf_tables_bridge_net_ops); + nft_unregister_chain_type(&filter_bridge); ++ nft_unregister_afinfo(&nft_af_bridge); + } + + module_init(nf_tables_bridge_init); +--- a/net/ipv4/netfilter/nf_tables_arp.c ++++ b/net/ipv4/netfilter/nf_tables_arp.c +@@ -32,34 +32,6 @@ static struct nft_af_info nft_af_arp __r + .owner = THIS_MODULE, + }; + +-static int nf_tables_arp_init_net(struct net *net) +-{ +- net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); +- if (net->nft.arp== NULL) +- return -ENOMEM; +- +- memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp)); +- +- if (nft_register_afinfo(net, net->nft.arp) < 0) +- goto err; +- +- return 0; +-err: +- kfree(net->nft.arp); +- return -ENOMEM; +-} +- +-static void nf_tables_arp_exit_net(struct net *net) +-{ +- nft_unregister_afinfo(net, net->nft.arp); +- kfree(net->nft.arp); +-} +- +-static struct pernet_operations nf_tables_arp_net_ops = { +- .init = nf_tables_arp_init_net, +- .exit = nf_tables_arp_exit_net, +-}; +- + static const struct nf_chain_type filter_arp = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -77,21 +49,26 @@ static int __init nf_tables_arp_init(voi + { + int ret; + +- ret = nft_register_chain_type(&filter_arp); ++ ret = nft_register_afinfo(&nft_af_arp); + if (ret < 0) + return ret; + +- ret = register_pernet_subsys(&nf_tables_arp_net_ops); ++ ret = nft_register_chain_type(&filter_arp); + if (ret < 0) +- nft_unregister_chain_type(&filter_arp); ++ goto err_register_chain; ++ ++ return 0; ++ ++err_register_chain: ++ nft_unregister_chain_type(&filter_arp); + + return ret; + } + + static void __exit nf_tables_arp_exit(void) + { +- unregister_pernet_subsys(&nf_tables_arp_net_ops); + nft_unregister_chain_type(&filter_arp); ++ nft_unregister_afinfo(&nft_af_arp); + } + + module_init(nf_tables_arp_init); +--- a/net/ipv4/netfilter/nf_tables_ipv4.c ++++ b/net/ipv4/netfilter/nf_tables_ipv4.c +@@ -35,34 +35,6 @@ static struct nft_af_info nft_af_ipv4 __ + .owner = THIS_MODULE, + }; + +-static int nf_tables_ipv4_init_net(struct net *net) +-{ +- net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); +- if (net->nft.ipv4 == NULL) +- return -ENOMEM; +- +- memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); +- +- if (nft_register_afinfo(net, net->nft.ipv4) < 0) +- goto err; +- +- return 0; +-err: +- kfree(net->nft.ipv4); +- return -ENOMEM; +-} +- +-static void nf_tables_ipv4_exit_net(struct net *net) +-{ +- nft_unregister_afinfo(net, net->nft.ipv4); +- kfree(net->nft.ipv4); +-} +- +-static struct pernet_operations nf_tables_ipv4_net_ops = { +- .init = nf_tables_ipv4_init_net, +- .exit = nf_tables_ipv4_exit_net, +-}; +- + static const struct nf_chain_type filter_ipv4 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -86,21 +58,25 @@ static int __init nf_tables_ipv4_init(vo + { + int ret; + +- ret = nft_register_chain_type(&filter_ipv4); ++ ret = nft_register_afinfo(&nft_af_ipv4); + if (ret < 0) + return ret; + +- ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); ++ ret = nft_register_chain_type(&filter_ipv4); + if (ret < 0) +- nft_unregister_chain_type(&filter_ipv4); ++ goto err_register_chain; ++ ++ return 0; + ++err_register_chain: ++ nft_unregister_afinfo(&nft_af_ipv4); + return ret; + } + + static void __exit nf_tables_ipv4_exit(void) + { +- unregister_pernet_subsys(&nf_tables_ipv4_net_ops); + nft_unregister_chain_type(&filter_ipv4); ++ nft_unregister_afinfo(&nft_af_ipv4); + } + + module_init(nf_tables_ipv4_init); +--- a/net/ipv6/netfilter/nf_tables_ipv6.c ++++ b/net/ipv6/netfilter/nf_tables_ipv6.c +@@ -33,34 +33,6 @@ static struct nft_af_info nft_af_ipv6 __ + .owner = THIS_MODULE, + }; + +-static int nf_tables_ipv6_init_net(struct net *net) +-{ +- net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); +- if (net->nft.ipv6 == NULL) +- return -ENOMEM; +- +- memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); +- +- if (nft_register_afinfo(net, net->nft.ipv6) < 0) +- goto err; +- +- return 0; +-err: +- kfree(net->nft.ipv6); +- return -ENOMEM; +-} +- +-static void nf_tables_ipv6_exit_net(struct net *net) +-{ +- nft_unregister_afinfo(net, net->nft.ipv6); +- kfree(net->nft.ipv6); +-} +- +-static struct pernet_operations nf_tables_ipv6_net_ops = { +- .init = nf_tables_ipv6_init_net, +- .exit = nf_tables_ipv6_exit_net, +-}; +- + static const struct nf_chain_type filter_ipv6 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -84,20 +56,24 @@ static int __init nf_tables_ipv6_init(vo + { + int ret; + +- ret = nft_register_chain_type(&filter_ipv6); ++ ret = nft_register_afinfo(&nft_af_ipv6); + if (ret < 0) + return ret; + +- ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); ++ ret = nft_register_chain_type(&filter_ipv6); + if (ret < 0) +- nft_unregister_chain_type(&filter_ipv6); ++ goto err_register_chain; ++ ++ return 0; + ++err_register_chain: ++ nft_unregister_afinfo(&nft_af_ipv6); + return ret; + } + + static void __exit nf_tables_ipv6_exit(void) + { +- unregister_pernet_subsys(&nf_tables_ipv6_net_ops); ++ nft_unregister_afinfo(&nft_af_ipv6); + nft_unregister_chain_type(&filter_ipv6); + } + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -26,6 +26,7 @@ + static LIST_HEAD(nf_tables_expressions); + static LIST_HEAD(nf_tables_objects); + static LIST_HEAD(nf_tables_flowtables); ++static LIST_HEAD(nf_tables_af_info); + + /** + * nft_register_afinfo - register nf_tables address family info +@@ -35,17 +36,15 @@ static LIST_HEAD(nf_tables_flowtables); + * Register the address family for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +-int nft_register_afinfo(struct net *net, struct nft_af_info *afi) ++int nft_register_afinfo(struct nft_af_info *afi) + { + nfnl_lock(NFNL_SUBSYS_NFTABLES); +- list_add_tail_rcu(&afi->list, &net->nft.af_info); ++ list_add_tail_rcu(&afi->list, &nf_tables_af_info); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; + } + EXPORT_SYMBOL_GPL(nft_register_afinfo); + +-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi); +- + /** + * nft_unregister_afinfo - unregister nf_tables address family info + * +@@ -53,10 +52,9 @@ static void __nft_release_afinfo(struct + * + * Unregister the address family for use with nf_tables. + */ +-void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi) ++void nft_unregister_afinfo(struct nft_af_info *afi) + { + nfnl_lock(NFNL_SUBSYS_NFTABLES); +- __nft_release_afinfo(net, afi); + list_del_rcu(&afi->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + } +@@ -66,7 +64,7 @@ static struct nft_af_info *nft_afinfo_lo + { + struct nft_af_info *afi; + +- list_for_each_entry(afi, &net->nft.af_info, list) { ++ list_for_each_entry(afi, &nf_tables_af_info, list) { + if (afi->family == family) + return afi; + } +@@ -4968,15 +4966,12 @@ void nft_flow_table_iterate(struct net * + void *data) + { + struct nft_flowtable *flowtable; +- const struct nft_af_info *afi; + const struct nft_table *table; + + rcu_read_lock(); +- list_for_each_entry_rcu(afi, &net->nft.af_info, list) { +- list_for_each_entry_rcu(table, &net->nft.tables, list) { +- list_for_each_entry_rcu(flowtable, &table->flowtables, list) { +- iter(&flowtable->data, data); +- } ++ list_for_each_entry_rcu(table, &net->nft.tables, list) { ++ list_for_each_entry_rcu(flowtable, &table->flowtables, list) { ++ iter(&flowtable->data, data); + } + } + rcu_read_unlock(); +@@ -6459,21 +6454,6 @@ int nft_data_dump(struct sk_buff *skb, i + } + EXPORT_SYMBOL_GPL(nft_data_dump); + +-static int __net_init nf_tables_init_net(struct net *net) +-{ +- INIT_LIST_HEAD(&net->nft.af_info); +- INIT_LIST_HEAD(&net->nft.tables); +- INIT_LIST_HEAD(&net->nft.commit_list); +- net->nft.base_seq = 1; +- return 0; +-} +- +-static void __net_exit nf_tables_exit_net(struct net *net) +-{ +- WARN_ON_ONCE(!list_empty(&net->nft.af_info)); +- WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); +-} +- + int __nft_release_basechain(struct nft_ctx *ctx) + { + struct nft_rule *rule, *nr; +@@ -6494,8 +6474,7 @@ int __nft_release_basechain(struct nft_c + } + EXPORT_SYMBOL_GPL(__nft_release_basechain); + +-/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ +-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) ++static void __nft_release_afinfo(struct net *net) + { + struct nft_flowtable *flowtable, *nf; + struct nft_table *table, *nt; +@@ -6505,10 +6484,11 @@ static void __nft_release_afinfo(struct + struct nft_set *set, *ns; + struct nft_ctx ctx = { + .net = net, +- .family = afi->family, + }; + + list_for_each_entry_safe(table, nt, &net->nft.tables, list) { ++ ctx.family = table->afi->family; ++ + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); + list_for_each_entry(flowtable, &table->flowtables, list) +@@ -6549,6 +6529,21 @@ static void __nft_release_afinfo(struct + } + } + ++static int __net_init nf_tables_init_net(struct net *net) ++{ ++ INIT_LIST_HEAD(&net->nft.tables); ++ INIT_LIST_HEAD(&net->nft.commit_list); ++ net->nft.base_seq = 1; ++ return 0; ++} ++ ++static void __net_exit nf_tables_exit_net(struct net *net) ++{ ++ __nft_release_afinfo(net); ++ WARN_ON_ONCE(!list_empty(&net->nft.tables)); ++ WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); ++} ++ + static struct pernet_operations nf_tables_net_ops = { + .init = nf_tables_init_net, + .exit = nf_tables_exit_net, +--- a/net/netfilter/nf_tables_inet.c ++++ b/net/netfilter/nf_tables_inet.c +@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_inet __ + .owner = THIS_MODULE, + }; + +-static int __net_init nf_tables_inet_init_net(struct net *net) +-{ +- net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); +- if (net->nft.inet == NULL) +- return -ENOMEM; +- memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet)); +- +- if (nft_register_afinfo(net, net->nft.inet) < 0) +- goto err; +- +- return 0; +- +-err: +- kfree(net->nft.inet); +- return -ENOMEM; +-} +- +-static void __net_exit nf_tables_inet_exit_net(struct net *net) +-{ +- nft_unregister_afinfo(net, net->nft.inet); +- kfree(net->nft.inet); +-} +- +-static struct pernet_operations nf_tables_inet_net_ops = { +- .init = nf_tables_inet_init_net, +- .exit = nf_tables_inet_exit_net, +-}; +- + static const struct nf_chain_type filter_inet = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -94,21 +66,24 @@ static int __init nf_tables_inet_init(vo + { + int ret; + +- ret = nft_register_chain_type(&filter_inet); +- if (ret < 0) ++ if (nft_register_afinfo(&nft_af_inet) < 0) + return ret; + +- ret = register_pernet_subsys(&nf_tables_inet_net_ops); ++ ret = nft_register_chain_type(&filter_inet); + if (ret < 0) +- nft_unregister_chain_type(&filter_inet); ++ goto err_register_chain; ++ ++ return ret; + ++err_register_chain: ++ nft_unregister_afinfo(&nft_af_inet); + return ret; + } + + static void __exit nf_tables_inet_exit(void) + { +- unregister_pernet_subsys(&nf_tables_inet_net_ops); + nft_unregister_chain_type(&filter_inet); ++ nft_unregister_afinfo(&nft_af_inet); + } + + module_init(nf_tables_inet_init); +--- a/net/netfilter/nf_tables_netdev.c ++++ b/net/netfilter/nf_tables_netdev.c +@@ -43,34 +43,6 @@ static struct nft_af_info nft_af_netdev + .owner = THIS_MODULE, + }; + +-static int nf_tables_netdev_init_net(struct net *net) +-{ +- net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); +- if (net->nft.netdev == NULL) +- return -ENOMEM; +- +- memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev)); +- +- if (nft_register_afinfo(net, net->nft.netdev) < 0) +- goto err; +- +- return 0; +-err: +- kfree(net->nft.netdev); +- return -ENOMEM; +-} +- +-static void nf_tables_netdev_exit_net(struct net *net) +-{ +- nft_unregister_afinfo(net, net->nft.netdev); +- kfree(net->nft.netdev); +-} +- +-static struct pernet_operations nf_tables_netdev_net_ops = { +- .init = nf_tables_netdev_init_net, +- .exit = nf_tables_netdev_exit_net, +-}; +- + static const struct nf_chain_type nft_filter_chain_netdev = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -145,32 +117,32 @@ static int __init nf_tables_netdev_init( + { + int ret; + +- ret = nft_register_chain_type(&nft_filter_chain_netdev); +- if (ret) ++ if (nft_register_afinfo(&nft_af_netdev) < 0) + return ret; + +- ret = register_pernet_subsys(&nf_tables_netdev_net_ops); ++ ret = nft_register_chain_type(&nft_filter_chain_netdev); + if (ret) +- goto err1; ++ goto err_register_chain_type; + + ret = register_netdevice_notifier(&nf_tables_netdev_notifier); + if (ret) +- goto err2; ++ goto err_register_netdevice_notifier; + + return 0; + +-err2: +- unregister_pernet_subsys(&nf_tables_netdev_net_ops); +-err1: ++err_register_netdevice_notifier: + nft_unregister_chain_type(&nft_filter_chain_netdev); ++err_register_chain_type: ++ nft_unregister_afinfo(&nft_af_netdev); ++ + return ret; + } + + static void __exit nf_tables_netdev_exit(void) + { + unregister_netdevice_notifier(&nf_tables_netdev_notifier); +- unregister_pernet_subsys(&nf_tables_netdev_net_ops); + nft_unregister_chain_type(&nft_filter_chain_netdev); ++ nft_unregister_afinfo(&nft_af_netdev); + } + + module_init(nf_tables_netdev_init); diff --git a/target/linux/generic/backport-4.14/338-netfilter-nf_tables-get-rid-of-struct-nft_af_info-ab.patch b/target/linux/generic/backport-4.14/338-netfilter-nf_tables-get-rid-of-struct-nft_af_info-ab.patch new file mode 100644 index 0000000000..9e72b7b32d --- /dev/null +++ b/target/linux/generic/backport-4.14/338-netfilter-nf_tables-get-rid-of-struct-nft_af_info-ab.patch @@ -0,0 +1,1204 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 9 Jan 2018 02:48:47 +0100 +Subject: [PATCH] netfilter: nf_tables: get rid of struct nft_af_info + abstraction + +Remove the infrastructure to register/unregister nft_af_info structure, +this structure stores no useful information anymore. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -950,28 +950,12 @@ struct nft_table { + struct list_head flowtables; + u64 hgenerator; + u32 use; +- u16 flags:14, ++ u16 family:6, ++ flags:8, + genmask:2; +- struct nft_af_info *afi; + char *name; + }; + +-/** +- * struct nft_af_info - nf_tables address family info +- * +- * @list: used internally +- * @family: address family +- * @owner: module owner +- */ +-struct nft_af_info { +- struct list_head list; +- int family; +- struct module *owner; +-}; +- +-int nft_register_afinfo(struct nft_af_info *); +-void nft_unregister_afinfo(struct nft_af_info *); +- + int nft_register_chain_type(const struct nf_chain_type *); + void nft_unregister_chain_type(const struct nf_chain_type *); + +@@ -1139,9 +1123,6 @@ void nft_trace_notify(struct nft_tracein + #define nft_dereference(p) \ + nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) + +-#define MODULE_ALIAS_NFT_FAMILY(family) \ +- MODULE_ALIAS("nft-afinfo-" __stringify(family)) +- + #define MODULE_ALIAS_NFT_CHAIN(family, name) \ + MODULE_ALIAS("nft-chain-" __stringify(family) "-" name) + +--- a/net/bridge/netfilter/nf_tables_bridge.c ++++ b/net/bridge/netfilter/nf_tables_bridge.c +@@ -42,11 +42,6 @@ nft_do_chain_bridge(void *priv, + return nft_do_chain(&pkt, priv); + } + +-static struct nft_af_info nft_af_bridge __read_mostly = { +- .family = NFPROTO_BRIDGE, +- .owner = THIS_MODULE, +-}; +- + static const struct nf_chain_type filter_bridge = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -68,28 +63,12 @@ static const struct nf_chain_type filter + + static int __init nf_tables_bridge_init(void) + { +- int ret; +- +- ret = nft_register_afinfo(&nft_af_bridge); +- if (ret < 0) +- return ret; +- +- ret = nft_register_chain_type(&filter_bridge); +- if (ret < 0) +- goto err_register_chain; +- +- return ret; +- +-err_register_chain: +- nft_unregister_chain_type(&filter_bridge); +- +- return ret; ++ return nft_register_chain_type(&filter_bridge); + } + + static void __exit nf_tables_bridge_exit(void) + { + nft_unregister_chain_type(&filter_bridge); +- nft_unregister_afinfo(&nft_af_bridge); + } + + module_init(nf_tables_bridge_init); +@@ -97,4 +76,4 @@ module_exit(nf_tables_bridge_exit); + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +-MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); ++MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter"); +--- a/net/ipv4/netfilter/nf_tables_arp.c ++++ b/net/ipv4/netfilter/nf_tables_arp.c +@@ -27,11 +27,6 @@ nft_do_chain_arp(void *priv, + return nft_do_chain(&pkt, priv); + } + +-static struct nft_af_info nft_af_arp __read_mostly = { +- .family = NFPROTO_ARP, +- .owner = THIS_MODULE, +-}; +- + static const struct nf_chain_type filter_arp = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -47,28 +42,12 @@ static const struct nf_chain_type filter + + static int __init nf_tables_arp_init(void) + { +- int ret; +- +- ret = nft_register_afinfo(&nft_af_arp); +- if (ret < 0) +- return ret; +- +- ret = nft_register_chain_type(&filter_arp); +- if (ret < 0) +- goto err_register_chain; +- +- return 0; +- +-err_register_chain: +- nft_unregister_chain_type(&filter_arp); +- +- return ret; ++ return nft_register_chain_type(&filter_arp); + } + + static void __exit nf_tables_arp_exit(void) + { + nft_unregister_chain_type(&filter_arp); +- nft_unregister_afinfo(&nft_af_arp); + } + + module_init(nf_tables_arp_init); +@@ -76,4 +55,4 @@ module_exit(nf_tables_arp_exit); + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +-MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */ ++MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */ +--- a/net/ipv4/netfilter/nf_tables_ipv4.c ++++ b/net/ipv4/netfilter/nf_tables_ipv4.c +@@ -30,11 +30,6 @@ static unsigned int nft_do_chain_ipv4(vo + return nft_do_chain(&pkt, priv); + } + +-static struct nft_af_info nft_af_ipv4 __read_mostly = { +- .family = NFPROTO_IPV4, +- .owner = THIS_MODULE, +-}; +- + static const struct nf_chain_type filter_ipv4 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -56,27 +51,12 @@ static const struct nf_chain_type filter + + static int __init nf_tables_ipv4_init(void) + { +- int ret; +- +- ret = nft_register_afinfo(&nft_af_ipv4); +- if (ret < 0) +- return ret; +- +- ret = nft_register_chain_type(&filter_ipv4); +- if (ret < 0) +- goto err_register_chain; +- +- return 0; +- +-err_register_chain: +- nft_unregister_afinfo(&nft_af_ipv4); +- return ret; ++ return nft_register_chain_type(&filter_ipv4); + } + + static void __exit nf_tables_ipv4_exit(void) + { + nft_unregister_chain_type(&filter_ipv4); +- nft_unregister_afinfo(&nft_af_ipv4); + } + + module_init(nf_tables_ipv4_init); +@@ -84,4 +64,4 @@ module_exit(nf_tables_ipv4_exit); + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +-MODULE_ALIAS_NFT_FAMILY(AF_INET); ++MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter"); +--- a/net/ipv6/netfilter/nf_tables_ipv6.c ++++ b/net/ipv6/netfilter/nf_tables_ipv6.c +@@ -28,11 +28,6 @@ static unsigned int nft_do_chain_ipv6(vo + return nft_do_chain(&pkt, priv); + } + +-static struct nft_af_info nft_af_ipv6 __read_mostly = { +- .family = NFPROTO_IPV6, +- .owner = THIS_MODULE, +-}; +- + static const struct nf_chain_type filter_ipv6 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -54,26 +49,11 @@ static const struct nf_chain_type filter + + static int __init nf_tables_ipv6_init(void) + { +- int ret; +- +- ret = nft_register_afinfo(&nft_af_ipv6); +- if (ret < 0) +- return ret; +- +- ret = nft_register_chain_type(&filter_ipv6); +- if (ret < 0) +- goto err_register_chain; +- +- return 0; +- +-err_register_chain: +- nft_unregister_afinfo(&nft_af_ipv6); +- return ret; ++ return nft_register_chain_type(&filter_ipv6); + } + + static void __exit nf_tables_ipv6_exit(void) + { +- nft_unregister_afinfo(&nft_af_ipv6); + nft_unregister_chain_type(&filter_ipv6); + } + +@@ -82,4 +62,4 @@ module_exit(nf_tables_ipv6_exit); + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +-MODULE_ALIAS_NFT_FAMILY(AF_INET6); ++MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter"); +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -26,71 +26,6 @@ + static LIST_HEAD(nf_tables_expressions); + static LIST_HEAD(nf_tables_objects); + static LIST_HEAD(nf_tables_flowtables); +-static LIST_HEAD(nf_tables_af_info); +- +-/** +- * nft_register_afinfo - register nf_tables address family info +- * +- * @afi: address family info to register +- * +- * Register the address family for use with nf_tables. Returns zero on +- * success or a negative errno code otherwise. +- */ +-int nft_register_afinfo(struct nft_af_info *afi) +-{ +- nfnl_lock(NFNL_SUBSYS_NFTABLES); +- list_add_tail_rcu(&afi->list, &nf_tables_af_info); +- nfnl_unlock(NFNL_SUBSYS_NFTABLES); +- return 0; +-} +-EXPORT_SYMBOL_GPL(nft_register_afinfo); +- +-/** +- * nft_unregister_afinfo - unregister nf_tables address family info +- * +- * @afi: address family info to unregister +- * +- * Unregister the address family for use with nf_tables. +- */ +-void nft_unregister_afinfo(struct nft_af_info *afi) +-{ +- nfnl_lock(NFNL_SUBSYS_NFTABLES); +- list_del_rcu(&afi->list); +- nfnl_unlock(NFNL_SUBSYS_NFTABLES); +-} +-EXPORT_SYMBOL_GPL(nft_unregister_afinfo); +- +-static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family) +-{ +- struct nft_af_info *afi; +- +- list_for_each_entry(afi, &nf_tables_af_info, list) { +- if (afi->family == family) +- return afi; +- } +- return NULL; +-} +- +-static struct nft_af_info * +-nf_tables_afinfo_lookup(struct net *net, int family, bool autoload) +-{ +- struct nft_af_info *afi; +- +- afi = nft_afinfo_lookup(net, family); +- if (afi != NULL) +- return afi; +-#ifdef CONFIG_MODULES +- if (autoload) { +- nfnl_unlock(NFNL_SUBSYS_NFTABLES); +- request_module("nft-afinfo-%u", family); +- nfnl_lock(NFNL_SUBSYS_NFTABLES); +- afi = nft_afinfo_lookup(net, family); +- if (afi != NULL) +- return ERR_PTR(-EAGAIN); +- } +-#endif +- return ERR_PTR(-EAFNOSUPPORT); +-} + + static void nft_ctx_init(struct nft_ctx *ctx, + struct net *net, +@@ -390,7 +325,7 @@ static struct nft_table *nft_table_looku + + list_for_each_entry(table, &net->nft.tables, list) { + if (!nla_strcmp(nla, table->name) && +- table->afi->family == family && ++ table->family == family && + nft_active_genmask(table, genmask)) + return table; + } +@@ -531,7 +466,7 @@ static int nf_tables_dump_tables(struct + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { +- if (family != NFPROTO_UNSPEC && family != table->afi->family) ++ if (family != NFPROTO_UNSPEC && family != table->family) + continue; + + if (idx < s_idx) +@@ -545,7 +480,7 @@ static int nf_tables_dump_tables(struct + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWTABLE, NLM_F_MULTI, +- table->afi->family, table) < 0) ++ table->family, table) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +@@ -565,7 +500,6 @@ static int nf_tables_gettable(struct net + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); +- const struct nft_af_info *afi; + const struct nft_table *table; + struct sk_buff *skb2; + int family = nfmsg->nfgen_family; +@@ -578,11 +512,7 @@ static int nf_tables_gettable(struct net + return netlink_dump_start(nlsk, skb, nlh, &c); + } + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -702,19 +632,14 @@ static int nf_tables_newtable(struct net + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); + const struct nlattr *name; +- struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + u32 flags = 0; + struct nft_ctx ctx; + int err; + +- afi = nf_tables_afinfo_lookup(net, family, true); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- + name = nla[NFTA_TABLE_NAME]; +- table = nf_tables_table_lookup(net, name, afi->family, genmask); ++ table = nf_tables_table_lookup(net, name, family, genmask); + if (IS_ERR(table)) { + if (PTR_ERR(table) != -ENOENT) + return PTR_ERR(table); +@@ -724,7 +649,7 @@ static int nf_tables_newtable(struct net + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + return nf_tables_updtable(&ctx); + } + +@@ -734,40 +659,34 @@ static int nf_tables_newtable(struct net + return -EINVAL; + } + +- err = -EAFNOSUPPORT; +- if (!try_module_get(afi->owner)) +- goto err1; +- + err = -ENOMEM; + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (table == NULL) +- goto err2; ++ goto err_kzalloc; + + table->name = nla_strdup(name, GFP_KERNEL); + if (table->name == NULL) +- goto err3; ++ goto err_strdup; + + INIT_LIST_HEAD(&table->chains); + INIT_LIST_HEAD(&table->sets); + INIT_LIST_HEAD(&table->objects); + INIT_LIST_HEAD(&table->flowtables); +- table->afi = afi; ++ table->family = family; + table->flags = flags; + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); + if (err < 0) +- goto err4; ++ goto err_trans; + + list_add_tail_rcu(&table->list, &net->nft.tables); + return 0; +-err4: ++err_trans: + kfree(table->name); +-err3: ++err_strdup: + kfree(table); +-err2: +- module_put(afi->owner); +-err1: ++err_kzalloc: + return err; + } + +@@ -838,10 +757,10 @@ static int nft_flush(struct nft_ctx *ctx + int err = 0; + + list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { +- if (family != AF_UNSPEC && table->afi->family != family) ++ if (family != AF_UNSPEC && table->family != family) + continue; + +- ctx->family = table->afi->family; ++ ctx->family = table->family; + + if (!nft_is_active_next(ctx->net, table)) + continue; +@@ -867,7 +786,6 @@ static int nf_tables_deltable(struct net + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); +- struct nft_af_info *afi; + struct nft_table *table; + int family = nfmsg->nfgen_family; + struct nft_ctx ctx; +@@ -876,11 +794,7 @@ static int nf_tables_deltable(struct net + if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL) + return nft_flush(&ctx, family); + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -889,7 +803,7 @@ static int nf_tables_deltable(struct net + table->use > 0) + return -EBUSY; + +- ctx.family = afi->family; ++ ctx.family = family; + ctx.table = table; + + return nft_flush_table(&ctx); +@@ -901,7 +815,6 @@ static void nf_tables_table_destroy(stru + + kfree(ctx->table->name); + kfree(ctx->table); +- module_put(ctx->table->afi->owner); + } + + int nft_register_chain_type(const struct nf_chain_type *ctype) +@@ -1130,7 +1043,7 @@ static int nf_tables_dump_chains(struct + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { +- if (family != NFPROTO_UNSPEC && family != table->afi->family) ++ if (family != NFPROTO_UNSPEC && family != table->family) + continue; + + list_for_each_entry_rcu(chain, &table->chains, list) { +@@ -1146,7 +1059,7 @@ static int nf_tables_dump_chains(struct + cb->nlh->nlmsg_seq, + NFT_MSG_NEWCHAIN, + NLM_F_MULTI, +- table->afi->family, table, ++ table->family, table, + chain) < 0) + goto done; + +@@ -1168,7 +1081,6 @@ static int nf_tables_getchain(struct net + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); +- const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + struct sk_buff *skb2; +@@ -1182,11 +1094,7 @@ static int nf_tables_getchain(struct net + return netlink_dump_start(nlsk, skb, nlh, &c); + } + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -1550,7 +1458,6 @@ static int nf_tables_newchain(struct net + const struct nlattr * uninitialized_var(name); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + u8 policy = NF_ACCEPT; +@@ -1560,11 +1467,7 @@ static int nf_tables_newchain(struct net + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + +- afi = nf_tables_afinfo_lookup(net, family, true); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -1605,7 +1508,7 @@ static int nf_tables_newchain(struct net + } + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); + + if (chain != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) +@@ -1626,7 +1529,6 @@ static int nf_tables_delchain(struct net + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule; +@@ -1635,11 +1537,7 @@ static int nf_tables_delchain(struct net + u32 use; + int err; + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -1652,7 +1550,7 @@ static int nf_tables_delchain(struct net + chain->use > 0) + return -EBUSY; + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); + + use = chain->use; + list_for_each_entry(rule, &chain->rules, list) { +@@ -2075,7 +1973,7 @@ static int nf_tables_dump_rules(struct s + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { +- if (family != NFPROTO_UNSPEC && family != table->afi->family) ++ if (family != NFPROTO_UNSPEC && family != table->family) + continue; + + if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) +@@ -2098,7 +1996,7 @@ static int nf_tables_dump_rules(struct s + cb->nlh->nlmsg_seq, + NFT_MSG_NEWRULE, + NLM_F_MULTI | NLM_F_APPEND, +- table->afi->family, ++ table->family, + table, chain, rule) < 0) + goto done; + +@@ -2134,7 +2032,6 @@ static int nf_tables_getrule(struct net + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); +- const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_chain *chain; + const struct nft_rule *rule; +@@ -2178,11 +2075,7 @@ static int nf_tables_getrule(struct net + return netlink_dump_start(nlsk, skb, nlh, &c); + } + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -2240,7 +2133,7 @@ static int nf_tables_newrule(struct net + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); +- struct nft_af_info *afi; ++ int family = nfmsg->nfgen_family; + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule, *old_rule = NULL; +@@ -2256,11 +2149,7 @@ static int nf_tables_newrule(struct net + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + +- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -2300,7 +2189,7 @@ static int nf_tables_newrule(struct net + return PTR_ERR(old_rule); + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); + + n = 0; + size = 0; +@@ -2424,18 +2313,13 @@ static int nf_tables_delrule(struct net + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain = NULL; + struct nft_rule *rule; + int family = nfmsg->nfgen_family, err = 0; + struct nft_ctx ctx; + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -2447,7 +2331,7 @@ static int nf_tables_delrule(struct net + return PTR_ERR(chain); + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, chain, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); + + if (chain) { + if (nla[NFTA_RULE_HANDLE]) { +@@ -2632,26 +2516,17 @@ static int nft_ctx_init_from_setattr(str + u8 genmask) + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); +- struct nft_af_info *afi = NULL; ++ int family = nfmsg->nfgen_family; + struct nft_table *table = NULL; + +- if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { +- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- } +- + if (nla[NFTA_SET_TABLE] != NULL) { +- if (afi == NULL) +- return -EAFNOSUPPORT; +- + table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], +- afi->family, genmask); ++ family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + } + +- nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); + return 0; + } + +@@ -2882,7 +2757,7 @@ static int nf_tables_dump_sets(struct sk + + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (ctx->family != NFPROTO_UNSPEC && +- ctx->family != table->afi->family) ++ ctx->family != table->family) + continue; + + if (ctx->table && ctx->table != table) +@@ -2903,7 +2778,7 @@ static int nf_tables_dump_sets(struct sk + + ctx_set = *ctx; + ctx_set.table = table; +- ctx_set.family = table->afi->family; ++ ctx_set.family = table->family; + + if (nf_tables_fill_set(skb, &ctx_set, set, + NFT_MSG_NEWSET, +@@ -3015,8 +2890,8 @@ static int nf_tables_newset(struct net * + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); ++ int family = nfmsg->nfgen_family; + const struct nft_set_ops *ops; +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_set *set; + struct nft_ctx ctx; +@@ -3123,16 +2998,12 @@ static int nf_tables_newset(struct net * + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + +- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); + if (IS_ERR(set)) { +@@ -3390,19 +3261,15 @@ static int nft_ctx_init_from_elemattr(st + u8 genmask) + { + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); +- struct nft_af_info *afi; ++ int family = nfmsg->nfgen_family; + struct nft_table *table; + +- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- + table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], +- afi->family, genmask); ++ family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +- nft_ctx_init(ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); + return 0; + } + +@@ -3520,7 +3387,7 @@ static int nf_tables_dump_set(struct sk_ + rcu_read_lock(); + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (dump_ctx->ctx.family != NFPROTO_UNSPEC && +- dump_ctx->ctx.family != table->afi->family) ++ dump_ctx->ctx.family != table->family) + continue; + + if (table != dump_ctx->ctx.table) +@@ -3550,7 +3417,7 @@ static int nf_tables_dump_set(struct sk_ + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); +- nfmsg->nfgen_family = table->afi->family; ++ nfmsg->nfgen_family = table->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + +@@ -4427,7 +4294,6 @@ static int nf_tables_newobj(struct net * + const struct nft_object_type *type; + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; +@@ -4439,11 +4305,7 @@ static int nf_tables_newobj(struct net * + !nla[NFTA_OBJ_DATA]) + return -EINVAL; + +- afi = nf_tables_afinfo_lookup(net, family, true); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -4462,7 +4324,7 @@ static int nf_tables_newobj(struct net * + return 0; + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + type = nft_obj_type_get(objtype); + if (IS_ERR(type)) +@@ -4554,7 +4416,7 @@ static int nf_tables_dump_obj(struct sk_ + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { +- if (family != NFPROTO_UNSPEC && family != table->afi->family) ++ if (family != NFPROTO_UNSPEC && family != table->family) + continue; + + list_for_each_entry_rcu(obj, &table->objects, list) { +@@ -4577,7 +4439,7 @@ static int nf_tables_dump_obj(struct sk_ + cb->nlh->nlmsg_seq, + NFT_MSG_NEWOBJ, + NLM_F_MULTI | NLM_F_APPEND, +- table->afi->family, table, ++ table->family, table, + obj, reset) < 0) + goto done; + +@@ -4635,7 +4497,6 @@ static int nf_tables_getobj(struct net * + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); + int family = nfmsg->nfgen_family; +- const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_object *obj; + struct sk_buff *skb2; +@@ -4666,11 +4527,7 @@ static int nf_tables_getobj(struct net * + !nla[NFTA_OBJ_TYPE]) + return -EINVAL; + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -4717,7 +4574,6 @@ static int nf_tables_delobj(struct net * + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; +@@ -4727,11 +4583,7 @@ static int nf_tables_delobj(struct net * + !nla[NFTA_OBJ_NAME]) + return -EINVAL; + +- afi = nf_tables_afinfo_lookup(net, family, true); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- +- table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], afi->family, ++ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, + genmask); + if (IS_ERR(table)) + return PTR_ERR(table); +@@ -4743,7 +4595,7 @@ static int nf_tables_delobj(struct net * + if (obj->use > 0) + return -EBUSY; + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + return nft_delobj(&ctx, obj); + } +@@ -4928,33 +4780,31 @@ err1: + return err; + } + +-static const struct nf_flowtable_type * +-__nft_flowtable_type_get(const struct nft_af_info *afi) ++static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family) + { + const struct nf_flowtable_type *type; + + list_for_each_entry(type, &nf_tables_flowtables, list) { +- if (afi->family == type->family) ++ if (family == type->family) + return type; + } + return NULL; + } + +-static const struct nf_flowtable_type * +-nft_flowtable_type_get(const struct nft_af_info *afi) ++static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family) + { + const struct nf_flowtable_type *type; + +- type = __nft_flowtable_type_get(afi); ++ type = __nft_flowtable_type_get(family); + if (type != NULL && try_module_get(type->owner)) + return type; + + #ifdef CONFIG_MODULES + if (type == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +- request_module("nf-flowtable-%u", afi->family); ++ request_module("nf-flowtable-%u", family); + nfnl_lock(NFNL_SUBSYS_NFTABLES); +- if (__nft_flowtable_type_get(afi)) ++ if (__nft_flowtable_type_get(family)) + return ERR_PTR(-EAGAIN); + } + #endif +@@ -5002,7 +4852,6 @@ static int nf_tables_newflowtable(struct + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_ctx ctx; + int err, i, k; +@@ -5012,12 +4861,8 @@ static int nf_tables_newflowtable(struct + !nla[NFTA_FLOWTABLE_HOOK]) + return -EINVAL; + +- afi = nf_tables_afinfo_lookup(net, family, true); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], +- afi->family, genmask); ++ family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -5034,7 +4879,7 @@ static int nf_tables_newflowtable(struct + return 0; + } + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL); + if (!flowtable) +@@ -5047,7 +4892,7 @@ static int nf_tables_newflowtable(struct + goto err1; + } + +- type = nft_flowtable_type_get(afi); ++ type = nft_flowtable_type_get(family); + if (IS_ERR(type)) { + err = PTR_ERR(type); + goto err2; +@@ -5107,16 +4952,11 @@ static int nf_tables_delflowtable(struct + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; +- struct nft_af_info *afi; + struct nft_table *table; + struct nft_ctx ctx; + +- afi = nf_tables_afinfo_lookup(net, family, true); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], +- afi->family, genmask); ++ family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -5127,7 +4967,7 @@ static int nf_tables_delflowtable(struct + if (flowtable->use > 0) + return -EBUSY; + +- nft_ctx_init(&ctx, net, skb, nlh, afi->family, table, NULL, nla); ++ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + return nft_delflowtable(&ctx, flowtable); + } +@@ -5202,7 +5042,7 @@ static int nf_tables_dump_flowtable(stru + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { +- if (family != NFPROTO_UNSPEC && family != table->afi->family) ++ if (family != NFPROTO_UNSPEC && family != table->family) + continue; + + list_for_each_entry_rcu(flowtable, &table->flowtables, list) { +@@ -5221,7 +5061,7 @@ static int nf_tables_dump_flowtable(stru + cb->nlh->nlmsg_seq, + NFT_MSG_NEWFLOWTABLE, + NLM_F_MULTI | NLM_F_APPEND, +- table->afi->family, flowtable) < 0) ++ table->family, flowtable) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +@@ -5279,7 +5119,6 @@ static int nf_tables_getflowtable(struct + u8 genmask = nft_genmask_cur(net); + int family = nfmsg->nfgen_family; + struct nft_flowtable *flowtable; +- const struct nft_af_info *afi; + const struct nft_table *table; + struct sk_buff *skb2; + int err; +@@ -5305,12 +5144,8 @@ static int nf_tables_getflowtable(struct + if (!nla[NFTA_FLOWTABLE_NAME]) + return -EINVAL; + +- afi = nf_tables_afinfo_lookup(net, family, false); +- if (IS_ERR(afi)) +- return PTR_ERR(afi); +- + table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], +- afi->family, genmask); ++ family, genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -6474,7 +6309,7 @@ int __nft_release_basechain(struct nft_c + } + EXPORT_SYMBOL_GPL(__nft_release_basechain); + +-static void __nft_release_afinfo(struct net *net) ++static void __nft_release_tables(struct net *net) + { + struct nft_flowtable *flowtable, *nf; + struct nft_table *table, *nt; +@@ -6487,7 +6322,7 @@ static void __nft_release_afinfo(struct + }; + + list_for_each_entry_safe(table, nt, &net->nft.tables, list) { +- ctx.family = table->afi->family; ++ ctx.family = table->family; + + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); +@@ -6539,7 +6374,7 @@ static int __net_init nf_tables_init_net + + static void __net_exit nf_tables_exit_net(struct net *net) + { +- __nft_release_afinfo(net); ++ __nft_release_tables(net); + WARN_ON_ONCE(!list_empty(&net->nft.tables)); + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); + } +--- a/net/netfilter/nf_tables_inet.c ++++ b/net/netfilter/nf_tables_inet.c +@@ -38,11 +38,6 @@ static unsigned int nft_do_chain_inet(vo + return nft_do_chain(&pkt, priv); + } + +-static struct nft_af_info nft_af_inet __read_mostly = { +- .family = NFPROTO_INET, +- .owner = THIS_MODULE, +-}; +- + static const struct nf_chain_type filter_inet = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -64,26 +59,12 @@ static const struct nf_chain_type filter + + static int __init nf_tables_inet_init(void) + { +- int ret; +- +- if (nft_register_afinfo(&nft_af_inet) < 0) +- return ret; +- +- ret = nft_register_chain_type(&filter_inet); +- if (ret < 0) +- goto err_register_chain; +- +- return ret; +- +-err_register_chain: +- nft_unregister_afinfo(&nft_af_inet); +- return ret; ++ return nft_register_chain_type(&filter_inet); + } + + static void __exit nf_tables_inet_exit(void) + { + nft_unregister_chain_type(&filter_inet); +- nft_unregister_afinfo(&nft_af_inet); + } + + module_init(nf_tables_inet_init); +@@ -91,4 +72,4 @@ module_exit(nf_tables_inet_exit); + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +-MODULE_ALIAS_NFT_FAMILY(1); ++MODULE_ALIAS_NFT_CHAIN(1, "filter"); +--- a/net/netfilter/nf_tables_netdev.c ++++ b/net/netfilter/nf_tables_netdev.c +@@ -38,11 +38,6 @@ nft_do_chain_netdev(void *priv, struct s + return nft_do_chain(&pkt, priv); + } + +-static struct nft_af_info nft_af_netdev __read_mostly = { +- .family = NFPROTO_NETDEV, +- .owner = THIS_MODULE, +-}; +- + static const struct nf_chain_type nft_filter_chain_netdev = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, +@@ -91,10 +86,10 @@ static int nf_tables_netdev_event(struct + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_for_each_entry(table, &ctx.net->nft.tables, list) { +- if (table->afi->family != NFPROTO_NETDEV) ++ if (table->family != NFPROTO_NETDEV) + continue; + +- ctx.family = table->afi->family; ++ ctx.family = table->family; + ctx.table = table; + list_for_each_entry_safe(chain, nr, &table->chains, list) { + if (!nft_is_base_chain(chain)) +@@ -117,12 +112,9 @@ static int __init nf_tables_netdev_init( + { + int ret; + +- if (nft_register_afinfo(&nft_af_netdev) < 0) +- return ret; +- + ret = nft_register_chain_type(&nft_filter_chain_netdev); + if (ret) +- goto err_register_chain_type; ++ return ret; + + ret = register_netdevice_notifier(&nf_tables_netdev_notifier); + if (ret) +@@ -132,8 +124,6 @@ static int __init nf_tables_netdev_init( + + err_register_netdevice_notifier: + nft_unregister_chain_type(&nft_filter_chain_netdev); +-err_register_chain_type: +- nft_unregister_afinfo(&nft_af_netdev); + + return ret; + } +@@ -142,7 +132,6 @@ static void __exit nf_tables_netdev_exit + { + unregister_netdevice_notifier(&nf_tables_netdev_notifier); + nft_unregister_chain_type(&nft_filter_chain_netdev); +- nft_unregister_afinfo(&nft_af_netdev); + } + + module_init(nf_tables_netdev_init); +@@ -150,4 +139,4 @@ module_exit(nf_tables_netdev_exit); + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +-MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */ ++MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */ diff --git a/target/linux/generic/backport-4.14/339-netfilter-nft_flow_offload-wait-for-garbage-collecto.patch b/target/linux/generic/backport-4.14/339-netfilter-nft_flow_offload-wait-for-garbage-collecto.patch new file mode 100644 index 0000000000..acca41ae3e --- /dev/null +++ b/target/linux/generic/backport-4.14/339-netfilter-nft_flow_offload-wait-for-garbage-collecto.patch @@ -0,0 +1,47 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Thu, 1 Feb 2018 18:49:00 +0100 +Subject: [PATCH] netfilter: nft_flow_offload: wait for garbage collector + to run after cleanup + +If netdevice goes down, then flowtable entries are scheduled to be +removed. Wait for garbage collector to have a chance to run so it can +delete them from the hashtable. + +The flush call might sleep, so hold the nfnl mutex from +nft_flow_table_iterate() instead of rcu read side lock. The use of the +nfnl mutex is also implicitly fixing races between updates via nfnetlink +and netdevice event. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4818,13 +4818,13 @@ void nft_flow_table_iterate(struct net * + struct nft_flowtable *flowtable; + const struct nft_table *table; + +- rcu_read_lock(); +- list_for_each_entry_rcu(table, &net->nft.tables, list) { +- list_for_each_entry_rcu(flowtable, &table->flowtables, list) { ++ nfnl_lock(NFNL_SUBSYS_NFTABLES); ++ list_for_each_entry(table, &net->nft.tables, list) { ++ list_for_each_entry(flowtable, &table->flowtables, list) { + iter(&flowtable->data, data); + } + } +- rcu_read_unlock(); ++ nfnl_unlock(NFNL_SUBSYS_NFTABLES); + } + EXPORT_SYMBOL_GPL(nft_flow_table_iterate); + +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -208,6 +208,7 @@ static void nft_flow_offload_iterate_cle + void *data) + { + nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); ++ flush_delayed_work(&flowtable->gc_work); + } + + static int flow_offload_netdev_event(struct notifier_block *this, diff --git a/target/linux/generic/backport-4.14/340-netfilter-nft_flow_offload-no-need-to-flush-entries-.patch b/target/linux/generic/backport-4.14/340-netfilter-nft_flow_offload-no-need-to-flush-entries-.patch new file mode 100644 index 0000000000..539550d542 --- /dev/null +++ b/target/linux/generic/backport-4.14/340-netfilter-nft_flow_offload-no-need-to-flush-entries-.patch @@ -0,0 +1,29 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Thu, 1 Feb 2018 18:49:01 +0100 +Subject: [PATCH] netfilter: nft_flow_offload: no need to flush entries on + module removal + +nft_flow_offload module removal does not require to flush existing +flowtables, it is valid to remove this module while keeping flowtables +around. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -247,14 +247,8 @@ register_expr: + + static void __exit nft_flow_offload_module_exit(void) + { +- struct net *net; +- + nft_unregister_expr(&nft_flow_offload_type); + unregister_netdevice_notifier(&flow_offload_netdev_notifier); +- rtnl_lock(); +- for_each_net(net) +- nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL); +- rtnl_unlock(); + } + + module_init(nft_flow_offload_module_init); diff --git a/target/linux/generic/backport-4.14/341-netfilter-nft_flow_offload-move-flowtable-cleanup-ro.patch b/target/linux/generic/backport-4.14/341-netfilter-nft_flow_offload-move-flowtable-cleanup-ro.patch new file mode 100644 index 0000000000..9ee0ad5936 --- /dev/null +++ b/target/linux/generic/backport-4.14/341-netfilter-nft_flow_offload-move-flowtable-cleanup-ro.patch @@ -0,0 +1,97 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Tue, 23 Jan 2018 17:46:09 +0100 +Subject: [PATCH] netfilter: nft_flow_offload: move flowtable cleanup + routines to nf_flow_table + +Move the flowtable cleanup routines to nf_flow_table and expose the +nf_flow_table_cleanup() helper function. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -95,6 +95,9 @@ struct flow_offload_tuple_rhash *flow_of + int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data); ++ ++void nf_flow_table_cleanup(struct net *net, struct net_device *dev); ++ + void nf_flow_offload_work_gc(struct work_struct *work); + extern const struct rhashtable_params nf_flow_offload_rhash_params; + +--- a/net/netfilter/nf_flow_table.c ++++ b/net/netfilter/nf_flow_table.c +@@ -4,6 +4,7 @@ + #include <linux/netfilter.h> + #include <linux/rhashtable.h> + #include <linux/netdevice.h> ++#include <net/netfilter/nf_tables.h> + #include <net/netfilter/nf_flow_table.h> + #include <net/netfilter/nf_conntrack.h> + #include <net/netfilter/nf_conntrack_core.h> +@@ -425,5 +426,28 @@ int nf_flow_dnat_port(const struct flow_ + } + EXPORT_SYMBOL_GPL(nf_flow_dnat_port); + ++static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) ++{ ++ struct net_device *dev = data; ++ ++ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) ++ return; ++ ++ flow_offload_dead(flow); ++} ++ ++static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, ++ void *data) ++{ ++ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data); ++ flush_delayed_work(&flowtable->gc_work); ++} ++ ++void nf_flow_table_cleanup(struct net *net, struct net_device *dev) ++{ ++ nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev); ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); ++ + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -194,23 +194,6 @@ static struct nft_expr_type nft_flow_off + .owner = THIS_MODULE, + }; + +-static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data) +-{ +- struct net_device *dev = data; +- +- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) +- return; +- +- flow_offload_dead(flow); +-} +- +-static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable, +- void *data) +-{ +- nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); +- flush_delayed_work(&flowtable->gc_work); +-} +- + static int flow_offload_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) + { +@@ -219,7 +202,7 @@ static int flow_offload_netdev_event(str + if (event != NETDEV_DOWN) + return NOTIFY_DONE; + +- nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev); ++ nf_flow_table_cleanup(dev_net(dev), dev); + + return NOTIFY_DONE; + } diff --git a/target/linux/generic/backport-4.14/342-netfilter-nf_tables-fix-flowtable-free.patch b/target/linux/generic/backport-4.14/342-netfilter-nf_tables-fix-flowtable-free.patch new file mode 100644 index 0000000000..334a81474d --- /dev/null +++ b/target/linux/generic/backport-4.14/342-netfilter-nf_tables-fix-flowtable-free.patch @@ -0,0 +1,140 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Mon, 5 Feb 2018 21:44:50 +0100 +Subject: [PATCH] netfilter: nf_tables: fix flowtable free + +Every flow_offload entry is added into the table twice. Because of this, +rhashtable_free_and_destroy can't be used, since it would call kfree for +each flow_offload object twice. + +This patch adds a call to nf_flow_table_iterate_cleanup() to schedule +removal of entries, then there is an explicitly invocation of the +garbage collector to clean up resources. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -14,6 +14,7 @@ struct nf_flowtable_type { + struct list_head list; + int family; + void (*gc)(struct work_struct *work); ++ void (*free)(struct nf_flowtable *ft); + const struct rhashtable_params *params; + nf_hookfn *hook; + struct module *owner; +@@ -98,6 +99,7 @@ int nf_flow_table_iterate(struct nf_flow + + void nf_flow_table_cleanup(struct net *net, struct net_device *dev); + ++void nf_flow_table_free(struct nf_flowtable *flow_table); + void nf_flow_offload_work_gc(struct work_struct *work); + extern const struct rhashtable_params nf_flow_offload_rhash_params; + +--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c ++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c +@@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtabl + .family = NFPROTO_IPV4, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, ++ .free = nf_flow_table_free, + .hook = nf_flow_offload_ip_hook, + .owner = THIS_MODULE, + }; +--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c ++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c +@@ -254,6 +254,7 @@ static struct nf_flowtable_type flowtabl + .family = NFPROTO_IPV6, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, ++ .free = nf_flow_table_free, + .hook = nf_flow_offload_ipv6_hook, + .owner = THIS_MODULE, + }; +--- a/net/netfilter/nf_flow_table.c ++++ b/net/netfilter/nf_flow_table.c +@@ -232,19 +232,16 @@ static inline bool nf_flow_is_dying(cons + return flow->flags & FLOW_OFFLOAD_DYING; + } + +-void nf_flow_offload_work_gc(struct work_struct *work) ++static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table) + { + struct flow_offload_tuple_rhash *tuplehash; +- struct nf_flowtable *flow_table; + struct rhashtable_iter hti; + struct flow_offload *flow; + int err; + +- flow_table = container_of(work, struct nf_flowtable, gc_work.work); +- + err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); + if (err) +- goto schedule; ++ return 0; + + rhashtable_walk_start(&hti); + +@@ -270,7 +267,16 @@ void nf_flow_offload_work_gc(struct work + out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +-schedule: ++ ++ return 1; ++} ++ ++void nf_flow_offload_work_gc(struct work_struct *work) ++{ ++ struct nf_flowtable *flow_table; ++ ++ flow_table = container_of(work, struct nf_flowtable, gc_work.work); ++ nf_flow_offload_gc_step(flow_table); + queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); + } + EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); +@@ -449,5 +455,12 @@ void nf_flow_table_cleanup(struct net *n + } + EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); + ++void nf_flow_table_free(struct nf_flowtable *flow_table) ++{ ++ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); ++ WARN_ON(!nf_flow_offload_gc_step(flow_table)); ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_free); ++ + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +--- a/net/netfilter/nf_flow_table_inet.c ++++ b/net/netfilter/nf_flow_table_inet.c +@@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtabl + .family = NFPROTO_INET, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, ++ .free = nf_flow_table_free, + .hook = nf_flow_offload_inet_hook, + .owner = THIS_MODULE, + }; +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -5201,17 +5201,12 @@ err: + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); + } + +-static void nft_flowtable_destroy(void *ptr, void *arg) +-{ +- kfree(ptr); +-} +- + static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) + { + cancel_delayed_work_sync(&flowtable->data.gc_work); + kfree(flowtable->name); +- rhashtable_free_and_destroy(&flowtable->data.rhashtable, +- nft_flowtable_destroy, NULL); ++ flowtable->data.type->free(&flowtable->data); ++ rhashtable_destroy(&flowtable->data.rhashtable); + module_put(flowtable->data.type->owner); + } + diff --git a/target/linux/generic/backport-4.14/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch b/target/linux/generic/backport-4.14/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch new file mode 100644 index 0000000000..7f35cd7c60 --- /dev/null +++ b/target/linux/generic/backport-4.14/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch @@ -0,0 +1,96 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Thu, 25 Jan 2018 12:58:55 +0100 +Subject: [PATCH] netfilter: nft_flow_offload: handle netdevice events from + nf_flow_table + +Move the code that deals with device events to the core. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/netfilter/nf_flow_table.c ++++ b/net/netfilter/nf_flow_table.c +@@ -462,5 +462,35 @@ void nf_flow_table_free(struct nf_flowta + } + EXPORT_SYMBOL_GPL(nf_flow_table_free); + ++static int nf_flow_table_netdev_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ ++ if (event != NETDEV_DOWN) ++ return NOTIFY_DONE; ++ ++ nf_flow_table_cleanup(dev_net(dev), dev); ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block flow_offload_netdev_notifier = { ++ .notifier_call = nf_flow_table_netdev_event, ++}; ++ ++static int __init nf_flow_table_module_init(void) ++{ ++ return register_netdevice_notifier(&flow_offload_netdev_notifier); ++} ++ ++static void __exit nf_flow_table_module_exit(void) ++{ ++ unregister_netdevice_notifier(&flow_offload_netdev_notifier); ++} ++ ++module_init(nf_flow_table_module_init); ++module_exit(nf_flow_table_module_exit); ++ + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -194,44 +194,14 @@ static struct nft_expr_type nft_flow_off + .owner = THIS_MODULE, + }; + +-static int flow_offload_netdev_event(struct notifier_block *this, +- unsigned long event, void *ptr) +-{ +- struct net_device *dev = netdev_notifier_info_to_dev(ptr); +- +- if (event != NETDEV_DOWN) +- return NOTIFY_DONE; +- +- nf_flow_table_cleanup(dev_net(dev), dev); +- +- return NOTIFY_DONE; +-} +- +-static struct notifier_block flow_offload_netdev_notifier = { +- .notifier_call = flow_offload_netdev_event, +-}; +- + static int __init nft_flow_offload_module_init(void) + { +- int err; +- +- register_netdevice_notifier(&flow_offload_netdev_notifier); +- +- err = nft_register_expr(&nft_flow_offload_type); +- if (err < 0) +- goto register_expr; +- +- return 0; +- +-register_expr: +- unregister_netdevice_notifier(&flow_offload_netdev_notifier); +- return err; ++ return nft_register_expr(&nft_flow_offload_type); + } + + static void __exit nft_flow_offload_module_exit(void) + { + nft_unregister_expr(&nft_flow_offload_type); +- unregister_netdevice_notifier(&flow_offload_netdev_notifier); + } + + module_init(nft_flow_offload_module_init); diff --git a/target/linux/generic/backport-4.14/344-netfilter-nf_tables-allocate-handle-and-delete-objec.patch b/target/linux/generic/backport-4.14/344-netfilter-nf_tables-allocate-handle-and-delete-objec.patch new file mode 100644 index 0000000000..97778a99f1 --- /dev/null +++ b/target/linux/generic/backport-4.14/344-netfilter-nf_tables-allocate-handle-and-delete-objec.patch @@ -0,0 +1,468 @@ +From: Harsha Sharma <harshasharmaiitr@gmail.com> +Date: Wed, 27 Dec 2017 00:59:00 +0530 +Subject: [PATCH] netfilter: nf_tables: allocate handle and delete objects via + handle + +This patch allows deletion of objects via unique handle which can be +listed via '-a' option. + +Signed-off-by: Harsha Sharma <harshasharmaiitr@gmail.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -369,6 +369,7 @@ void nft_unregister_set(struct nft_set_t + * @list: table set list node + * @bindings: list of set bindings + * @name: name of the set ++ * @handle: unique handle of the set + * @ktype: key type (numeric type defined by userspace, not used in the kernel) + * @dtype: data type (verdict or numeric type defined by userspace) + * @objtype: object type (see NFT_OBJECT_* definitions) +@@ -391,6 +392,7 @@ struct nft_set { + struct list_head list; + struct list_head bindings; + char *name; ++ u64 handle; + u32 ktype; + u32 dtype; + u32 objtype; +@@ -936,6 +938,7 @@ unsigned int nft_do_chain(struct nft_pkt + * @objects: stateful objects in the table + * @flowtables: flow tables in the table + * @hgenerator: handle generator state ++ * @handle: table handle + * @use: number of chain references to this table + * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask +@@ -949,6 +952,7 @@ struct nft_table { + struct list_head objects; + struct list_head flowtables; + u64 hgenerator; ++ u64 handle; + u32 use; + u16 family:6, + flags:8, +@@ -973,9 +977,9 @@ int nft_verdict_dump(struct sk_buff *skb + * @name: name of this stateful object + * @genmask: generation mask + * @use: number of references to this stateful object +- * @data: object data, layout depends on type ++ * @handle: unique object handle + * @ops: object operations +- * @data: pointer to object data ++ * @data: object data, layout depends on type + */ + struct nft_object { + struct list_head list; +@@ -983,6 +987,7 @@ struct nft_object { + struct nft_table *table; + u32 genmask:2, + use:30; ++ u64 handle; + /* runtime data below here */ + const struct nft_object_ops *ops ____cacheline_aligned; + unsigned char data[] +@@ -1064,6 +1069,7 @@ void nft_unregister_obj(struct nft_objec + * @ops_len: number of hooks in array + * @genmask: generation mask + * @use: number of references to this flow table ++ * @handle: unique object handle + * @data: rhashtable and garbage collector + * @ops: array of hooks + */ +@@ -1076,6 +1082,7 @@ struct nft_flowtable { + int ops_len; + u32 genmask:2, + use:30; ++ u64 handle; + /* runtime data below here */ + struct nf_hook_ops *ops ____cacheline_aligned; + struct nf_flowtable data; +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -174,6 +174,8 @@ enum nft_table_attributes { + NFTA_TABLE_NAME, + NFTA_TABLE_FLAGS, + NFTA_TABLE_USE, ++ NFTA_TABLE_HANDLE, ++ NFTA_TABLE_PAD, + __NFTA_TABLE_MAX + }; + #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) +@@ -317,6 +319,7 @@ enum nft_set_desc_attributes { + * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) + * @NFTA_SET_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) ++ * @NFTA_SET_HANDLE: set handle (NLA_U64) + */ + enum nft_set_attributes { + NFTA_SET_UNSPEC, +@@ -335,6 +338,7 @@ enum nft_set_attributes { + NFTA_SET_USERDATA, + NFTA_SET_PAD, + NFTA_SET_OBJ_TYPE, ++ NFTA_SET_HANDLE, + __NFTA_SET_MAX + }; + #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) +@@ -1314,6 +1318,7 @@ enum nft_ct_helper_attributes { + * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) + * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) + * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) ++ * @NFTA_OBJ_HANDLE: object handle (NLA_U64) + */ + enum nft_object_attributes { + NFTA_OBJ_UNSPEC, +@@ -1322,6 +1327,8 @@ enum nft_object_attributes { + NFTA_OBJ_TYPE, + NFTA_OBJ_DATA, + NFTA_OBJ_USE, ++ NFTA_OBJ_HANDLE, ++ NFTA_OBJ_PAD, + __NFTA_OBJ_MAX + }; + #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) +@@ -1333,6 +1340,7 @@ enum nft_object_attributes { + * @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING) + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) ++ * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) + */ + enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, +@@ -1340,6 +1348,8 @@ enum nft_flowtable_attributes { + NFTA_FLOWTABLE_NAME, + NFTA_FLOWTABLE_HOOK, + NFTA_FLOWTABLE_USE, ++ NFTA_FLOWTABLE_HANDLE, ++ NFTA_FLOWTABLE_PAD, + __NFTA_FLOWTABLE_MAX + }; + #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -26,6 +26,7 @@ + static LIST_HEAD(nf_tables_expressions); + static LIST_HEAD(nf_tables_objects); + static LIST_HEAD(nf_tables_flowtables); ++static u64 table_handle; + + static void nft_ctx_init(struct nft_ctx *ctx, + struct net *net, +@@ -332,6 +333,20 @@ static struct nft_table *nft_table_looku + return NULL; + } + ++static struct nft_table *nft_table_lookup_byhandle(const struct net *net, ++ const struct nlattr *nla, ++ u8 genmask) ++{ ++ struct nft_table *table; ++ ++ list_for_each_entry(table, &net->nft.tables, list) { ++ if (be64_to_cpu(nla_get_be64(nla)) == table->handle && ++ nft_active_genmask(table, genmask)) ++ return table; ++ } ++ return NULL; ++} ++ + static struct nft_table *nf_tables_table_lookup(const struct net *net, + const struct nlattr *nla, + u8 family, u8 genmask) +@@ -348,6 +363,22 @@ static struct nft_table *nf_tables_table + return ERR_PTR(-ENOENT); + } + ++static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net, ++ const struct nlattr *nla, ++ u8 genmask) ++{ ++ struct nft_table *table; ++ ++ if (nla == NULL) ++ return ERR_PTR(-EINVAL); ++ ++ table = nft_table_lookup_byhandle(net, nla, genmask); ++ if (table != NULL) ++ return table; ++ ++ return ERR_PTR(-ENOENT); ++} ++ + static inline u64 nf_tables_alloc_handle(struct nft_table *table) + { + return ++table->hgenerator; +@@ -394,6 +425,7 @@ static const struct nla_policy nft_table + [NFTA_TABLE_NAME] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, + [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, ++ [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, + }; + + static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, +@@ -415,7 +447,9 @@ static int nf_tables_fill_table_info(str + + if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || +- nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) ++ nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || ++ nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), ++ NFTA_TABLE_PAD)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); +@@ -674,6 +708,7 @@ static int nf_tables_newtable(struct net + INIT_LIST_HEAD(&table->flowtables); + table->family = family; + table->flags = flags; ++ table->handle = ++table_handle; + + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); +@@ -791,11 +826,18 @@ static int nf_tables_deltable(struct net + struct nft_ctx ctx; + + nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla); +- if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL) ++ if (family == AF_UNSPEC || ++ (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE])) + return nft_flush(&ctx, family); + +- table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family, +- genmask); ++ if (nla[NFTA_TABLE_HANDLE]) ++ table = nf_tables_table_lookup_byhandle(net, ++ nla[NFTA_TABLE_HANDLE], ++ genmask); ++ else ++ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], ++ family, genmask); ++ + if (IS_ERR(table)) + return PTR_ERR(table); + +@@ -1534,6 +1576,7 @@ static int nf_tables_delchain(struct net + struct nft_rule *rule; + int family = nfmsg->nfgen_family; + struct nft_ctx ctx; ++ u64 handle; + u32 use; + int err; + +@@ -1542,7 +1585,12 @@ static int nf_tables_delchain(struct net + if (IS_ERR(table)) + return PTR_ERR(table); + +- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); ++ if (nla[NFTA_CHAIN_HANDLE]) { ++ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); ++ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); ++ } else { ++ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); ++ } + if (IS_ERR(chain)) + return PTR_ERR(chain); + +@@ -2503,6 +2551,7 @@ static const struct nla_policy nft_set_p + [NFTA_SET_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, + [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, ++ [NFTA_SET_HANDLE] = { .type = NLA_U64 }, + }; + + static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { +@@ -2546,6 +2595,22 @@ static struct nft_set *nf_tables_set_loo + return ERR_PTR(-ENOENT); + } + ++static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table, ++ const struct nlattr *nla, u8 genmask) ++{ ++ struct nft_set *set; ++ ++ if (nla == NULL) ++ return ERR_PTR(-EINVAL); ++ ++ list_for_each_entry(set, &table->sets, list) { ++ if (be64_to_cpu(nla_get_be64(nla)) == set->handle && ++ nft_active_genmask(set, genmask)) ++ return set; ++ } ++ return ERR_PTR(-ENOENT); ++} ++ + static struct nft_set *nf_tables_set_lookup_byid(const struct net *net, + const struct nlattr *nla, + u8 genmask) +@@ -2661,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_ + goto nla_put_failure; + if (nla_put_string(skb, NFTA_SET_NAME, set->name)) + goto nla_put_failure; ++ if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle), ++ NFTA_SET_PAD)) ++ goto nla_put_failure; + if (set->flags != 0) + if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) + goto nla_put_failure; +@@ -3069,6 +3137,7 @@ static int nf_tables_newset(struct net * + set->udata = udata; + set->timeout = timeout; + set->gc_int = gc_int; ++ set->handle = nf_tables_alloc_handle(table); + + err = ops->init(set, &desc, nla); + if (err < 0) +@@ -3126,7 +3195,10 @@ static int nf_tables_delset(struct net * + if (err < 0) + return err; + +- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); ++ if (nla[NFTA_SET_HANDLE]) ++ set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask); ++ else ++ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); + if (IS_ERR(set)) + return PTR_ERR(set); + +@@ -4182,6 +4254,21 @@ struct nft_object *nf_tables_obj_lookup( + } + EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); + ++struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table, ++ const struct nlattr *nla, ++ u32 objtype, u8 genmask) ++{ ++ struct nft_object *obj; ++ ++ list_for_each_entry(obj, &table->objects, list) { ++ if (be64_to_cpu(nla_get_be64(nla)) == obj->handle && ++ objtype == obj->ops->type->type && ++ nft_active_genmask(obj, genmask)) ++ return obj; ++ } ++ return ERR_PTR(-ENOENT); ++} ++ + static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { + [NFTA_OBJ_TABLE] = { .type = NLA_STRING, + .len = NFT_TABLE_MAXNAMELEN - 1 }, +@@ -4189,6 +4276,7 @@ static const struct nla_policy nft_obj_p + .len = NFT_OBJ_MAXNAMELEN - 1 }, + [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, + [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, ++ [NFTA_OBJ_HANDLE] = { .type = NLA_U64}, + }; + + static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, +@@ -4336,6 +4424,8 @@ static int nf_tables_newobj(struct net * + goto err1; + } + obj->table = table; ++ obj->handle = nf_tables_alloc_handle(table); ++ + obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); + if (!obj->name) { + err = -ENOMEM; +@@ -4382,7 +4472,9 @@ static int nf_tables_fill_obj_info(struc + nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || + nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || +- nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) ++ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || ++ nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle), ++ NFTA_OBJ_PAD)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); +@@ -4580,7 +4672,7 @@ static int nf_tables_delobj(struct net * + u32 objtype; + + if (!nla[NFTA_OBJ_TYPE] || +- !nla[NFTA_OBJ_NAME]) ++ (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE])) + return -EINVAL; + + table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family, +@@ -4589,7 +4681,12 @@ static int nf_tables_delobj(struct net * + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); +- obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); ++ if (nla[NFTA_OBJ_HANDLE]) ++ obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE], ++ objtype, genmask); ++ else ++ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], ++ objtype, genmask); + if (IS_ERR(obj)) + return PTR_ERR(obj); + if (obj->use > 0) +@@ -4661,6 +4758,7 @@ static const struct nla_policy nft_flowt + [NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING, + .len = NFT_NAME_MAXLEN - 1 }, + [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, ++ [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, + }; + + struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, +@@ -4678,6 +4776,20 @@ struct nft_flowtable *nf_tables_flowtabl + } + EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup); + ++struct nft_flowtable * ++nf_tables_flowtable_lookup_byhandle(const struct nft_table *table, ++ const struct nlattr *nla, u8 genmask) ++{ ++ struct nft_flowtable *flowtable; ++ ++ list_for_each_entry(flowtable, &table->flowtables, list) { ++ if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle && ++ nft_active_genmask(flowtable, genmask)) ++ return flowtable; ++ } ++ return ERR_PTR(-ENOENT); ++} ++ + #define NFT_FLOWTABLE_DEVICE_MAX 8 + + static int nf_tables_parse_devices(const struct nft_ctx *ctx, +@@ -4886,6 +4998,8 @@ static int nf_tables_newflowtable(struct + return -ENOMEM; + + flowtable->table = table; ++ flowtable->handle = nf_tables_alloc_handle(table); ++ + flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL); + if (!flowtable->name) { + err = -ENOMEM; +@@ -4960,8 +5074,14 @@ static int nf_tables_delflowtable(struct + if (IS_ERR(table)) + return PTR_ERR(table); + +- flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], +- genmask); ++ if (nla[NFTA_FLOWTABLE_HANDLE]) ++ flowtable = nf_tables_flowtable_lookup_byhandle(table, ++ nla[NFTA_FLOWTABLE_HANDLE], ++ genmask); ++ else ++ flowtable = nf_tables_flowtable_lookup(table, ++ nla[NFTA_FLOWTABLE_NAME], ++ genmask); + if (IS_ERR(flowtable)) + return PTR_ERR(flowtable); + if (flowtable->use > 0) +@@ -4994,7 +5114,9 @@ static int nf_tables_fill_flowtable_info + + if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) || + nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || +- nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use))) ++ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || ++ nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), ++ NFTA_FLOWTABLE_PAD)) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); diff --git a/target/linux/generic/backport-4.14/345-netfilter-nf_flow_offload-fix-use-after-free-and-a-r.patch b/target/linux/generic/backport-4.14/345-netfilter-nf_flow_offload-fix-use-after-free-and-a-r.patch new file mode 100644 index 0000000000..331f22d19a --- /dev/null +++ b/target/linux/generic/backport-4.14/345-netfilter-nf_flow_offload-fix-use-after-free-and-a-r.patch @@ -0,0 +1,95 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Wed, 7 Feb 2018 09:23:25 +0100 +Subject: [PATCH] netfilter: nf_flow_offload: fix use-after-free and a resource + leak + +flow_offload_del frees the flow, so all associated resource must be +freed before. + +Since the ct entry in struct flow_offload_entry was allocated by +flow_offload_alloc, it should be freed by flow_offload_free to take care +of the error handling path when flow_offload_add fails. + +While at it, make flow_offload_del static, since it should never be +called directly, only from the gc step + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -90,7 +90,6 @@ struct flow_offload *flow_offload_alloc( + void flow_offload_free(struct flow_offload *flow); + + int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); +-void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow); + struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, + struct flow_offload_tuple *tuple); + int nf_flow_table_iterate(struct nf_flowtable *flow_table, +--- a/net/netfilter/nf_flow_table.c ++++ b/net/netfilter/nf_flow_table.c +@@ -125,7 +125,9 @@ void flow_offload_free(struct flow_offlo + dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); + dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); + e = container_of(flow, struct flow_offload_entry, flow); +- kfree(e); ++ nf_ct_delete(e->ct, 0, 0); ++ nf_ct_put(e->ct); ++ kfree_rcu(e, rcu_head); + } + EXPORT_SYMBOL_GPL(flow_offload_free); + +@@ -149,11 +151,9 @@ int flow_offload_add(struct nf_flowtable + } + EXPORT_SYMBOL_GPL(flow_offload_add); + +-void flow_offload_del(struct nf_flowtable *flow_table, +- struct flow_offload *flow) ++static void flow_offload_del(struct nf_flowtable *flow_table, ++ struct flow_offload *flow) + { +- struct flow_offload_entry *e; +- + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, + *flow_table->type->params); +@@ -161,10 +161,8 @@ void flow_offload_del(struct nf_flowtabl + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, + *flow_table->type->params); + +- e = container_of(flow, struct flow_offload_entry, flow); +- kfree_rcu(e, rcu_head); ++ flow_offload_free(flow); + } +-EXPORT_SYMBOL_GPL(flow_offload_del); + + struct flow_offload_tuple_rhash * + flow_offload_lookup(struct nf_flowtable *flow_table, +@@ -175,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable + } + EXPORT_SYMBOL_GPL(flow_offload_lookup); + +-static void nf_flow_release_ct(const struct flow_offload *flow) +-{ +- struct flow_offload_entry *e; +- +- e = container_of(flow, struct flow_offload_entry, flow); +- nf_ct_delete(e->ct, 0, 0); +- nf_ct_put(e->ct); +-} +- + int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data) +@@ -259,10 +248,8 @@ static int nf_flow_offload_gc_step(struc + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + + if (nf_flow_has_expired(flow) || +- nf_flow_is_dying(flow)) { ++ nf_flow_is_dying(flow)) + flow_offload_del(flow_table, flow); +- nf_flow_release_ct(flow); +- } + } + out: + rhashtable_walk_stop(&hti); diff --git a/target/linux/generic/backport-4.14/346-netfilter-flowtable-infrastructure-depends-on-NETFIL.patch b/target/linux/generic/backport-4.14/346-netfilter-flowtable-infrastructure-depends-on-NETFIL.patch new file mode 100644 index 0000000000..5267fd2f67 --- /dev/null +++ b/target/linux/generic/backport-4.14/346-netfilter-flowtable-infrastructure-depends-on-NETFIL.patch @@ -0,0 +1,73 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Wed, 31 Jan 2018 18:13:39 +0100 +Subject: [PATCH] netfilter: flowtable infrastructure depends on + NETFILTER_INGRESS + +config NF_FLOW_TABLE depends on NETFILTER_INGRESS. If users forget to +enable this toggle, flowtable registration fails with EOPNOTSUPP. + +Moreover, turn 'select NF_FLOW_TABLE' in every flowtable family flavour +into dependency instead, otherwise this new dependency on +NETFILTER_INGRESS causes a warning. This also allows us to remove the +explicit dependency between family flowtables <-> NF_TABLES and +NF_CONNTRACK, given they depend on the NF_FLOW_TABLE core that already +expresses the general dependencies for this new infrastructure. + +Moreover, NF_FLOW_TABLE_INET depends on NF_FLOW_TABLE_IPV4 and +NF_FLOWTABLE_IPV6, which already depends on NF_FLOW_TABLE. So we can get +rid of direct dependency with NF_FLOW_TABLE. + +In general, let's avoid 'select', it just makes things more complicated. + +Reported-by: John Crispin <john@phrozen.org> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -79,8 +79,7 @@ endif # NF_TABLES + + config NF_FLOW_TABLE_IPV4 + tristate "Netfilter flow table IPv4 module" +- depends on NF_CONNTRACK && NF_TABLES +- select NF_FLOW_TABLE ++ depends on NF_FLOW_TABLE + help + This option adds the flow table IPv4 support. + +--- a/net/ipv6/netfilter/Kconfig ++++ b/net/ipv6/netfilter/Kconfig +@@ -73,8 +73,7 @@ endif # NF_TABLES + + config NF_FLOW_TABLE_IPV6 + tristate "Netfilter flow table IPv6 module" +- depends on NF_CONNTRACK && NF_TABLES +- select NF_FLOW_TABLE ++ depends on NF_FLOW_TABLE + help + This option adds the flow table IPv6 support. + +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -670,8 +670,8 @@ endif # NF_TABLES + + config NF_FLOW_TABLE_INET + tristate "Netfilter flow table mixed IPv4/IPv6 module" +- depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6 +- select NF_FLOW_TABLE ++ depends on NF_FLOW_TABLE_IPV4 ++ depends on NF_FLOW_TABLE_IPV6 + help + This option adds the flow table mixed IPv4/IPv6 support. + +@@ -679,7 +679,9 @@ config NF_FLOW_TABLE_INET + + config NF_FLOW_TABLE + tristate "Netfilter flow table module" +- depends on NF_CONNTRACK && NF_TABLES ++ depends on NETFILTER_INGRESS ++ depends on NF_CONNTRACK ++ depends on NF_TABLES + help + This option adds the flow table core infrastructure. + diff --git a/target/linux/generic/backport-4.14/347-netfilter-remove-duplicated-include.patch b/target/linux/generic/backport-4.14/347-netfilter-remove-duplicated-include.patch new file mode 100644 index 0000000000..c8a0972725 --- /dev/null +++ b/target/linux/generic/backport-4.14/347-netfilter-remove-duplicated-include.patch @@ -0,0 +1,29 @@ +From: Wei Yongjun <weiyongjun1@huawei.com> +Date: Wed, 10 Jan 2018 13:06:46 +0000 +Subject: [PATCH] netfilter: remove duplicated include + +Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com> +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + +--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c ++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c +@@ -5,7 +5,6 @@ + #include <linux/rhashtable.h> + #include <linux/ipv6.h> + #include <linux/netdevice.h> +-#include <linux/ipv6.h> + #include <net/ipv6.h> + #include <net/ip6_route.h> + #include <net/neighbour.h> +--- a/net/netfilter/nf_queue.c ++++ b/net/netfilter/nf_queue.c +@@ -15,8 +15,6 @@ + #include <linux/netfilter_bridge.h> + #include <linux/seq_file.h> + #include <linux/rcupdate.h> +-#include <linux/netfilter_ipv4.h> +-#include <linux/netfilter_ipv6.h> + #include <net/protocol.h> + #include <net/netfilter/nf_queue.h> + #include <net/dst.h> diff --git a/target/linux/generic/backport-4.14/348-netfilter-nf_flow_table-use-IP_CT_DIR_-values-for-FL.patch b/target/linux/generic/backport-4.14/348-netfilter-nf_flow_table-use-IP_CT_DIR_-values-for-FL.patch new file mode 100644 index 0000000000..382b33c078 --- /dev/null +++ b/target/linux/generic/backport-4.14/348-netfilter-nf_flow_table-use-IP_CT_DIR_-values-for-FL.patch @@ -0,0 +1,35 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Fri, 16 Feb 2018 09:41:18 +0100 +Subject: [PATCH] netfilter: nf_flow_table: use IP_CT_DIR_* values for + FLOW_OFFLOAD_DIR_* + +Simplifies further code cleanups + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -6,6 +6,7 @@ + #include <linux/netdevice.h> + #include <linux/rhashtable.h> + #include <linux/rcupdate.h> ++#include <linux/netfilter/nf_conntrack_tuple_common.h> + #include <net/dst.h> + + struct nf_flowtable; +@@ -27,11 +28,10 @@ struct nf_flowtable { + }; + + enum flow_offload_tuple_dir { +- FLOW_OFFLOAD_DIR_ORIGINAL, +- FLOW_OFFLOAD_DIR_REPLY, +- __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, ++ FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, ++ FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, ++ FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX + }; +-#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1) + + struct flow_offload_tuple { + union { diff --git a/target/linux/generic/backport-4.14/349-netfilter-nf_flow_table-clean-up-flow_offload_alloc.patch b/target/linux/generic/backport-4.14/349-netfilter-nf_flow_table-clean-up-flow_offload_alloc.patch new file mode 100644 index 0000000000..39ea757f04 --- /dev/null +++ b/target/linux/generic/backport-4.14/349-netfilter-nf_flow_table-clean-up-flow_offload_alloc.patch @@ -0,0 +1,118 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Fri, 16 Feb 2018 09:42:32 +0100 +Subject: [PATCH] netfilter: nf_flow_table: clean up flow_offload_alloc + +Reduce code duplication and make it much easier to read + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/net/netfilter/nf_flow_table.c ++++ b/net/netfilter/nf_flow_table.c +@@ -16,6 +16,38 @@ struct flow_offload_entry { + struct rcu_head rcu_head; + }; + ++static void ++flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, ++ struct nf_flow_route *route, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; ++ struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; ++ ++ ft->dir = dir; ++ ++ switch (ctt->src.l3num) { ++ case NFPROTO_IPV4: ++ ft->src_v4 = ctt->src.u3.in; ++ ft->dst_v4 = ctt->dst.u3.in; ++ break; ++ case NFPROTO_IPV6: ++ ft->src_v6 = ctt->src.u3.in6; ++ ft->dst_v6 = ctt->dst.u3.in6; ++ break; ++ } ++ ++ ft->l3proto = ctt->src.l3num; ++ ft->l4proto = ctt->dst.protonum; ++ ft->src_port = ctt->src.u.tcp.port; ++ ft->dst_port = ctt->dst.u.tcp.port; ++ ++ ft->iifidx = route->tuple[dir].ifindex; ++ ft->oifidx = route->tuple[!dir].ifindex; ++ ++ ft->dst_cache = route->tuple[dir].dst; ++} ++ + struct flow_offload * + flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) + { +@@ -40,65 +72,8 @@ flow_offload_alloc(struct nf_conn *ct, s + + entry->ct = ct; + +- switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) { +- case NFPROTO_IPV4: +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in; +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 = +- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 = +- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in; +- break; +- case NFPROTO_IPV6: +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 = +- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 = +- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6; +- break; +- } +- +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; +- +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache = +- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache = +- route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst; +- +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port; +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port = +- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port = +- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port = +- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; +- +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir = +- FLOW_OFFLOAD_DIR_ORIGINAL; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir = +- FLOW_OFFLOAD_DIR_REPLY; +- +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = +- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; +- flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx = +- route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = +- route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex; +- flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx = +- route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex; ++ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL); ++ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY); + + if (ct->status & IPS_SRC_NAT) + flow->flags |= FLOW_OFFLOAD_SNAT; diff --git a/target/linux/generic/backport-4.14/350-ipv6-make-ip6_dst_mtu_forward-inline.patch b/target/linux/generic/backport-4.14/350-ipv6-make-ip6_dst_mtu_forward-inline.patch new file mode 100644 index 0000000000..3a0275bd37 --- /dev/null +++ b/target/linux/generic/backport-4.14/350-ipv6-make-ip6_dst_mtu_forward-inline.patch @@ -0,0 +1,80 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Fri, 16 Feb 2018 10:54:24 +0100 +Subject: [PATCH] ipv6: make ip6_dst_mtu_forward inline + +Removes a direct dependency on ipv6.ko + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/include/net/ip6_route.h ++++ b/include/net/ip6_route.h +@@ -252,4 +252,26 @@ static inline bool rt6_duplicate_nexthop + ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && + !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); + } ++ ++static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) ++{ ++ unsigned int mtu; ++ struct inet6_dev *idev; ++ ++ if (dst_metric_locked(dst, RTAX_MTU)) { ++ mtu = dst_metric_raw(dst, RTAX_MTU); ++ if (mtu) ++ return mtu; ++ } ++ ++ mtu = IPV6_MIN_MTU; ++ rcu_read_lock(); ++ idev = __in6_dev_get(dst->dev); ++ if (idev) ++ mtu = idev->cnf.mtu6; ++ rcu_read_unlock(); ++ ++ return mtu; ++} ++ + #endif +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -913,8 +913,6 @@ static inline struct sk_buff *ip6_finish + &inet6_sk(sk)->cork); + } + +-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst); +- + int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, + struct flowi6 *fl6); + struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -370,28 +370,6 @@ static inline int ip6_forward_finish(str + return dst_output(net, sk, skb); + } + +-unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +-{ +- unsigned int mtu; +- struct inet6_dev *idev; +- +- if (dst_metric_locked(dst, RTAX_MTU)) { +- mtu = dst_metric_raw(dst, RTAX_MTU); +- if (mtu) +- return mtu; +- } +- +- mtu = IPV6_MIN_MTU; +- rcu_read_lock(); +- idev = __in6_dev_get(dst->dev); +- if (idev) +- mtu = idev->cnf.mtu6; +- rcu_read_unlock(); +- +- return mtu; +-} +-EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward); +- + static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) + { + if (skb->len <= mtu) diff --git a/target/linux/generic/backport-4.14/351-netfilter-nf_flow_table-cache-mtu-in-struct-flow_off.patch b/target/linux/generic/backport-4.14/351-netfilter-nf_flow_table-cache-mtu-in-struct-flow_off.patch new file mode 100644 index 0000000000..e2015e72ac --- /dev/null +++ b/target/linux/generic/backport-4.14/351-netfilter-nf_flow_table-cache-mtu-in-struct-flow_off.patch @@ -0,0 +1,145 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Fri, 16 Feb 2018 10:57:23 +0100 +Subject: [PATCH] netfilter: nf_flow_table: cache mtu in struct + flow_offload_tuple + +Reduces the number of cache lines touched in the offload forwarding path + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -55,6 +55,8 @@ struct flow_offload_tuple { + + int oifidx; + ++ u16 mtu; ++ + struct dst_entry *dst_cache; + }; + +--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c ++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c +@@ -177,7 +177,7 @@ static int nf_flow_tuple_ip(struct sk_bu + } + + /* Based on ip_exceeds_mtu(). */ +-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ++static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) + { + if (skb->len <= mtu) + return false; +@@ -191,17 +191,6 @@ static bool __nf_flow_exceeds_mtu(const + return true; + } + +-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt) +-{ +- u32 mtu; +- +- mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); +- if (__nf_flow_exceeds_mtu(skb, mtu)) +- return true; +- +- return false; +-} +- + unsigned int + nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +@@ -232,9 +221,9 @@ nf_flow_offload_ip_hook(void *priv, stru + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); +- + rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; +- if (unlikely(nf_flow_exceeds_mtu(skb, rt))) ++ ++ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*iph))) +--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c ++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c +@@ -173,7 +173,7 @@ static int nf_flow_tuple_ipv6(struct sk_ + } + + /* Based on ip_exceeds_mtu(). */ +-static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ++static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) + { + if (skb->len <= mtu) + return false; +@@ -184,17 +184,6 @@ static bool __nf_flow_exceeds_mtu(const + return true; + } + +-static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt) +-{ +- u32 mtu; +- +- mtu = ip6_dst_mtu_forward(&rt->dst); +- if (__nf_flow_exceeds_mtu(skb, mtu)) +- return true; +- +- return false; +-} +- + unsigned int + nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +@@ -225,9 +214,9 @@ nf_flow_offload_ipv6_hook(void *priv, st + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); +- + rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; +- if (unlikely(nf_flow_exceeds_mtu(skb, rt))) ++ ++ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*ip6h))) +--- a/net/netfilter/nf_flow_table.c ++++ b/net/netfilter/nf_flow_table.c +@@ -4,6 +4,8 @@ + #include <linux/netfilter.h> + #include <linux/rhashtable.h> + #include <linux/netdevice.h> ++#include <net/ip.h> ++#include <net/ip6_route.h> + #include <net/netfilter/nf_tables.h> + #include <net/netfilter/nf_flow_table.h> + #include <net/netfilter/nf_conntrack.h> +@@ -23,6 +25,7 @@ flow_offload_fill_dir(struct flow_offloa + { + struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; + struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; ++ struct dst_entry *dst = route->tuple[dir].dst; + + ft->dir = dir; + +@@ -30,10 +33,12 @@ flow_offload_fill_dir(struct flow_offloa + case NFPROTO_IPV4: + ft->src_v4 = ctt->src.u3.in; + ft->dst_v4 = ctt->dst.u3.in; ++ ft->mtu = ip_dst_mtu_maybe_forward(dst, true); + break; + case NFPROTO_IPV6: + ft->src_v6 = ctt->src.u3.in6; + ft->dst_v6 = ctt->dst.u3.in6; ++ ft->mtu = ip6_dst_mtu_forward(dst); + break; + } + +@@ -44,8 +49,7 @@ flow_offload_fill_dir(struct flow_offloa + + ft->iifidx = route->tuple[dir].ifindex; + ft->oifidx = route->tuple[!dir].ifindex; +- +- ft->dst_cache = route->tuple[dir].dst; ++ ft->dst_cache = dst; + } + + struct flow_offload * diff --git a/target/linux/generic/backport-4.14/352-netfilter-nf_flow_table-rename-nf_flow_table.c-to-nf.patch b/target/linux/generic/backport-4.14/352-netfilter-nf_flow_table-rename-nf_flow_table.c-to-nf.patch new file mode 100644 index 0000000000..5df56dd643 --- /dev/null +++ b/target/linux/generic/backport-4.14/352-netfilter-nf_flow_table-rename-nf_flow_table.c-to-nf.patch @@ -0,0 +1,952 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Fri, 16 Feb 2018 11:08:47 +0100 +Subject: [PATCH] netfilter: nf_flow_table: rename nf_flow_table.c to + nf_flow_table_core.c + +Preparation for adding more code to the same module + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + rename net/netfilter/{nf_flow_table.c => nf_flow_table_core.c} (100%) + +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -113,6 +113,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_ + + # flow table infrastructure + obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o ++nf_flow_table-objs := nf_flow_table_core.o ++ + obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o + + # generic X tables +--- a/net/netfilter/nf_flow_table.c ++++ /dev/null +@@ -1,462 +0,0 @@ +-#include <linux/kernel.h> +-#include <linux/init.h> +-#include <linux/module.h> +-#include <linux/netfilter.h> +-#include <linux/rhashtable.h> +-#include <linux/netdevice.h> +-#include <net/ip.h> +-#include <net/ip6_route.h> +-#include <net/netfilter/nf_tables.h> +-#include <net/netfilter/nf_flow_table.h> +-#include <net/netfilter/nf_conntrack.h> +-#include <net/netfilter/nf_conntrack_core.h> +-#include <net/netfilter/nf_conntrack_tuple.h> +- +-struct flow_offload_entry { +- struct flow_offload flow; +- struct nf_conn *ct; +- struct rcu_head rcu_head; +-}; +- +-static void +-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, +- struct nf_flow_route *route, +- enum flow_offload_tuple_dir dir) +-{ +- struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; +- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; +- struct dst_entry *dst = route->tuple[dir].dst; +- +- ft->dir = dir; +- +- switch (ctt->src.l3num) { +- case NFPROTO_IPV4: +- ft->src_v4 = ctt->src.u3.in; +- ft->dst_v4 = ctt->dst.u3.in; +- ft->mtu = ip_dst_mtu_maybe_forward(dst, true); +- break; +- case NFPROTO_IPV6: +- ft->src_v6 = ctt->src.u3.in6; +- ft->dst_v6 = ctt->dst.u3.in6; +- ft->mtu = ip6_dst_mtu_forward(dst); +- break; +- } +- +- ft->l3proto = ctt->src.l3num; +- ft->l4proto = ctt->dst.protonum; +- ft->src_port = ctt->src.u.tcp.port; +- ft->dst_port = ctt->dst.u.tcp.port; +- +- ft->iifidx = route->tuple[dir].ifindex; +- ft->oifidx = route->tuple[!dir].ifindex; +- ft->dst_cache = dst; +-} +- +-struct flow_offload * +-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) +-{ +- struct flow_offload_entry *entry; +- struct flow_offload *flow; +- +- if (unlikely(nf_ct_is_dying(ct) || +- !atomic_inc_not_zero(&ct->ct_general.use))) +- return NULL; +- +- entry = kzalloc(sizeof(*entry), GFP_ATOMIC); +- if (!entry) +- goto err_ct_refcnt; +- +- flow = &entry->flow; +- +- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) +- goto err_dst_cache_original; +- +- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) +- goto err_dst_cache_reply; +- +- entry->ct = ct; +- +- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL); +- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY); +- +- if (ct->status & IPS_SRC_NAT) +- flow->flags |= FLOW_OFFLOAD_SNAT; +- else if (ct->status & IPS_DST_NAT) +- flow->flags |= FLOW_OFFLOAD_DNAT; +- +- return flow; +- +-err_dst_cache_reply: +- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); +-err_dst_cache_original: +- kfree(entry); +-err_ct_refcnt: +- nf_ct_put(ct); +- +- return NULL; +-} +-EXPORT_SYMBOL_GPL(flow_offload_alloc); +- +-void flow_offload_free(struct flow_offload *flow) +-{ +- struct flow_offload_entry *e; +- +- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); +- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); +- e = container_of(flow, struct flow_offload_entry, flow); +- nf_ct_delete(e->ct, 0, 0); +- nf_ct_put(e->ct); +- kfree_rcu(e, rcu_head); +-} +-EXPORT_SYMBOL_GPL(flow_offload_free); +- +-void flow_offload_dead(struct flow_offload *flow) +-{ +- flow->flags |= FLOW_OFFLOAD_DYING; +-} +-EXPORT_SYMBOL_GPL(flow_offload_dead); +- +-int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) +-{ +- flow->timeout = (u32)jiffies; +- +- rhashtable_insert_fast(&flow_table->rhashtable, +- &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, +- *flow_table->type->params); +- rhashtable_insert_fast(&flow_table->rhashtable, +- &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, +- *flow_table->type->params); +- return 0; +-} +-EXPORT_SYMBOL_GPL(flow_offload_add); +- +-static void flow_offload_del(struct nf_flowtable *flow_table, +- struct flow_offload *flow) +-{ +- rhashtable_remove_fast(&flow_table->rhashtable, +- &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, +- *flow_table->type->params); +- rhashtable_remove_fast(&flow_table->rhashtable, +- &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, +- *flow_table->type->params); +- +- flow_offload_free(flow); +-} +- +-struct flow_offload_tuple_rhash * +-flow_offload_lookup(struct nf_flowtable *flow_table, +- struct flow_offload_tuple *tuple) +-{ +- return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, +- *flow_table->type->params); +-} +-EXPORT_SYMBOL_GPL(flow_offload_lookup); +- +-int nf_flow_table_iterate(struct nf_flowtable *flow_table, +- void (*iter)(struct flow_offload *flow, void *data), +- void *data) +-{ +- struct flow_offload_tuple_rhash *tuplehash; +- struct rhashtable_iter hti; +- struct flow_offload *flow; +- int err; +- +- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); +- if (err) +- return err; +- +- rhashtable_walk_start(&hti); +- +- while ((tuplehash = rhashtable_walk_next(&hti))) { +- if (IS_ERR(tuplehash)) { +- err = PTR_ERR(tuplehash); +- if (err != -EAGAIN) +- goto out; +- +- continue; +- } +- if (tuplehash->tuple.dir) +- continue; +- +- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); +- +- iter(flow, data); +- } +-out: +- rhashtable_walk_stop(&hti); +- rhashtable_walk_exit(&hti); +- +- return err; +-} +-EXPORT_SYMBOL_GPL(nf_flow_table_iterate); +- +-static inline bool nf_flow_has_expired(const struct flow_offload *flow) +-{ +- return (__s32)(flow->timeout - (u32)jiffies) <= 0; +-} +- +-static inline bool nf_flow_is_dying(const struct flow_offload *flow) +-{ +- return flow->flags & FLOW_OFFLOAD_DYING; +-} +- +-static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table) +-{ +- struct flow_offload_tuple_rhash *tuplehash; +- struct rhashtable_iter hti; +- struct flow_offload *flow; +- int err; +- +- err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); +- if (err) +- return 0; +- +- rhashtable_walk_start(&hti); +- +- while ((tuplehash = rhashtable_walk_next(&hti))) { +- if (IS_ERR(tuplehash)) { +- err = PTR_ERR(tuplehash); +- if (err != -EAGAIN) +- goto out; +- +- continue; +- } +- if (tuplehash->tuple.dir) +- continue; +- +- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); +- +- if (nf_flow_has_expired(flow) || +- nf_flow_is_dying(flow)) +- flow_offload_del(flow_table, flow); +- } +-out: +- rhashtable_walk_stop(&hti); +- rhashtable_walk_exit(&hti); +- +- return 1; +-} +- +-void nf_flow_offload_work_gc(struct work_struct *work) +-{ +- struct nf_flowtable *flow_table; +- +- flow_table = container_of(work, struct nf_flowtable, gc_work.work); +- nf_flow_offload_gc_step(flow_table); +- queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); +-} +-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); +- +-static u32 flow_offload_hash(const void *data, u32 len, u32 seed) +-{ +- const struct flow_offload_tuple *tuple = data; +- +- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); +-} +- +-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) +-{ +- const struct flow_offload_tuple_rhash *tuplehash = data; +- +- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); +-} +- +-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, +- const void *ptr) +-{ +- const struct flow_offload_tuple *tuple = arg->key; +- const struct flow_offload_tuple_rhash *x = ptr; +- +- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) +- return 1; +- +- return 0; +-} +- +-const struct rhashtable_params nf_flow_offload_rhash_params = { +- .head_offset = offsetof(struct flow_offload_tuple_rhash, node), +- .hashfn = flow_offload_hash, +- .obj_hashfn = flow_offload_hash_obj, +- .obj_cmpfn = flow_offload_hash_cmp, +- .automatic_shrinking = true, +-}; +-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params); +- +-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, +- __be16 port, __be16 new_port) +-{ +- struct tcphdr *tcph; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || +- skb_try_make_writable(skb, thoff + sizeof(*tcph))) +- return -1; +- +- tcph = (void *)(skb_network_header(skb) + thoff); +- inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); +- +- return 0; +-} +- +-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, +- __be16 port, __be16 new_port) +-{ +- struct udphdr *udph; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || +- skb_try_make_writable(skb, thoff + sizeof(*udph))) +- return -1; +- +- udph = (void *)(skb_network_header(skb) + thoff); +- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { +- inet_proto_csum_replace2(&udph->check, skb, port, +- new_port, true); +- if (!udph->check) +- udph->check = CSUM_MANGLED_0; +- } +- +- return 0; +-} +- +-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, +- u8 protocol, __be16 port, __be16 new_port) +-{ +- switch (protocol) { +- case IPPROTO_TCP: +- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) +- return NF_DROP; +- break; +- case IPPROTO_UDP: +- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) +- return NF_DROP; +- break; +- } +- +- return 0; +-} +- +-int nf_flow_snat_port(const struct flow_offload *flow, +- struct sk_buff *skb, unsigned int thoff, +- u8 protocol, enum flow_offload_tuple_dir dir) +-{ +- struct flow_ports *hdr; +- __be16 port, new_port; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || +- skb_try_make_writable(skb, thoff + sizeof(*hdr))) +- return -1; +- +- hdr = (void *)(skb_network_header(skb) + thoff); +- +- switch (dir) { +- case FLOW_OFFLOAD_DIR_ORIGINAL: +- port = hdr->source; +- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; +- hdr->source = new_port; +- break; +- case FLOW_OFFLOAD_DIR_REPLY: +- port = hdr->dest; +- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; +- hdr->dest = new_port; +- break; +- default: +- return -1; +- } +- +- return nf_flow_nat_port(skb, thoff, protocol, port, new_port); +-} +-EXPORT_SYMBOL_GPL(nf_flow_snat_port); +- +-int nf_flow_dnat_port(const struct flow_offload *flow, +- struct sk_buff *skb, unsigned int thoff, +- u8 protocol, enum flow_offload_tuple_dir dir) +-{ +- struct flow_ports *hdr; +- __be16 port, new_port; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || +- skb_try_make_writable(skb, thoff + sizeof(*hdr))) +- return -1; +- +- hdr = (void *)(skb_network_header(skb) + thoff); +- +- switch (dir) { +- case FLOW_OFFLOAD_DIR_ORIGINAL: +- port = hdr->dest; +- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; +- hdr->dest = new_port; +- break; +- case FLOW_OFFLOAD_DIR_REPLY: +- port = hdr->source; +- new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; +- hdr->source = new_port; +- break; +- default: +- return -1; +- } +- +- return nf_flow_nat_port(skb, thoff, protocol, port, new_port); +-} +-EXPORT_SYMBOL_GPL(nf_flow_dnat_port); +- +-static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) +-{ +- struct net_device *dev = data; +- +- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) +- return; +- +- flow_offload_dead(flow); +-} +- +-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, +- void *data) +-{ +- nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data); +- flush_delayed_work(&flowtable->gc_work); +-} +- +-void nf_flow_table_cleanup(struct net *net, struct net_device *dev) +-{ +- nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev); +-} +-EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); +- +-void nf_flow_table_free(struct nf_flowtable *flow_table) +-{ +- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); +- WARN_ON(!nf_flow_offload_gc_step(flow_table)); +-} +-EXPORT_SYMBOL_GPL(nf_flow_table_free); +- +-static int nf_flow_table_netdev_event(struct notifier_block *this, +- unsigned long event, void *ptr) +-{ +- struct net_device *dev = netdev_notifier_info_to_dev(ptr); +- +- if (event != NETDEV_DOWN) +- return NOTIFY_DONE; +- +- nf_flow_table_cleanup(dev_net(dev), dev); +- +- return NOTIFY_DONE; +-} +- +-static struct notifier_block flow_offload_netdev_notifier = { +- .notifier_call = nf_flow_table_netdev_event, +-}; +- +-static int __init nf_flow_table_module_init(void) +-{ +- return register_netdevice_notifier(&flow_offload_netdev_notifier); +-} +- +-static void __exit nf_flow_table_module_exit(void) +-{ +- unregister_netdevice_notifier(&flow_offload_netdev_notifier); +-} +- +-module_init(nf_flow_table_module_init); +-module_exit(nf_flow_table_module_exit); +- +-MODULE_LICENSE("GPL"); +-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +--- /dev/null ++++ b/net/netfilter/nf_flow_table_core.c +@@ -0,0 +1,462 @@ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/rhashtable.h> ++#include <linux/netdevice.h> ++#include <net/ip.h> ++#include <net/ip6_route.h> ++#include <net/netfilter/nf_tables.h> ++#include <net/netfilter/nf_flow_table.h> ++#include <net/netfilter/nf_conntrack.h> ++#include <net/netfilter/nf_conntrack_core.h> ++#include <net/netfilter/nf_conntrack_tuple.h> ++ ++struct flow_offload_entry { ++ struct flow_offload flow; ++ struct nf_conn *ct; ++ struct rcu_head rcu_head; ++}; ++ ++static void ++flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, ++ struct nf_flow_route *route, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; ++ struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; ++ struct dst_entry *dst = route->tuple[dir].dst; ++ ++ ft->dir = dir; ++ ++ switch (ctt->src.l3num) { ++ case NFPROTO_IPV4: ++ ft->src_v4 = ctt->src.u3.in; ++ ft->dst_v4 = ctt->dst.u3.in; ++ ft->mtu = ip_dst_mtu_maybe_forward(dst, true); ++ break; ++ case NFPROTO_IPV6: ++ ft->src_v6 = ctt->src.u3.in6; ++ ft->dst_v6 = ctt->dst.u3.in6; ++ ft->mtu = ip6_dst_mtu_forward(dst); ++ break; ++ } ++ ++ ft->l3proto = ctt->src.l3num; ++ ft->l4proto = ctt->dst.protonum; ++ ft->src_port = ctt->src.u.tcp.port; ++ ft->dst_port = ctt->dst.u.tcp.port; ++ ++ ft->iifidx = route->tuple[dir].ifindex; ++ ft->oifidx = route->tuple[!dir].ifindex; ++ ft->dst_cache = dst; ++} ++ ++struct flow_offload * ++flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route) ++{ ++ struct flow_offload_entry *entry; ++ struct flow_offload *flow; ++ ++ if (unlikely(nf_ct_is_dying(ct) || ++ !atomic_inc_not_zero(&ct->ct_general.use))) ++ return NULL; ++ ++ entry = kzalloc(sizeof(*entry), GFP_ATOMIC); ++ if (!entry) ++ goto err_ct_refcnt; ++ ++ flow = &entry->flow; ++ ++ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst)) ++ goto err_dst_cache_original; ++ ++ if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst)) ++ goto err_dst_cache_reply; ++ ++ entry->ct = ct; ++ ++ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL); ++ flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY); ++ ++ if (ct->status & IPS_SRC_NAT) ++ flow->flags |= FLOW_OFFLOAD_SNAT; ++ else if (ct->status & IPS_DST_NAT) ++ flow->flags |= FLOW_OFFLOAD_DNAT; ++ ++ return flow; ++ ++err_dst_cache_reply: ++ dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst); ++err_dst_cache_original: ++ kfree(entry); ++err_ct_refcnt: ++ nf_ct_put(ct); ++ ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(flow_offload_alloc); ++ ++void flow_offload_free(struct flow_offload *flow) ++{ ++ struct flow_offload_entry *e; ++ ++ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache); ++ dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache); ++ e = container_of(flow, struct flow_offload_entry, flow); ++ nf_ct_delete(e->ct, 0, 0); ++ nf_ct_put(e->ct); ++ kfree_rcu(e, rcu_head); ++} ++EXPORT_SYMBOL_GPL(flow_offload_free); ++ ++void flow_offload_dead(struct flow_offload *flow) ++{ ++ flow->flags |= FLOW_OFFLOAD_DYING; ++} ++EXPORT_SYMBOL_GPL(flow_offload_dead); ++ ++int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) ++{ ++ flow->timeout = (u32)jiffies; ++ ++ rhashtable_insert_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, ++ *flow_table->type->params); ++ rhashtable_insert_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, ++ *flow_table->type->params); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(flow_offload_add); ++ ++static void flow_offload_del(struct nf_flowtable *flow_table, ++ struct flow_offload *flow) ++{ ++ rhashtable_remove_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, ++ *flow_table->type->params); ++ rhashtable_remove_fast(&flow_table->rhashtable, ++ &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, ++ *flow_table->type->params); ++ ++ flow_offload_free(flow); ++} ++ ++struct flow_offload_tuple_rhash * ++flow_offload_lookup(struct nf_flowtable *flow_table, ++ struct flow_offload_tuple *tuple) ++{ ++ return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, ++ *flow_table->type->params); ++} ++EXPORT_SYMBOL_GPL(flow_offload_lookup); ++ ++int nf_flow_table_iterate(struct nf_flowtable *flow_table, ++ void (*iter)(struct flow_offload *flow, void *data), ++ void *data) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct rhashtable_iter hti; ++ struct flow_offload *flow; ++ int err; ++ ++ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); ++ if (err) ++ return err; ++ ++ rhashtable_walk_start(&hti); ++ ++ while ((tuplehash = rhashtable_walk_next(&hti))) { ++ if (IS_ERR(tuplehash)) { ++ err = PTR_ERR(tuplehash); ++ if (err != -EAGAIN) ++ goto out; ++ ++ continue; ++ } ++ if (tuplehash->tuple.dir) ++ continue; ++ ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); ++ ++ iter(flow, data); ++ } ++out: ++ rhashtable_walk_stop(&hti); ++ rhashtable_walk_exit(&hti); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_iterate); ++ ++static inline bool nf_flow_has_expired(const struct flow_offload *flow) ++{ ++ return (__s32)(flow->timeout - (u32)jiffies) <= 0; ++} ++ ++static inline bool nf_flow_is_dying(const struct flow_offload *flow) ++{ ++ return flow->flags & FLOW_OFFLOAD_DYING; ++} ++ ++static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct rhashtable_iter hti; ++ struct flow_offload *flow; ++ int err; ++ ++ err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL); ++ if (err) ++ return 0; ++ ++ rhashtable_walk_start(&hti); ++ ++ while ((tuplehash = rhashtable_walk_next(&hti))) { ++ if (IS_ERR(tuplehash)) { ++ err = PTR_ERR(tuplehash); ++ if (err != -EAGAIN) ++ goto out; ++ ++ continue; ++ } ++ if (tuplehash->tuple.dir) ++ continue; ++ ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); ++ ++ if (nf_flow_has_expired(flow) || ++ nf_flow_is_dying(flow)) ++ flow_offload_del(flow_table, flow); ++ } ++out: ++ rhashtable_walk_stop(&hti); ++ rhashtable_walk_exit(&hti); ++ ++ return 1; ++} ++ ++void nf_flow_offload_work_gc(struct work_struct *work) ++{ ++ struct nf_flowtable *flow_table; ++ ++ flow_table = container_of(work, struct nf_flowtable, gc_work.work); ++ nf_flow_offload_gc_step(flow_table); ++ queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); ++ ++static u32 flow_offload_hash(const void *data, u32 len, u32 seed) ++{ ++ const struct flow_offload_tuple *tuple = data; ++ ++ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); ++} ++ ++static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) ++{ ++ const struct flow_offload_tuple_rhash *tuplehash = data; ++ ++ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); ++} ++ ++static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, ++ const void *ptr) ++{ ++ const struct flow_offload_tuple *tuple = arg->key; ++ const struct flow_offload_tuple_rhash *x = ptr; ++ ++ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) ++ return 1; ++ ++ return 0; ++} ++ ++const struct rhashtable_params nf_flow_offload_rhash_params = { ++ .head_offset = offsetof(struct flow_offload_tuple_rhash, node), ++ .hashfn = flow_offload_hash, ++ .obj_hashfn = flow_offload_hash_obj, ++ .obj_cmpfn = flow_offload_hash_cmp, ++ .automatic_shrinking = true, ++}; ++EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params); ++ ++static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, ++ __be16 port, __be16 new_port) ++{ ++ struct tcphdr *tcph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*tcph))) ++ return -1; ++ ++ tcph = (void *)(skb_network_header(skb) + thoff); ++ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); ++ ++ return 0; ++} ++ ++static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, ++ __be16 port, __be16 new_port) ++{ ++ struct udphdr *udph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*udph))) ++ return -1; ++ ++ udph = (void *)(skb_network_header(skb) + thoff); ++ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { ++ inet_proto_csum_replace2(&udph->check, skb, port, ++ new_port, true); ++ if (!udph->check) ++ udph->check = CSUM_MANGLED_0; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, __be16 port, __be16 new_port) ++{ ++ switch (protocol) { ++ case IPPROTO_TCP: ++ if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) ++ return NF_DROP; ++ break; ++ case IPPROTO_UDP: ++ if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) ++ return NF_DROP; ++ break; ++ } ++ ++ return 0; ++} ++ ++int nf_flow_snat_port(const struct flow_offload *flow, ++ struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, enum flow_offload_tuple_dir dir) ++{ ++ struct flow_ports *hdr; ++ __be16 port, new_port; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || ++ skb_try_make_writable(skb, thoff + sizeof(*hdr))) ++ return -1; ++ ++ hdr = (void *)(skb_network_header(skb) + thoff); ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ port = hdr->source; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; ++ hdr->source = new_port; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ port = hdr->dest; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; ++ hdr->dest = new_port; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_port(skb, thoff, protocol, port, new_port); ++} ++EXPORT_SYMBOL_GPL(nf_flow_snat_port); ++ ++int nf_flow_dnat_port(const struct flow_offload *flow, ++ struct sk_buff *skb, unsigned int thoff, ++ u8 protocol, enum flow_offload_tuple_dir dir) ++{ ++ struct flow_ports *hdr; ++ __be16 port, new_port; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) || ++ skb_try_make_writable(skb, thoff + sizeof(*hdr))) ++ return -1; ++ ++ hdr = (void *)(skb_network_header(skb) + thoff); ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ port = hdr->dest; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; ++ hdr->dest = new_port; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ port = hdr->source; ++ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; ++ hdr->source = new_port; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_port(skb, thoff, protocol, port, new_port); ++} ++EXPORT_SYMBOL_GPL(nf_flow_dnat_port); ++ ++static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) ++{ ++ struct net_device *dev = data; ++ ++ if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) ++ return; ++ ++ flow_offload_dead(flow); ++} ++ ++static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, ++ void *data) ++{ ++ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data); ++ flush_delayed_work(&flowtable->gc_work); ++} ++ ++void nf_flow_table_cleanup(struct net *net, struct net_device *dev) ++{ ++ nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev); ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); ++ ++void nf_flow_table_free(struct nf_flowtable *flow_table) ++{ ++ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); ++ WARN_ON(!nf_flow_offload_gc_step(flow_table)); ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_free); ++ ++static int nf_flow_table_netdev_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ ++ if (event != NETDEV_DOWN) ++ return NOTIFY_DONE; ++ ++ nf_flow_table_cleanup(dev_net(dev), dev); ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block flow_offload_netdev_notifier = { ++ .notifier_call = nf_flow_table_netdev_event, ++}; ++ ++static int __init nf_flow_table_module_init(void) ++{ ++ return register_netdevice_notifier(&flow_offload_netdev_notifier); ++} ++ ++static void __exit nf_flow_table_module_exit(void) ++{ ++ unregister_netdevice_notifier(&flow_offload_netdev_notifier); ++} ++ ++module_init(nf_flow_table_module_init); ++module_exit(nf_flow_table_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); diff --git a/target/linux/generic/backport-4.14/353-netfilter-nf_flow_table-move-ipv4-offload-hook-code-.patch b/target/linux/generic/backport-4.14/353-netfilter-nf_flow_table-move-ipv4-offload-hook-code-.patch new file mode 100644 index 0000000000..e25a66f934 --- /dev/null +++ b/target/linux/generic/backport-4.14/353-netfilter-nf_flow_table-move-ipv4-offload-hook-code-.patch @@ -0,0 +1,522 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Sat, 17 Feb 2018 11:49:44 +0100 +Subject: [PATCH] netfilter: nf_flow_table: move ipv4 offload hook code to + nf_flow_table + +Allows some minor code sharing with the ipv6 hook code and is also +useful as preparation for adding iptables support for offload + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + create mode 100644 net/netfilter/nf_flow_table_ip.c + +--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c ++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c +@@ -2,248 +2,8 @@ + #include <linux/init.h> + #include <linux/module.h> + #include <linux/netfilter.h> +-#include <linux/rhashtable.h> +-#include <linux/ip.h> +-#include <linux/netdevice.h> +-#include <net/ip.h> +-#include <net/neighbour.h> + #include <net/netfilter/nf_flow_table.h> + #include <net/netfilter/nf_tables.h> +-/* For layer 4 checksum field offset. */ +-#include <linux/tcp.h> +-#include <linux/udp.h> +- +-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, +- __be32 addr, __be32 new_addr) +-{ +- struct tcphdr *tcph; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || +- skb_try_make_writable(skb, thoff + sizeof(*tcph))) +- return -1; +- +- tcph = (void *)(skb_network_header(skb) + thoff); +- inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); +- +- return 0; +-} +- +-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, +- __be32 addr, __be32 new_addr) +-{ +- struct udphdr *udph; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || +- skb_try_make_writable(skb, thoff + sizeof(*udph))) +- return -1; +- +- udph = (void *)(skb_network_header(skb) + thoff); +- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { +- inet_proto_csum_replace4(&udph->check, skb, addr, +- new_addr, true); +- if (!udph->check) +- udph->check = CSUM_MANGLED_0; +- } +- +- return 0; +-} +- +-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, +- unsigned int thoff, __be32 addr, +- __be32 new_addr) +-{ +- switch (iph->protocol) { +- case IPPROTO_TCP: +- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) +- return NF_DROP; +- break; +- case IPPROTO_UDP: +- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) +- return NF_DROP; +- break; +- } +- +- return 0; +-} +- +-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, +- struct iphdr *iph, unsigned int thoff, +- enum flow_offload_tuple_dir dir) +-{ +- __be32 addr, new_addr; +- +- switch (dir) { +- case FLOW_OFFLOAD_DIR_ORIGINAL: +- addr = iph->saddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; +- iph->saddr = new_addr; +- break; +- case FLOW_OFFLOAD_DIR_REPLY: +- addr = iph->daddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; +- iph->daddr = new_addr; +- break; +- default: +- return -1; +- } +- csum_replace4(&iph->check, addr, new_addr); +- +- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); +-} +- +-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, +- struct iphdr *iph, unsigned int thoff, +- enum flow_offload_tuple_dir dir) +-{ +- __be32 addr, new_addr; +- +- switch (dir) { +- case FLOW_OFFLOAD_DIR_ORIGINAL: +- addr = iph->daddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; +- iph->daddr = new_addr; +- break; +- case FLOW_OFFLOAD_DIR_REPLY: +- addr = iph->saddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; +- iph->saddr = new_addr; +- break; +- default: +- return -1; +- } +- +- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); +-} +- +-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, +- enum flow_offload_tuple_dir dir) +-{ +- struct iphdr *iph = ip_hdr(skb); +- unsigned int thoff = iph->ihl * 4; +- +- if (flow->flags & FLOW_OFFLOAD_SNAT && +- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || +- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) +- return -1; +- if (flow->flags & FLOW_OFFLOAD_DNAT && +- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || +- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) +- return -1; +- +- return 0; +-} +- +-static bool ip_has_options(unsigned int thoff) +-{ +- return thoff != sizeof(struct iphdr); +-} +- +-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, +- struct flow_offload_tuple *tuple) +-{ +- struct flow_ports *ports; +- unsigned int thoff; +- struct iphdr *iph; +- +- if (!pskb_may_pull(skb, sizeof(*iph))) +- return -1; +- +- iph = ip_hdr(skb); +- thoff = iph->ihl * 4; +- +- if (ip_is_fragment(iph) || +- unlikely(ip_has_options(thoff))) +- return -1; +- +- if (iph->protocol != IPPROTO_TCP && +- iph->protocol != IPPROTO_UDP) +- return -1; +- +- thoff = iph->ihl * 4; +- if (!pskb_may_pull(skb, thoff + sizeof(*ports))) +- return -1; +- +- ports = (struct flow_ports *)(skb_network_header(skb) + thoff); +- +- tuple->src_v4.s_addr = iph->saddr; +- tuple->dst_v4.s_addr = iph->daddr; +- tuple->src_port = ports->source; +- tuple->dst_port = ports->dest; +- tuple->l3proto = AF_INET; +- tuple->l4proto = iph->protocol; +- tuple->iifidx = dev->ifindex; +- +- return 0; +-} +- +-/* Based on ip_exceeds_mtu(). */ +-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +-{ +- if (skb->len <= mtu) +- return false; +- +- if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) +- return false; +- +- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) +- return false; +- +- return true; +-} +- +-unsigned int +-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, +- const struct nf_hook_state *state) +-{ +- struct flow_offload_tuple_rhash *tuplehash; +- struct nf_flowtable *flow_table = priv; +- struct flow_offload_tuple tuple = {}; +- enum flow_offload_tuple_dir dir; +- struct flow_offload *flow; +- struct net_device *outdev; +- const struct rtable *rt; +- struct iphdr *iph; +- __be32 nexthop; +- +- if (skb->protocol != htons(ETH_P_IP)) +- return NF_ACCEPT; +- +- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) +- return NF_ACCEPT; +- +- tuplehash = flow_offload_lookup(flow_table, &tuple); +- if (tuplehash == NULL) +- return NF_ACCEPT; +- +- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); +- if (!outdev) +- return NF_ACCEPT; +- +- dir = tuplehash->tuple.dir; +- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); +- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; +- +- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) +- return NF_ACCEPT; +- +- if (skb_try_make_writable(skb, sizeof(*iph))) +- return NF_DROP; +- +- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && +- nf_flow_nat_ip(flow, skb, dir) < 0) +- return NF_DROP; +- +- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; +- iph = ip_hdr(skb); +- ip_decrease_ttl(iph); +- +- skb->dev = outdev; +- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); +- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); +- +- return NF_STOLEN; +-} +-EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); + + static struct nf_flowtable_type flowtable_ipv4 = { + .family = NFPROTO_IPV4, +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -113,7 +113,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_ + + # flow table infrastructure + obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o +-nf_flow_table-objs := nf_flow_table_core.o ++nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o + + obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o + +--- /dev/null ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -0,0 +1,245 @@ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/rhashtable.h> ++#include <linux/ip.h> ++#include <linux/netdevice.h> ++#include <net/ip.h> ++#include <net/neighbour.h> ++#include <net/netfilter/nf_flow_table.h> ++/* For layer 4 checksum field offset. */ ++#include <linux/tcp.h> ++#include <linux/udp.h> ++ ++static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, ++ __be32 addr, __be32 new_addr) ++{ ++ struct tcphdr *tcph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*tcph))) ++ return -1; ++ ++ tcph = (void *)(skb_network_header(skb) + thoff); ++ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, ++ __be32 addr, __be32 new_addr) ++{ ++ struct udphdr *udph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*udph))) ++ return -1; ++ ++ udph = (void *)(skb_network_header(skb) + thoff); ++ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { ++ inet_proto_csum_replace4(&udph->check, skb, addr, ++ new_addr, true); ++ if (!udph->check) ++ udph->check = CSUM_MANGLED_0; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, ++ unsigned int thoff, __be32 addr, ++ __be32 new_addr) ++{ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ case IPPROTO_UDP: ++ if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, ++ struct iphdr *iph, unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ __be32 addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = iph->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; ++ iph->saddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = iph->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; ++ iph->daddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ csum_replace4(&iph->check, addr, new_addr); ++ ++ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); ++} ++ ++static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, ++ struct iphdr *iph, unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ __be32 addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = iph->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; ++ iph->daddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = iph->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; ++ iph->saddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); ++} ++ ++static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct iphdr *iph = ip_hdr(skb); ++ unsigned int thoff = iph->ihl * 4; ++ ++ if (flow->flags & FLOW_OFFLOAD_SNAT && ++ (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || ++ nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) ++ return -1; ++ if (flow->flags & FLOW_OFFLOAD_DNAT && ++ (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || ++ nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) ++ return -1; ++ ++ return 0; ++} ++ ++static bool ip_has_options(unsigned int thoff) ++{ ++ return thoff != sizeof(struct iphdr); ++} ++ ++static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, ++ struct flow_offload_tuple *tuple) ++{ ++ struct flow_ports *ports; ++ unsigned int thoff; ++ struct iphdr *iph; ++ ++ if (!pskb_may_pull(skb, sizeof(*iph))) ++ return -1; ++ ++ iph = ip_hdr(skb); ++ thoff = iph->ihl * 4; ++ ++ if (ip_is_fragment(iph) || ++ unlikely(ip_has_options(thoff))) ++ return -1; ++ ++ if (iph->protocol != IPPROTO_TCP && ++ iph->protocol != IPPROTO_UDP) ++ return -1; ++ ++ thoff = iph->ihl * 4; ++ if (!pskb_may_pull(skb, thoff + sizeof(*ports))) ++ return -1; ++ ++ ports = (struct flow_ports *)(skb_network_header(skb) + thoff); ++ ++ tuple->src_v4.s_addr = iph->saddr; ++ tuple->dst_v4.s_addr = iph->daddr; ++ tuple->src_port = ports->source; ++ tuple->dst_port = ports->dest; ++ tuple->l3proto = AF_INET; ++ tuple->l4proto = iph->protocol; ++ tuple->iifidx = dev->ifindex; ++ ++ return 0; ++} ++ ++/* Based on ip_exceeds_mtu(). */ ++static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) ++{ ++ if (skb->len <= mtu) ++ return false; ++ ++ if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ++ return false; ++ ++ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) ++ return false; ++ ++ return true; ++} ++ ++unsigned int ++nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct nf_flowtable *flow_table = priv; ++ struct flow_offload_tuple tuple = {}; ++ enum flow_offload_tuple_dir dir; ++ struct flow_offload *flow; ++ struct net_device *outdev; ++ const struct rtable *rt; ++ struct iphdr *iph; ++ __be32 nexthop; ++ ++ if (skb->protocol != htons(ETH_P_IP)) ++ return NF_ACCEPT; ++ ++ if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) ++ return NF_ACCEPT; ++ ++ tuplehash = flow_offload_lookup(flow_table, &tuple); ++ if (tuplehash == NULL) ++ return NF_ACCEPT; ++ ++ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); ++ if (!outdev) ++ return NF_ACCEPT; ++ ++ dir = tuplehash->tuple.dir; ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); ++ rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; ++ ++ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) ++ return NF_ACCEPT; ++ ++ if (skb_try_make_writable(skb, sizeof(*iph))) ++ return NF_DROP; ++ ++ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && ++ nf_flow_nat_ip(flow, skb, dir) < 0) ++ return NF_DROP; ++ ++ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; ++ iph = ip_hdr(skb); ++ ip_decrease_ttl(iph); ++ ++ skb->dev = outdev; ++ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); ++ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); ++ ++ return NF_STOLEN; ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); diff --git a/target/linux/generic/backport-4.14/354-netfilter-nf_flow_table-move-ip-header-check-out-of-.patch b/target/linux/generic/backport-4.14/354-netfilter-nf_flow_table-move-ip-header-check-out-of-.patch new file mode 100644 index 0000000000..4ee5532438 --- /dev/null +++ b/target/linux/generic/backport-4.14/354-netfilter-nf_flow_table-move-ip-header-check-out-of-.patch @@ -0,0 +1,32 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Sat, 17 Feb 2018 11:51:20 +0100 +Subject: [PATCH] netfilter: nf_flow_table: move ip header check out of + nf_flow_exceeds_mtu + +Allows the function to be shared with the IPv6 hook code + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -181,9 +181,6 @@ static bool nf_flow_exceeds_mtu(const st + if (skb->len <= mtu) + return false; + +- if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) +- return false; +- + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + return false; + +@@ -222,7 +219,8 @@ nf_flow_offload_ip_hook(void *priv, stru + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; + +- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) ++ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && ++ (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*iph))) diff --git a/target/linux/generic/backport-4.14/355-netfilter-nf_flow_table-move-ipv6-offload-hook-code-.patch b/target/linux/generic/backport-4.14/355-netfilter-nf_flow_table-move-ipv6-offload-hook-code-.patch new file mode 100644 index 0000000000..20ab0ed504 --- /dev/null +++ b/target/linux/generic/backport-4.14/355-netfilter-nf_flow_table-move-ipv6-offload-hook-code-.patch @@ -0,0 +1,483 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Sat, 17 Feb 2018 11:55:51 +0100 +Subject: [PATCH] netfilter: nf_flow_table: move ipv6 offload hook code to + nf_flow_table + +Useful as preparation for adding iptables support for offload + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c ++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c +@@ -3,240 +3,8 @@ + #include <linux/module.h> + #include <linux/netfilter.h> + #include <linux/rhashtable.h> +-#include <linux/ipv6.h> +-#include <linux/netdevice.h> +-#include <net/ipv6.h> +-#include <net/ip6_route.h> +-#include <net/neighbour.h> + #include <net/netfilter/nf_flow_table.h> + #include <net/netfilter/nf_tables.h> +-/* For layer 4 checksum field offset. */ +-#include <linux/tcp.h> +-#include <linux/udp.h> +- +-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, +- struct in6_addr *addr, +- struct in6_addr *new_addr) +-{ +- struct tcphdr *tcph; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || +- skb_try_make_writable(skb, thoff + sizeof(*tcph))) +- return -1; +- +- tcph = (void *)(skb_network_header(skb) + thoff); +- inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, +- new_addr->s6_addr32, true); +- +- return 0; +-} +- +-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, +- struct in6_addr *addr, +- struct in6_addr *new_addr) +-{ +- struct udphdr *udph; +- +- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || +- skb_try_make_writable(skb, thoff + sizeof(*udph))) +- return -1; +- +- udph = (void *)(skb_network_header(skb) + thoff); +- if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { +- inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, +- new_addr->s6_addr32, true); +- if (!udph->check) +- udph->check = CSUM_MANGLED_0; +- } +- +- return 0; +-} +- +-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, +- unsigned int thoff, struct in6_addr *addr, +- struct in6_addr *new_addr) +-{ +- switch (ip6h->nexthdr) { +- case IPPROTO_TCP: +- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) +- return NF_DROP; +- break; +- case IPPROTO_UDP: +- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) +- return NF_DROP; +- break; +- } +- +- return 0; +-} +- +-static int nf_flow_snat_ipv6(const struct flow_offload *flow, +- struct sk_buff *skb, struct ipv6hdr *ip6h, +- unsigned int thoff, +- enum flow_offload_tuple_dir dir) +-{ +- struct in6_addr addr, new_addr; +- +- switch (dir) { +- case FLOW_OFFLOAD_DIR_ORIGINAL: +- addr = ip6h->saddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; +- ip6h->saddr = new_addr; +- break; +- case FLOW_OFFLOAD_DIR_REPLY: +- addr = ip6h->daddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; +- ip6h->daddr = new_addr; +- break; +- default: +- return -1; +- } +- +- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +-} +- +-static int nf_flow_dnat_ipv6(const struct flow_offload *flow, +- struct sk_buff *skb, struct ipv6hdr *ip6h, +- unsigned int thoff, +- enum flow_offload_tuple_dir dir) +-{ +- struct in6_addr addr, new_addr; +- +- switch (dir) { +- case FLOW_OFFLOAD_DIR_ORIGINAL: +- addr = ip6h->daddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; +- ip6h->daddr = new_addr; +- break; +- case FLOW_OFFLOAD_DIR_REPLY: +- addr = ip6h->saddr; +- new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; +- ip6h->saddr = new_addr; +- break; +- default: +- return -1; +- } +- +- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +-} +- +-static int nf_flow_nat_ipv6(const struct flow_offload *flow, +- struct sk_buff *skb, +- enum flow_offload_tuple_dir dir) +-{ +- struct ipv6hdr *ip6h = ipv6_hdr(skb); +- unsigned int thoff = sizeof(*ip6h); +- +- if (flow->flags & FLOW_OFFLOAD_SNAT && +- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || +- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) +- return -1; +- if (flow->flags & FLOW_OFFLOAD_DNAT && +- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || +- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) +- return -1; +- +- return 0; +-} +- +-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, +- struct flow_offload_tuple *tuple) +-{ +- struct flow_ports *ports; +- struct ipv6hdr *ip6h; +- unsigned int thoff; +- +- if (!pskb_may_pull(skb, sizeof(*ip6h))) +- return -1; +- +- ip6h = ipv6_hdr(skb); +- +- if (ip6h->nexthdr != IPPROTO_TCP && +- ip6h->nexthdr != IPPROTO_UDP) +- return -1; +- +- thoff = sizeof(*ip6h); +- if (!pskb_may_pull(skb, thoff + sizeof(*ports))) +- return -1; +- +- ports = (struct flow_ports *)(skb_network_header(skb) + thoff); +- +- tuple->src_v6 = ip6h->saddr; +- tuple->dst_v6 = ip6h->daddr; +- tuple->src_port = ports->source; +- tuple->dst_port = ports->dest; +- tuple->l3proto = AF_INET6; +- tuple->l4proto = ip6h->nexthdr; +- tuple->iifidx = dev->ifindex; +- +- return 0; +-} +- +-/* Based on ip_exceeds_mtu(). */ +-static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +-{ +- if (skb->len <= mtu) +- return false; +- +- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) +- return false; +- +- return true; +-} +- +-unsigned int +-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, +- const struct nf_hook_state *state) +-{ +- struct flow_offload_tuple_rhash *tuplehash; +- struct nf_flowtable *flow_table = priv; +- struct flow_offload_tuple tuple = {}; +- enum flow_offload_tuple_dir dir; +- struct flow_offload *flow; +- struct net_device *outdev; +- struct in6_addr *nexthop; +- struct ipv6hdr *ip6h; +- struct rt6_info *rt; +- +- if (skb->protocol != htons(ETH_P_IPV6)) +- return NF_ACCEPT; +- +- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) +- return NF_ACCEPT; +- +- tuplehash = flow_offload_lookup(flow_table, &tuple); +- if (tuplehash == NULL) +- return NF_ACCEPT; +- +- outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); +- if (!outdev) +- return NF_ACCEPT; +- +- dir = tuplehash->tuple.dir; +- flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); +- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; +- +- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) +- return NF_ACCEPT; +- +- if (skb_try_make_writable(skb, sizeof(*ip6h))) +- return NF_DROP; +- +- if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && +- nf_flow_nat_ipv6(flow, skb, dir) < 0) +- return NF_DROP; +- +- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; +- ip6h = ipv6_hdr(skb); +- ip6h->hop_limit--; +- +- skb->dev = outdev; +- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); +- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); +- +- return NF_STOLEN; +-} +-EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); + + static struct nf_flowtable_type flowtable_ipv6 = { + .family = NFPROTO_IPV6, +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -4,8 +4,11 @@ + #include <linux/netfilter.h> + #include <linux/rhashtable.h> + #include <linux/ip.h> ++#include <linux/ipv6.h> + #include <linux/netdevice.h> + #include <net/ip.h> ++#include <net/ipv6.h> ++#include <net/ip6_route.h> + #include <net/neighbour.h> + #include <net/netfilter/nf_flow_table.h> + /* For layer 4 checksum field offset. */ +@@ -241,3 +244,215 @@ nf_flow_offload_ip_hook(void *priv, stru + return NF_STOLEN; + } + EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); ++ ++static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, ++ struct in6_addr *addr, ++ struct in6_addr *new_addr) ++{ ++ struct tcphdr *tcph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*tcph))) ++ return -1; ++ ++ tcph = (void *)(skb_network_header(skb) + thoff); ++ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, ++ new_addr->s6_addr32, true); ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, ++ struct in6_addr *addr, ++ struct in6_addr *new_addr) ++{ ++ struct udphdr *udph; ++ ++ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || ++ skb_try_make_writable(skb, thoff + sizeof(*udph))) ++ return -1; ++ ++ udph = (void *)(skb_network_header(skb) + thoff); ++ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { ++ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, ++ new_addr->s6_addr32, true); ++ if (!udph->check) ++ udph->check = CSUM_MANGLED_0; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, ++ unsigned int thoff, struct in6_addr *addr, ++ struct in6_addr *new_addr) ++{ ++ switch (ip6h->nexthdr) { ++ case IPPROTO_TCP: ++ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ case IPPROTO_UDP: ++ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) ++ return NF_DROP; ++ break; ++ } ++ ++ return 0; ++} ++ ++static int nf_flow_snat_ipv6(const struct flow_offload *flow, ++ struct sk_buff *skb, struct ipv6hdr *ip6h, ++ unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct in6_addr addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = ip6h->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; ++ ip6h->saddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = ip6h->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; ++ ip6h->daddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); ++} ++ ++static int nf_flow_dnat_ipv6(const struct flow_offload *flow, ++ struct sk_buff *skb, struct ipv6hdr *ip6h, ++ unsigned int thoff, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct in6_addr addr, new_addr; ++ ++ switch (dir) { ++ case FLOW_OFFLOAD_DIR_ORIGINAL: ++ addr = ip6h->daddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; ++ ip6h->daddr = new_addr; ++ break; ++ case FLOW_OFFLOAD_DIR_REPLY: ++ addr = ip6h->saddr; ++ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; ++ ip6h->saddr = new_addr; ++ break; ++ default: ++ return -1; ++ } ++ ++ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); ++} ++ ++static int nf_flow_nat_ipv6(const struct flow_offload *flow, ++ struct sk_buff *skb, ++ enum flow_offload_tuple_dir dir) ++{ ++ struct ipv6hdr *ip6h = ipv6_hdr(skb); ++ unsigned int thoff = sizeof(*ip6h); ++ ++ if (flow->flags & FLOW_OFFLOAD_SNAT && ++ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || ++ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) ++ return -1; ++ if (flow->flags & FLOW_OFFLOAD_DNAT && ++ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || ++ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) ++ return -1; ++ ++ return 0; ++} ++ ++static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, ++ struct flow_offload_tuple *tuple) ++{ ++ struct flow_ports *ports; ++ struct ipv6hdr *ip6h; ++ unsigned int thoff; ++ ++ if (!pskb_may_pull(skb, sizeof(*ip6h))) ++ return -1; ++ ++ ip6h = ipv6_hdr(skb); ++ ++ if (ip6h->nexthdr != IPPROTO_TCP && ++ ip6h->nexthdr != IPPROTO_UDP) ++ return -1; ++ ++ thoff = sizeof(*ip6h); ++ if (!pskb_may_pull(skb, thoff + sizeof(*ports))) ++ return -1; ++ ++ ports = (struct flow_ports *)(skb_network_header(skb) + thoff); ++ ++ tuple->src_v6 = ip6h->saddr; ++ tuple->dst_v6 = ip6h->daddr; ++ tuple->src_port = ports->source; ++ tuple->dst_port = ports->dest; ++ tuple->l3proto = AF_INET6; ++ tuple->l4proto = ip6h->nexthdr; ++ tuple->iifidx = dev->ifindex; ++ ++ return 0; ++} ++ ++unsigned int ++nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ struct flow_offload_tuple_rhash *tuplehash; ++ struct nf_flowtable *flow_table = priv; ++ struct flow_offload_tuple tuple = {}; ++ enum flow_offload_tuple_dir dir; ++ struct flow_offload *flow; ++ struct net_device *outdev; ++ struct in6_addr *nexthop; ++ struct ipv6hdr *ip6h; ++ struct rt6_info *rt; ++ ++ if (skb->protocol != htons(ETH_P_IPV6)) ++ return NF_ACCEPT; ++ ++ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) ++ return NF_ACCEPT; ++ ++ tuplehash = flow_offload_lookup(flow_table, &tuple); ++ if (tuplehash == NULL) ++ return NF_ACCEPT; ++ ++ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); ++ if (!outdev) ++ return NF_ACCEPT; ++ ++ dir = tuplehash->tuple.dir; ++ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); ++ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; ++ ++ if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) ++ return NF_ACCEPT; ++ ++ if (skb_try_make_writable(skb, sizeof(*ip6h))) ++ return NF_DROP; ++ ++ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && ++ nf_flow_nat_ipv6(flow, skb, dir) < 0) ++ return NF_DROP; ++ ++ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; ++ ip6h = ipv6_hdr(skb); ++ ip6h->hop_limit--; ++ ++ skb->dev = outdev; ++ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); ++ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); ++ ++ return NF_STOLEN; ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); diff --git a/target/linux/generic/backport-4.14/356-netfilter-nf_flow_table-relax-mixed-ipv4-ipv6-flowta.patch b/target/linux/generic/backport-4.14/356-netfilter-nf_flow_table-relax-mixed-ipv4-ipv6-flowta.patch new file mode 100644 index 0000000000..7d4bdc6d8d --- /dev/null +++ b/target/linux/generic/backport-4.14/356-netfilter-nf_flow_table-relax-mixed-ipv4-ipv6-flowta.patch @@ -0,0 +1,23 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Sat, 17 Feb 2018 12:02:28 +0100 +Subject: [PATCH] netfilter: nf_flow_table: relax mixed ipv4/ipv6 flowtable + dependencies + +Since the offload hook code was moved, this table no longer depends on +the IPv4 and IPv6 flowtable modules + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -670,8 +670,7 @@ endif # NF_TABLES + + config NF_FLOW_TABLE_INET + tristate "Netfilter flow table mixed IPv4/IPv6 module" +- depends on NF_FLOW_TABLE_IPV4 +- depends on NF_FLOW_TABLE_IPV6 ++ depends on NF_FLOW_TABLE + help + This option adds the flow table mixed IPv4/IPv6 support. + diff --git a/target/linux/generic/backport-4.14/357-netfilter-nf_flow_table-move-init-code-to-nf_flow_ta.patch b/target/linux/generic/backport-4.14/357-netfilter-nf_flow_table-move-init-code-to-nf_flow_ta.patch new file mode 100644 index 0000000000..75cbda6baf --- /dev/null +++ b/target/linux/generic/backport-4.14/357-netfilter-nf_flow_table-move-init-code-to-nf_flow_ta.patch @@ -0,0 +1,298 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Sun, 18 Feb 2018 18:16:31 +0100 +Subject: [PATCH] netfilter: nf_flow_table: move init code to + nf_flow_table_core.c + +Reduces duplication of .gc and .params in flowtable type definitions and +makes the API clearer + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -14,9 +14,8 @@ struct nf_flowtable; + struct nf_flowtable_type { + struct list_head list; + int family; +- void (*gc)(struct work_struct *work); ++ int (*init)(struct nf_flowtable *ft); + void (*free)(struct nf_flowtable *ft); +- const struct rhashtable_params *params; + nf_hookfn *hook; + struct module *owner; + }; +@@ -100,9 +99,8 @@ int nf_flow_table_iterate(struct nf_flow + + void nf_flow_table_cleanup(struct net *net, struct net_device *dev); + ++int nf_flow_table_init(struct nf_flowtable *flow_table); + void nf_flow_table_free(struct nf_flowtable *flow_table); +-void nf_flow_offload_work_gc(struct work_struct *work); +-extern const struct rhashtable_params nf_flow_offload_rhash_params; + + void flow_offload_dead(struct flow_offload *flow); + +--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c ++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c +@@ -7,8 +7,7 @@ + + static struct nf_flowtable_type flowtable_ipv4 = { + .family = NFPROTO_IPV4, +- .params = &nf_flow_offload_rhash_params, +- .gc = nf_flow_offload_work_gc, ++ .init = nf_flow_table_init, + .free = nf_flow_table_free, + .hook = nf_flow_offload_ip_hook, + .owner = THIS_MODULE, +--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c ++++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c +@@ -8,8 +8,7 @@ + + static struct nf_flowtable_type flowtable_ipv6 = { + .family = NFPROTO_IPV6, +- .params = &nf_flow_offload_rhash_params, +- .gc = nf_flow_offload_work_gc, ++ .init = nf_flow_table_init, + .free = nf_flow_table_free, + .hook = nf_flow_offload_ipv6_hook, + .owner = THIS_MODULE, +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -116,16 +116,50 @@ void flow_offload_dead(struct flow_offlo + } + EXPORT_SYMBOL_GPL(flow_offload_dead); + ++static u32 flow_offload_hash(const void *data, u32 len, u32 seed) ++{ ++ const struct flow_offload_tuple *tuple = data; ++ ++ return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); ++} ++ ++static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) ++{ ++ const struct flow_offload_tuple_rhash *tuplehash = data; ++ ++ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); ++} ++ ++static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, ++ const void *ptr) ++{ ++ const struct flow_offload_tuple *tuple = arg->key; ++ const struct flow_offload_tuple_rhash *x = ptr; ++ ++ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) ++ return 1; ++ ++ return 0; ++} ++ ++static const struct rhashtable_params nf_flow_offload_rhash_params = { ++ .head_offset = offsetof(struct flow_offload_tuple_rhash, node), ++ .hashfn = flow_offload_hash, ++ .obj_hashfn = flow_offload_hash_obj, ++ .obj_cmpfn = flow_offload_hash_cmp, ++ .automatic_shrinking = true, ++}; ++ + int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) + { + flow->timeout = (u32)jiffies; + + rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, +- *flow_table->type->params); ++ nf_flow_offload_rhash_params); + rhashtable_insert_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, +- *flow_table->type->params); ++ nf_flow_offload_rhash_params); + return 0; + } + EXPORT_SYMBOL_GPL(flow_offload_add); +@@ -135,10 +169,10 @@ static void flow_offload_del(struct nf_f + { + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, +- *flow_table->type->params); ++ nf_flow_offload_rhash_params); + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, +- *flow_table->type->params); ++ nf_flow_offload_rhash_params); + + flow_offload_free(flow); + } +@@ -148,7 +182,7 @@ flow_offload_lookup(struct nf_flowtable + struct flow_offload_tuple *tuple) + { + return rhashtable_lookup_fast(&flow_table->rhashtable, tuple, +- *flow_table->type->params); ++ nf_flow_offload_rhash_params); + } + EXPORT_SYMBOL_GPL(flow_offload_lookup); + +@@ -237,7 +271,7 @@ out: + return 1; + } + +-void nf_flow_offload_work_gc(struct work_struct *work) ++static void nf_flow_offload_work_gc(struct work_struct *work) + { + struct nf_flowtable *flow_table; + +@@ -245,42 +279,6 @@ void nf_flow_offload_work_gc(struct work + nf_flow_offload_gc_step(flow_table); + queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); + } +-EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc); +- +-static u32 flow_offload_hash(const void *data, u32 len, u32 seed) +-{ +- const struct flow_offload_tuple *tuple = data; +- +- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); +-} +- +-static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) +-{ +- const struct flow_offload_tuple_rhash *tuplehash = data; +- +- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); +-} +- +-static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, +- const void *ptr) +-{ +- const struct flow_offload_tuple *tuple = arg->key; +- const struct flow_offload_tuple_rhash *x = ptr; +- +- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) +- return 1; +- +- return 0; +-} +- +-const struct rhashtable_params nf_flow_offload_rhash_params = { +- .head_offset = offsetof(struct flow_offload_tuple_rhash, node), +- .hashfn = flow_offload_hash, +- .obj_hashfn = flow_offload_hash_obj, +- .obj_cmpfn = flow_offload_hash_cmp, +- .automatic_shrinking = true, +-}; +-EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params); + + static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) +@@ -398,6 +396,24 @@ int nf_flow_dnat_port(const struct flow_ + } + EXPORT_SYMBOL_GPL(nf_flow_dnat_port); + ++int nf_flow_table_init(struct nf_flowtable *flowtable) ++{ ++ int err; ++ ++ INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); ++ ++ err = rhashtable_init(&flowtable->rhashtable, ++ &nf_flow_offload_rhash_params); ++ if (err < 0) ++ return err; ++ ++ queue_delayed_work(system_power_efficient_wq, ++ &flowtable->gc_work, HZ); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_init); ++ + static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) + { + struct net_device *dev = data; +@@ -423,8 +439,10 @@ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup) + + void nf_flow_table_free(struct nf_flowtable *flow_table) + { ++ cancel_delayed_work_sync(&flow_table->gc_work); + nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); + WARN_ON(!nf_flow_offload_gc_step(flow_table)); ++ rhashtable_destroy(&flow_table->rhashtable); + } + EXPORT_SYMBOL_GPL(nf_flow_table_free); + +--- a/net/netfilter/nf_flow_table_inet.c ++++ b/net/netfilter/nf_flow_table_inet.c +@@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, st + + static struct nf_flowtable_type flowtable_inet = { + .family = NFPROTO_INET, +- .params = &nf_flow_offload_rhash_params, +- .gc = nf_flow_offload_work_gc, ++ .init = nf_flow_table_init, + .free = nf_flow_table_free, + .hook = nf_flow_offload_inet_hook, + .owner = THIS_MODULE, +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -5013,40 +5013,38 @@ static int nf_tables_newflowtable(struct + } + + flowtable->data.type = type; +- err = rhashtable_init(&flowtable->data.rhashtable, type->params); ++ err = type->init(&flowtable->data); + if (err < 0) + goto err3; + + err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], + flowtable); + if (err < 0) +- goto err3; ++ goto err4; + + for (i = 0; i < flowtable->ops_len; i++) { + err = nf_register_net_hook(net, &flowtable->ops[i]); + if (err < 0) +- goto err4; ++ goto err5; + } + + err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (err < 0) +- goto err5; +- +- INIT_DEFERRABLE_WORK(&flowtable->data.gc_work, type->gc); +- queue_delayed_work(system_power_efficient_wq, +- &flowtable->data.gc_work, HZ); ++ goto err6; + + list_add_tail_rcu(&flowtable->list, &table->flowtables); + table->use++; + + return 0; +-err5: ++err6: + i = flowtable->ops_len; +-err4: ++err5: + for (k = i - 1; k >= 0; k--) + nf_unregister_net_hook(net, &flowtable->ops[i]); + + kfree(flowtable->ops); ++err4: ++ flowtable->data.type->free(&flowtable->data); + err3: + module_put(type->owner); + err2: +@@ -5325,10 +5323,8 @@ err: + + static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) + { +- cancel_delayed_work_sync(&flowtable->data.gc_work); + kfree(flowtable->name); + flowtable->data.type->free(&flowtable->data); +- rhashtable_destroy(&flowtable->data.rhashtable); + module_put(flowtable->data.type->owner); + } + diff --git a/target/linux/generic/backport-4.14/358-netfilter-nf_flow_table-fix-priv-pointer-for-netdev-.patch b/target/linux/generic/backport-4.14/358-netfilter-nf_flow_table-fix-priv-pointer-for-netdev-.patch new file mode 100644 index 0000000000..d4b746d0e9 --- /dev/null +++ b/target/linux/generic/backport-4.14/358-netfilter-nf_flow_table-fix-priv-pointer-for-netdev-.patch @@ -0,0 +1,22 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Tue, 20 Feb 2018 14:48:51 +0100 +Subject: [PATCH] netfilter: nf_flow_table: fix priv pointer for netdev hook + +The offload ip hook expects a pointer to the flowtable, not to the +rhashtable. Since the rhashtable is the first member, this is safe for +the moment, but breaks as soon as the structure layout changes + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4879,7 +4879,7 @@ static int nf_tables_flowtable_parse_hoo + flowtable->ops[i].pf = NFPROTO_NETDEV; + flowtable->ops[i].hooknum = hooknum; + flowtable->ops[i].priority = priority; +- flowtable->ops[i].priv = &flowtable->data.rhashtable; ++ flowtable->ops[i].priv = &flowtable->data; + flowtable->ops[i].hook = flowtable->data.type->hook; + flowtable->ops[i].dev = dev_array[i]; + } diff --git a/target/linux/generic/backport-4.14/359-netfilter-nf_flow_table-track-flow-tables-in-nf_flow.patch b/target/linux/generic/backport-4.14/359-netfilter-nf_flow_table-track-flow-tables-in-nf_flow.patch new file mode 100644 index 0000000000..ad7d81f4f9 --- /dev/null +++ b/target/linux/generic/backport-4.14/359-netfilter-nf_flow_table-track-flow-tables-in-nf_flow.patch @@ -0,0 +1,114 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Tue, 20 Feb 2018 14:08:14 +0100 +Subject: [PATCH] netfilter: nf_flow_table: track flow tables in nf_flow_table + directly + +Avoids having nf_flow_table depend on nftables (useful for future +iptables backport work) + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- + +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -21,6 +21,7 @@ struct nf_flowtable_type { + }; + + struct nf_flowtable { ++ struct list_head list; + struct rhashtable rhashtable; + const struct nf_flowtable_type *type; + struct delayed_work gc_work; +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1091,9 +1091,6 @@ struct nft_flowtable { + struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask); +-void nft_flow_table_iterate(struct net *net, +- void (*iter)(struct nf_flowtable *flowtable, void *data), +- void *data); + + void nft_register_flowtable_type(struct nf_flowtable_type *type); + void nft_unregister_flowtable_type(struct nf_flowtable_type *type); +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -18,6 +18,9 @@ struct flow_offload_entry { + struct rcu_head rcu_head; + }; + ++static DEFINE_MUTEX(flowtable_lock); ++static LIST_HEAD(flowtables); ++ + static void + flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, + struct nf_flow_route *route, +@@ -410,6 +413,10 @@ int nf_flow_table_init(struct nf_flowtab + queue_delayed_work(system_power_efficient_wq, + &flowtable->gc_work, HZ); + ++ mutex_lock(&flowtable_lock); ++ list_add(&flowtable->list, &flowtables); ++ mutex_unlock(&flowtable_lock); ++ + return 0; + } + EXPORT_SYMBOL_GPL(nf_flow_table_init); +@@ -425,20 +432,28 @@ static void nf_flow_table_do_cleanup(str + } + + static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, +- void *data) ++ struct net_device *dev) + { +- nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data); ++ nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); + flush_delayed_work(&flowtable->gc_work); + } + + void nf_flow_table_cleanup(struct net *net, struct net_device *dev) + { +- nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev); ++ struct nf_flowtable *flowtable; ++ ++ mutex_lock(&flowtable_lock); ++ list_for_each_entry(flowtable, &flowtables, list) ++ nf_flow_table_iterate_cleanup(flowtable, dev); ++ mutex_unlock(&flowtable_lock); + } + EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); + + void nf_flow_table_free(struct nf_flowtable *flow_table) + { ++ mutex_lock(&flowtable_lock); ++ list_del(&flow_table->list); ++ mutex_unlock(&flowtable_lock); + cancel_delayed_work_sync(&flow_table->gc_work); + nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); + WARN_ON(!nf_flow_offload_gc_step(flow_table)); +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4923,23 +4923,6 @@ static const struct nf_flowtable_type *n + return ERR_PTR(-ENOENT); + } + +-void nft_flow_table_iterate(struct net *net, +- void (*iter)(struct nf_flowtable *flowtable, void *data), +- void *data) +-{ +- struct nft_flowtable *flowtable; +- const struct nft_table *table; +- +- nfnl_lock(NFNL_SUBSYS_NFTABLES); +- list_for_each_entry(table, &net->nft.tables, list) { +- list_for_each_entry(flowtable, &table->flowtables, list) { +- iter(&flowtable->data, data); +- } +- } +- nfnl_unlock(NFNL_SUBSYS_NFTABLES); +-} +-EXPORT_SYMBOL_GPL(nft_flow_table_iterate); +- + static void nft_unregister_flowtable_net_hooks(struct net *net, + struct nft_flowtable *flowtable) + { diff --git a/target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-add-hardware-offload-support.patch b/target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-add-hardware-offload-support.patch new file mode 100644 index 0000000000..f3c2ec8ea5 --- /dev/null +++ b/target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-add-hardware-offload-support.patch @@ -0,0 +1,566 @@ +From: Pablo Neira Ayuso <pablo@netfilter.org> +Date: Thu, 11 Jan 2018 16:32:00 +0100 +Subject: [PATCH] netfilter: nf_flow_table: add hardware offload support + +This patch adds the infrastructure to offload flows to hardware, in case +the nic/switch comes with built-in flow tables capabilities. + +If the hardware comes with no hardware flow tables or they have +limitations in terms of features, the existing infrastructure falls back +to the software flow table implementation. + +The software flow table garbage collector skips entries that resides in +the hardware, so the hardware will be responsible for releasing this +flow table entry too via flow_offload_dead(). + +Hardware configuration, either to add or to delete entries, is done from +the hardware offload workqueue, to ensure this is done from user context +given that we may sleep when grabbing the mdio mutex. + +Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> +--- + create mode 100644 net/netfilter/nf_flow_table_hw.c + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -826,6 +826,13 @@ struct xfrmdev_ops { + }; + #endif + ++struct flow_offload; ++ ++enum flow_offload_type { ++ FLOW_OFFLOAD_ADD = 0, ++ FLOW_OFFLOAD_DEL, ++}; ++ + /* + * This structure defines the management hooks for network devices. + * The following hooks can be defined; unless noted otherwise, they are +@@ -1057,6 +1064,10 @@ struct xfrmdev_ops { + * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags); + * ++ * int (*ndo_flow_offload)(enum flow_offload_type type, ++ * struct flow_offload *flow); ++ * Adds/deletes flow entry to/from net device flowtable. ++ * + * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); + * Called to change device carrier. Soft-devices (like dummy, team, etc) + * which do not represent real hardware may define this to allow their +@@ -1281,6 +1292,8 @@ struct net_device_ops { + int (*ndo_bridge_dellink)(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags); ++ int (*ndo_flow_offload)(enum flow_offload_type type, ++ struct flow_offload *flow); + int (*ndo_change_carrier)(struct net_device *dev, + bool new_carrier); + int (*ndo_get_phys_port_id)(struct net_device *dev, +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -20,11 +20,17 @@ struct nf_flowtable_type { + struct module *owner; + }; + ++enum nf_flowtable_flags { ++ NF_FLOWTABLE_F_HW = 0x1, ++}; ++ + struct nf_flowtable { + struct list_head list; + struct rhashtable rhashtable; + const struct nf_flowtable_type *type; ++ u32 flags; + struct delayed_work gc_work; ++ possible_net_t ft_net; + }; + + enum flow_offload_tuple_dir { +@@ -68,6 +74,7 @@ struct flow_offload_tuple_rhash { + #define FLOW_OFFLOAD_SNAT 0x1 + #define FLOW_OFFLOAD_DNAT 0x2 + #define FLOW_OFFLOAD_DYING 0x4 ++#define FLOW_OFFLOAD_HW 0x8 + + struct flow_offload { + struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; +@@ -121,6 +128,22 @@ unsigned int nf_flow_offload_ip_hook(voi + unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + ++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct); ++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow); ++ ++struct nf_flow_table_hw { ++ struct module *owner; ++ void (*add)(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct); ++ void (*del)(struct net *net, struct flow_offload *flow); ++}; ++ ++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload); ++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload); ++ ++extern struct work_struct nf_flow_offload_hw_work; ++ + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ + MODULE_ALIAS("nf-flowtable-" __stringify(family)) + +--- a/include/uapi/linux/netfilter/nf_tables.h ++++ b/include/uapi/linux/netfilter/nf_tables.h +@@ -1341,6 +1341,7 @@ enum nft_object_attributes { + * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32) + * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32) + * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64) ++ * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32) + */ + enum nft_flowtable_attributes { + NFTA_FLOWTABLE_UNSPEC, +@@ -1350,6 +1351,7 @@ enum nft_flowtable_attributes { + NFTA_FLOWTABLE_USE, + NFTA_FLOWTABLE_HANDLE, + NFTA_FLOWTABLE_PAD, ++ NFTA_FLOWTABLE_FLAGS, + __NFTA_FLOWTABLE_MAX + }; + #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1) +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -686,6 +686,15 @@ config NF_FLOW_TABLE + + To compile it as a module, choose M here. + ++config NF_FLOW_TABLE_HW ++ tristate "Netfilter flow table hardware offload module" ++ depends on NF_FLOW_TABLE ++ help ++ This option adds hardware offload support for the flow table core ++ infrastructure. ++ ++ To compile it as a module, choose M here. ++ + config NETFILTER_XTABLES + tristate "Netfilter Xtables support (required for ip_tables)" + default m if NETFILTER_ADVANCED=n +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -116,6 +116,7 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_t + nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o + + obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o ++obj-$(CONFIG_NF_FLOW_TABLE_HW) += nf_flow_table_hw.o + + # generic X tables + obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -167,9 +167,16 @@ int flow_offload_add(struct nf_flowtable + } + EXPORT_SYMBOL_GPL(flow_offload_add); + ++static inline bool nf_flow_in_hw(const struct flow_offload *flow) ++{ ++ return flow->flags & FLOW_OFFLOAD_HW; ++} ++ + static void flow_offload_del(struct nf_flowtable *flow_table, + struct flow_offload *flow) + { ++ struct net *net = read_pnet(&flow_table->ft_net); ++ + rhashtable_remove_fast(&flow_table->rhashtable, + &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, + nf_flow_offload_rhash_params); +@@ -177,6 +184,9 @@ static void flow_offload_del(struct nf_f + &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, + nf_flow_offload_rhash_params); + ++ if (nf_flow_in_hw(flow)) ++ nf_flow_offload_hw_del(net, flow); ++ + flow_offload_free(flow); + } + +@@ -263,6 +273,10 @@ static int nf_flow_offload_gc_step(struc + + flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); + ++ if (nf_flow_in_hw(flow) && ++ !nf_flow_is_dying(flow)) ++ continue; ++ + if (nf_flow_has_expired(flow) || + nf_flow_is_dying(flow)) + flow_offload_del(flow_table, flow); +@@ -399,10 +413,43 @@ int nf_flow_dnat_port(const struct flow_ + } + EXPORT_SYMBOL_GPL(nf_flow_dnat_port); + ++static const struct nf_flow_table_hw __rcu *nf_flow_table_hw_hook __read_mostly; ++ ++static int nf_flow_offload_hw_init(struct nf_flowtable *flow_table) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ if (!rcu_access_pointer(nf_flow_table_hw_hook)) ++ request_module("nf-flow-table-hw"); ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (!offload) ++ goto err_no_hw_offload; ++ ++ if (!try_module_get(offload->owner)) ++ goto err_no_hw_offload; ++ ++ rcu_read_unlock(); ++ ++ return 0; ++ ++err_no_hw_offload: ++ rcu_read_unlock(); ++ ++ return -EOPNOTSUPP; ++} ++ + int nf_flow_table_init(struct nf_flowtable *flowtable) + { + int err; + ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) { ++ err = nf_flow_offload_hw_init(flowtable); ++ if (err) ++ return err; ++ } ++ + INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); + + err = rhashtable_init(&flowtable->rhashtable, +@@ -436,6 +483,8 @@ static void nf_flow_table_iterate_cleanu + { + nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); + flush_delayed_work(&flowtable->gc_work); ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) ++ flush_work(&nf_flow_offload_hw_work); + } + + void nf_flow_table_cleanup(struct net *net, struct net_device *dev) +@@ -449,6 +498,26 @@ void nf_flow_table_cleanup(struct net *n + } + EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); + ++struct work_struct nf_flow_offload_hw_work; ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_work); ++ ++/* Give the hardware workqueue the chance to remove entries from hardware.*/ ++static void nf_flow_offload_hw_free(struct nf_flowtable *flowtable) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ flush_work(&nf_flow_offload_hw_work); ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (!offload) { ++ rcu_read_unlock(); ++ return; ++ } ++ module_put(offload->owner); ++ rcu_read_unlock(); ++} ++ + void nf_flow_table_free(struct nf_flowtable *flow_table) + { + mutex_lock(&flowtable_lock); +@@ -458,9 +527,58 @@ void nf_flow_table_free(struct nf_flowta + nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); + WARN_ON(!nf_flow_offload_gc_step(flow_table)); + rhashtable_destroy(&flow_table->rhashtable); ++ if (flow_table->flags & NF_FLOWTABLE_F_HW) ++ nf_flow_offload_hw_free(flow_table); + } + EXPORT_SYMBOL_GPL(nf_flow_table_free); + ++/* Must be called from user context. */ ++void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (offload) ++ offload->add(net, flow, ct); ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_add); ++ ++/* Must be called from user context. */ ++void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow) ++{ ++ const struct nf_flow_table_hw *offload; ++ ++ rcu_read_lock(); ++ offload = rcu_dereference(nf_flow_table_hw_hook); ++ if (offload) ++ offload->del(net, flow); ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_offload_hw_del); ++ ++int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload) ++{ ++ if (rcu_access_pointer(nf_flow_table_hw_hook)) ++ return -EBUSY; ++ ++ rcu_assign_pointer(nf_flow_table_hw_hook, offload); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_hw_register); ++ ++void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload) ++{ ++ WARN_ON(rcu_access_pointer(nf_flow_table_hw_hook) != offload); ++ rcu_assign_pointer(nf_flow_table_hw_hook, NULL); ++ ++ synchronize_rcu(); ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_hw_unregister); ++ + static int nf_flow_table_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) + { +--- /dev/null ++++ b/net/netfilter/nf_flow_table_hw.c +@@ -0,0 +1,169 @@ ++#include <linux/kernel.h> ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/rhashtable.h> ++#include <linux/netdevice.h> ++#include <net/netfilter/nf_flow_table.h> ++#include <net/netfilter/nf_conntrack.h> ++#include <net/netfilter/nf_conntrack_core.h> ++#include <net/netfilter/nf_conntrack_tuple.h> ++ ++static DEFINE_SPINLOCK(flow_offload_hw_pending_list_lock); ++static LIST_HEAD(flow_offload_hw_pending_list); ++ ++static DEFINE_MUTEX(nf_flow_offload_hw_mutex); ++ ++struct flow_offload_hw { ++ struct list_head list; ++ enum flow_offload_type type; ++ struct flow_offload *flow; ++ struct nf_conn *ct; ++ possible_net_t flow_hw_net; ++}; ++ ++static int do_flow_offload_hw(struct net *net, struct flow_offload *flow, ++ int type) ++{ ++ struct net_device *indev; ++ int ret, ifindex; ++ ++ ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx; ++ indev = dev_get_by_index(net, ifindex); ++ if (WARN_ON(!indev)) ++ return 0; ++ ++ mutex_lock(&nf_flow_offload_hw_mutex); ++ ret = indev->netdev_ops->ndo_flow_offload(type, flow); ++ mutex_unlock(&nf_flow_offload_hw_mutex); ++ ++ dev_put(indev); ++ ++ return ret; ++} ++ ++static void flow_offload_hw_work_add(struct flow_offload_hw *offload) ++{ ++ struct net *net; ++ int ret; ++ ++ if (nf_ct_is_dying(offload->ct)) ++ return; ++ ++ net = read_pnet(&offload->flow_hw_net); ++ ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD); ++ if (ret >= 0) ++ offload->flow->flags |= FLOW_OFFLOAD_HW; ++} ++ ++static void flow_offload_hw_work_del(struct flow_offload_hw *offload) ++{ ++ struct net *net = read_pnet(&offload->flow_hw_net); ++ ++ do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_DEL); ++} ++ ++static void flow_offload_hw_work(struct work_struct *work) ++{ ++ struct flow_offload_hw *offload, *next; ++ LIST_HEAD(hw_offload_pending); ++ ++ spin_lock_bh(&flow_offload_hw_pending_list_lock); ++ list_replace_init(&flow_offload_hw_pending_list, &hw_offload_pending); ++ spin_unlock_bh(&flow_offload_hw_pending_list_lock); ++ ++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { ++ switch (offload->type) { ++ case FLOW_OFFLOAD_ADD: ++ flow_offload_hw_work_add(offload); ++ break; ++ case FLOW_OFFLOAD_DEL: ++ flow_offload_hw_work_del(offload); ++ break; ++ } ++ if (offload->ct) ++ nf_conntrack_put(&offload->ct->ct_general); ++ list_del(&offload->list); ++ kfree(offload); ++ } ++} ++ ++static void flow_offload_queue_work(struct flow_offload_hw *offload) ++{ ++ spin_lock_bh(&flow_offload_hw_pending_list_lock); ++ list_add_tail(&offload->list, &flow_offload_hw_pending_list); ++ spin_unlock_bh(&flow_offload_hw_pending_list_lock); ++ ++ schedule_work(&nf_flow_offload_hw_work); ++} ++ ++static void flow_offload_hw_add(struct net *net, struct flow_offload *flow, ++ struct nf_conn *ct) ++{ ++ struct flow_offload_hw *offload; ++ ++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ if (!offload) ++ return; ++ ++ nf_conntrack_get(&ct->ct_general); ++ offload->type = FLOW_OFFLOAD_ADD; ++ offload->ct = ct; ++ offload->flow = flow; ++ write_pnet(&offload->flow_hw_net, net); ++ ++ flow_offload_queue_work(offload); ++} ++ ++static void flow_offload_hw_del(struct net *net, struct flow_offload *flow) ++{ ++ struct flow_offload_hw *offload; ++ ++ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC); ++ if (!offload) ++ return; ++ ++ offload->type = FLOW_OFFLOAD_DEL; ++ offload->ct = NULL; ++ offload->flow = flow; ++ write_pnet(&offload->flow_hw_net, net); ++ ++ flow_offload_queue_work(offload); ++} ++ ++static const struct nf_flow_table_hw flow_offload_hw = { ++ .add = flow_offload_hw_add, ++ .del = flow_offload_hw_del, ++ .owner = THIS_MODULE, ++}; ++ ++static int __init nf_flow_table_hw_module_init(void) ++{ ++ INIT_WORK(&nf_flow_offload_hw_work, flow_offload_hw_work); ++ nf_flow_table_hw_register(&flow_offload_hw); ++ ++ return 0; ++} ++ ++static void __exit nf_flow_table_hw_module_exit(void) ++{ ++ struct flow_offload_hw *offload, *next; ++ LIST_HEAD(hw_offload_pending); ++ ++ nf_flow_table_hw_unregister(&flow_offload_hw); ++ cancel_work_sync(&nf_flow_offload_hw_work); ++ ++ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) { ++ if (offload->ct) ++ nf_conntrack_put(&offload->ct->ct_general); ++ list_del(&offload->list); ++ kfree(offload); ++ } ++} ++ ++module_init(nf_flow_table_hw_module_init); ++module_exit(nf_flow_table_hw_module_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); ++MODULE_ALIAS("nf-flow-table-hw"); +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4866,6 +4866,14 @@ static int nf_tables_flowtable_parse_hoo + if (err < 0) + goto err1; + ++ for (i = 0; i < n; i++) { ++ if (flowtable->data.flags & NF_FLOWTABLE_F_HW && ++ !dev_array[i]->netdev_ops->ndo_flow_offload) { ++ err = -EOPNOTSUPP; ++ goto err1; ++ } ++ } ++ + ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL); + if (!ops) { + err = -ENOMEM; +@@ -4996,10 +5004,19 @@ static int nf_tables_newflowtable(struct + } + + flowtable->data.type = type; ++ write_pnet(&flowtable->data.ft_net, net); ++ + err = type->init(&flowtable->data); + if (err < 0) + goto err3; + ++ if (nla[NFTA_FLOWTABLE_FLAGS]) { ++ flowtable->data.flags = ++ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); ++ if (flowtable->data.flags & ~NF_FLOWTABLE_F_HW) ++ goto err4; ++ } ++ + err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK], + flowtable); + if (err < 0) +@@ -5097,7 +5114,8 @@ static int nf_tables_fill_flowtable_info + nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) || + nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || + nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle), +- NFTA_FLOWTABLE_PAD)) ++ NFTA_FLOWTABLE_PAD) || ++ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags))) + goto nla_put_failure; + + nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); +--- a/net/netfilter/nft_flow_offload.c ++++ b/net/netfilter/nft_flow_offload.c +@@ -110,6 +110,9 @@ static void nft_flow_offload_eval(const + if (ret < 0) + goto err_flow_add; + ++ if (flowtable->flags & NF_FLOWTABLE_F_HW) ++ nf_flow_offload_hw_add(nft_net(pkt), flow, ct); ++ + return; + + err_flow_add: |