From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 20 Feb 2018 15:56:02 +0100
Subject: [PATCH] netfilter: add xt_OFFLOAD target

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 create mode 100644 net/netfilter/xt_OFFLOAD.c

--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -76,8 +76,6 @@ config NF_TABLES_ARP
 	help
 	  This option enables the ARP support for nf_tables.
 
-endif # NF_TABLES
-
 config NF_FLOW_TABLE_IPV4
 	tristate "Netfilter flow table IPv4 module"
 	depends on NF_FLOW_TABLE
@@ -86,6 +84,8 @@ config NF_FLOW_TABLE_IPV4
 
 	  To compile it as a module, choose M here.
 
+endif # NF_TABLES
+
 config NF_DUP_IPV4
 	tristate "Netfilter IPv4 packet duplication to alternate destination"
 	depends on !NF_CONNTRACK || NF_CONNTRACK
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -97,7 +97,6 @@ config NFT_FIB_IPV6
 	  multicast or blackhole.
 
 endif # NF_TABLES_IPV6
-endif # NF_TABLES
 
 config NF_FLOW_TABLE_IPV6
 	tristate "Netfilter flow table IPv6 module"
@@ -107,6 +106,8 @@ config NF_FLOW_TABLE_IPV6
 
 	  To compile it as a module, choose M here.
 
+endif # NF_TABLES
+
 config NF_DUP_IPV6
 	tristate "Netfilter IPv6 packet duplication to alternate destination"
 	depends on !NF_CONNTRACK || NF_CONNTRACK
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -671,8 +671,6 @@ config NFT_FIB_NETDEV
 
 endif # NF_TABLES_NETDEV
 
-endif # NF_TABLES
-
 config NF_FLOW_TABLE_INET
 	tristate "Netfilter flow table mixed IPv4/IPv6 module"
 	depends on NF_FLOW_TABLE
@@ -681,11 +679,12 @@ config NF_FLOW_TABLE_INET
 
 	  To compile it as a module, choose M here.
 
+endif # NF_TABLES
+
 config NF_FLOW_TABLE
 	tristate "Netfilter flow table module"
 	depends on NETFILTER_INGRESS
 	depends on NF_CONNTRACK
-	depends on NF_TABLES
 	help
 	  This option adds the flow table core infrastructure.
 
@@ -974,6 +973,15 @@ config NETFILTER_XT_TARGET_NOTRACK
 	depends on NETFILTER_ADVANCED
 	select NETFILTER_XT_TARGET_CT
 
+config NETFILTER_XT_TARGET_FLOWOFFLOAD
+	tristate '"FLOWOFFLOAD" target support'
+	depends on NF_FLOW_TABLE
+	depends on NETFILTER_INGRESS
+	help
+	  This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
+	  module to speed up processing of packets by bypassing the usual
+	  netfilter chains.
+
 config NETFILTER_XT_TARGET_RATEEST
 	tristate '"RATEEST" target support'
 	depends on NETFILTER_ADVANCED
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -134,6 +134,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
--- /dev/null
+++ b/net/netfilter/xt_FLOWOFFLOAD.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2018 Felix Fietkau <nbd@nbd.name>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_flow_table.h>
+
+static struct nf_flowtable nf_flowtable;
+static HLIST_HEAD(hooks);
+static DEFINE_SPINLOCK(hooks_lock);
+static struct delayed_work hook_work;
+
+struct xt_flowoffload_hook {
+	struct hlist_node list;
+	struct nf_hook_ops ops;
+	struct net *net;
+	bool registered;
+	bool used;
+};
+
+static unsigned int
+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
+			const struct nf_hook_state *state)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		return nf_flow_offload_ip_hook(priv, skb, state);
+	case htons(ETH_P_IPV6):
+		return nf_flow_offload_ipv6_hook(priv, skb, state);
+	}
+
+	return NF_ACCEPT;
+}
+
+static int
+xt_flowoffload_create_hook(struct net_device *dev)
+{
+	struct xt_flowoffload_hook *hook;
+	struct nf_hook_ops *ops;
+
+	hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
+	if (!hook)
+		return -ENOMEM;
+
+	ops = &hook->ops;
+	ops->pf = NFPROTO_NETDEV;
+	ops->hooknum = NF_NETDEV_INGRESS;
+	ops->priority = 10;
+	ops->priv = &nf_flowtable;
+	ops->hook = xt_flowoffload_net_hook;
+	ops->dev = dev;
+
+	hlist_add_head(&hook->list, &hooks);
+	mod_delayed_work(system_power_efficient_wq, &hook_work, 0);
+
+	return 0;
+}
+
+static struct xt_flowoffload_hook *
+flow_offload_lookup_hook(struct net_device *dev)
+{
+	struct xt_flowoffload_hook *hook;
+
+	hlist_for_each_entry(hook, &hooks, list) {
+		if (hook->ops.dev == dev)
+			return hook;
+	}
+
+	return NULL;
+}
+
+static void
+xt_flowoffload_check_device(struct net_device *dev)
+{
+	struct xt_flowoffload_hook *hook;
+
+	spin_lock_bh(&hooks_lock);
+	hook = flow_offload_lookup_hook(dev);
+	if (hook)
+		hook->used = true;
+	else
+		xt_flowoffload_create_hook(dev);
+	spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_register_hooks(void)
+{
+	struct xt_flowoffload_hook *hook;
+
+restart:
+	hlist_for_each_entry(hook, &hooks, list) {
+		if (hook->registered)
+			continue;
+
+		hook->registered = true;
+		hook->net = dev_net(hook->ops.dev);
+		spin_unlock_bh(&hooks_lock);
+		nf_register_net_hook(hook->net, &hook->ops);
+		spin_lock_bh(&hooks_lock);
+		goto restart;
+	}
+
+}
+
+static void
+xt_flowoffload_cleanup_hooks(void)
+{
+	struct xt_flowoffload_hook *hook;
+
+restart:
+	hlist_for_each_entry(hook, &hooks, list) {
+		if (hook->used || !hook->registered)
+			continue;
+
+		hlist_del(&hook->list);
+		spin_unlock_bh(&hooks_lock);
+		nf_unregister_net_hook(hook->net, &hook->ops);
+		kfree(hook);
+		spin_lock_bh(&hooks_lock);
+		goto restart;
+	}
+
+}
+
+static void
+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
+{
+	struct flow_offload_tuple *tuple = &flow->tuplehash[0].tuple;
+	struct xt_flowoffload_hook *hook;
+	bool *found = data;
+
+	spin_lock_bh(&hooks_lock);
+	hlist_for_each_entry(hook, &hooks, list) {
+		if (hook->ops.dev->ifindex != tuple->iifidx &&
+		    hook->ops.dev->ifindex != tuple->oifidx)
+			continue;
+
+		hook->used = true;
+		*found = true;
+	}
+	spin_unlock_bh(&hooks_lock);
+}
+
+static void
+xt_flowoffload_hook_work(struct work_struct *work)
+{
+	struct xt_flowoffload_hook *hook;
+	bool found = false;
+	int err;
+
+	spin_lock_bh(&hooks_lock);
+	xt_flowoffload_register_hooks();
+	hlist_for_each_entry(hook, &hooks, list)
+		hook->used = false;
+	spin_unlock_bh(&hooks_lock);
+
+	err = nf_flow_table_iterate(&nf_flowtable, xt_flowoffload_check_hook,
+				    &found);
+	if (err && err != -EAGAIN)
+		goto out;
+
+	spin_lock_bh(&hooks_lock);
+	xt_flowoffload_cleanup_hooks();
+	spin_unlock_bh(&hooks_lock);
+
+out:
+	if (found)
+		queue_delayed_work(system_power_efficient_wq, &hook_work, HZ);
+}
+
+static bool
+xt_flowoffload_skip(struct sk_buff *skb, int family)
+{
+	if (skb_sec_path(skb))
+		return true;
+
+	if (family == NFPROTO_IPV4) {
+		const struct ip_options *opt = &(IPCB(skb)->opt);
+
+		if (unlikely(opt->optlen))
+			return true;
+	}
+
+	return false;
+}
+
+static struct dst_entry *
+xt_flowoffload_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir,
+		   const struct xt_action_param *par, int ifindex)
+{
+	struct dst_entry *dst = NULL;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	switch (xt_family(par)) {
+	case NFPROTO_IPV4:
+		fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+		fl.u.ip4.flowi4_oif = ifindex;
+		break;
+	case NFPROTO_IPV6:
+		fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
+		fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+		fl.u.ip6.flowi6_oif = ifindex;
+		break;
+	}
+
+	nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
+
+	return dst;
+}
+
+static int
+xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
+		     const struct xt_action_param *par,
+		     struct nf_flow_route *route, enum ip_conntrack_dir dir)
+{
+	struct dst_entry *this_dst, *other_dst;
+
+	this_dst = xt_flowoffload_dst(ct, !dir, par, xt_out(par)->ifindex);
+	other_dst = xt_flowoffload_dst(ct, dir, par, xt_in(par)->ifindex);
+
+	route->tuple[dir].dst = this_dst;
+	route->tuple[!dir].dst = other_dst;
+
+	if (!this_dst || !other_dst)
+		return -ENOENT;
+
+	if (dst_xfrm(this_dst) || dst_xfrm(other_dst))
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int
+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_flowoffload_target_info *info = par->targinfo;
+	struct tcphdr _tcph, *tcph = NULL;
+	enum ip_conntrack_info ctinfo;
+	enum ip_conntrack_dir dir;
+	struct nf_flow_route route;
+	struct flow_offload *flow = NULL;
+	struct nf_conn *ct;
+	struct net *net;
+
+	if (xt_flowoffload_skip(skb, xt_family(par)))
+		return XT_CONTINUE;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return XT_CONTINUE;
+
+	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
+	case IPPROTO_TCP:
+		if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+			return XT_CONTINUE;
+
+		tcph = skb_header_pointer(skb, par->thoff,
+					  sizeof(_tcph), &_tcph);
+		if (unlikely(!tcph || tcph->fin || tcph->rst))
+			return XT_CONTINUE;
+		break;
+	case IPPROTO_UDP:
+		break;
+	default:
+		return XT_CONTINUE;
+	}
+
+	if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
+	    ct->status & IPS_SEQ_ADJUST)
+		return XT_CONTINUE;
+
+	if (!nf_ct_is_confirmed(ct))
+		return XT_CONTINUE;
+
+	if (!xt_in(par) || !xt_out(par))
+		return XT_CONTINUE;
+
+	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
+		return XT_CONTINUE;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	if (xt_flowoffload_route(skb, ct, par, &route, dir) == 0)
+		flow = flow_offload_alloc(ct, &route);
+
+	dst_release(route.tuple[dir].dst);
+	dst_release(route.tuple[!dir].dst);
+
+	if (!flow)
+		goto err_flow_route;
+
+	if (tcph) {
+		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+	}
+
+	if (flow_offload_add(&nf_flowtable, flow) < 0)
+		goto err_flow_add;
+
+	xt_flowoffload_check_device(xt_in(par));
+	xt_flowoffload_check_device(xt_out(par));
+
+	net = read_pnet(&nf_flowtable.ft_net);
+	if (!net)
+		write_pnet(&nf_flowtable.ft_net, xt_net(par));
+
+	if (info->flags & XT_FLOWOFFLOAD_HW)
+		nf_flow_offload_hw_add(xt_net(par), flow, ct);
+
+	return XT_CONTINUE;
+
+err_flow_add:
+	flow_offload_free(flow);
+err_flow_route:
+	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+	return XT_CONTINUE;
+}
+
+
+static int flowoffload_chk(const struct xt_tgchk_param *par)
+{
+	struct xt_flowoffload_target_info *info = par->targinfo;
+
+	if (info->flags & ~XT_FLOWOFFLOAD_MASK)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct xt_target offload_tg_reg __read_mostly = {
+	.family		= NFPROTO_UNSPEC,
+	.name		= "FLOWOFFLOAD",
+	.revision	= 0,
+	.targetsize	= sizeof(struct xt_flowoffload_target_info),
+	.usersize	= sizeof(struct xt_flowoffload_target_info),
+	.checkentry	= flowoffload_chk,
+	.target		= flowoffload_tg,
+	.me		= THIS_MODULE,
+};
+
+static int xt_flowoffload_table_init(struct nf_flowtable *table)
+{
+	table->flags = NF_FLOWTABLE_F_HW;
+	nf_flow_table_init(table);
+	return 0;
+}
+
+static void xt_flowoffload_table_cleanup(struct nf_flowtable *table)
+{
+	nf_flow_table_free(table);
+}
+
+static int flow_offload_netdev_event(struct notifier_block *this,
+				     unsigned long event, void *ptr)
+{
+	struct xt_flowoffload_hook *hook = NULL;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	if (event != NETDEV_UNREGISTER)
+		return NOTIFY_DONE;
+
+	spin_lock_bh(&hooks_lock);
+	hook = flow_offload_lookup_hook(dev);
+	if (hook) {
+		hlist_del(&hook->list);
+	}
+	spin_unlock_bh(&hooks_lock);
+	if (hook) {
+		nf_unregister_net_hook(hook->net, &hook->ops);
+		kfree(hook);
+	}
+
+	nf_flow_table_cleanup(dev_net(dev), dev);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block flow_offload_netdev_notifier = {
+	.notifier_call	= flow_offload_netdev_event,
+};
+
+static int __init xt_flowoffload_tg_init(void)
+{
+	int ret;
+
+	register_netdevice_notifier(&flow_offload_netdev_notifier);
+
+	INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work);
+
+	ret = xt_flowoffload_table_init(&nf_flowtable);
+	if (ret)
+		return ret;
+
+	ret = xt_register_target(&offload_tg_reg);
+	if (ret)
+		xt_flowoffload_table_cleanup(&nf_flowtable);
+
+	return ret;
+}
+
+static void __exit xt_flowoffload_tg_exit(void)
+{
+	xt_unregister_target(&offload_tg_reg);
+	xt_flowoffload_table_cleanup(&nf_flowtable);
+	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+}
+
+MODULE_LICENSE("GPL");
+module_init(xt_flowoffload_tg_init);
+module_exit(xt_flowoffload_tg_exit);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -6,7 +6,6 @@
 #include <linux/netdevice.h>
 #include <net/ip.h>
 #include <net/ip6_route.h>
-#include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
--- /dev/null
+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _XT_FLOWOFFLOAD_H
+#define _XT_FLOWOFFLOAD_H
+
+#include <linux/types.h>
+
+enum {
+	XT_FLOWOFFLOAD_HW	= 1 << 0,
+
+	XT_FLOWOFFLOAD_MASK	= XT_FLOWOFFLOAD_HW
+};
+
+struct xt_flowoffload_target_info {
+	__u32 flags;
+};
+
+#endif /* _XT_FLOWOFFLOAD_H */
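
For reference, a minimal ruleset exercising the new target might look like the
following. The plain form needs only what this patch provides; the --hw variant
assumes the matching iptables userspace extension (which sets XT_FLOWOFFLOAD_HW
and is shipped separately, not as part of this patch):

  # software fast path: offload established forwarded flows past the
  # regular netfilter chains
  iptables -I FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j FLOWOFFLOAD

  # additionally request hardware offload where the driver supports it
  iptables -I FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j FLOWOFFLOAD --hw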