diff options
Diffstat (limited to 'target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch')
-rw-r--r-- | target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch b/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch
new file mode 100644
index 0000000000..6f36171605
--- /dev/null
+++ b/target/linux/generic/backport-4.14/323-netfilter-flow-table-support-for-IPv4.patch
@@ -0,0 +1,389 @@
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 7 Jan 2018 01:04:15 +0100
+Subject: [PATCH] netfilter: flow table support for IPv4
+
+This patch adds the IPv4 flow table type, that implements the datapath
+flow table to forward IPv4 traffic. Rationale is:
+
+1) Look up for the packet in the flow table, from the ingress hook.
+2) If there's a hit, decrement ttl and pass it on to the neighbour layer
+   for transmission.
+3) If there's a miss, packet is passed up to the classic forwarding
+   path.
+
+This patch also supports layer 3 source and destination NAT.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c
+
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -77,6 +77,14 @@ config NF_TABLES_ARP
+ 
+ endif # NF_TABLES
+ 
++config NF_FLOW_TABLE_IPV4
++	select NF_FLOW_TABLE
++	tristate "Netfilter flow table IPv4 module"
++	help
++	  This option adds the flow table IPv4 support.
++
++	  To compile it as a module, choose M here.
++
+ config NF_DUP_IPV4
+ 	tristate "Netfilter IPv4 packet duplication to alternate destination"
+ 	depends on !NF_CONNTRACK || NF_CONNTRACK
+--- a/net/ipv4/netfilter/Makefile
++++ b/net/ipv4/netfilter/Makefile
+@@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redi
+ obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
+ obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+ 
++# flow table support
++obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
++
+ # generic IP tables
+ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+ 
+--- /dev/null
++++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
+@@ -0,0 +1,338 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/ip.h>
++#include <linux/netdevice.h>
++#include <net/ip.h>
++#include <net/neighbour.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_tables.h>
++/* For layer 4 checksum field offset. */
++#include <linux/tcp.h>
++#include <linux/udp.h>
++
++/*
++ * Patch the TCP checksum after an IPv4 address covered by the pseudo-header
++ * changed from @addr to @new_addr. @thoff is the offset of the transport
++ * header from the network header. Returns 0 on success, -1 if the TCP
++ * header cannot be pulled or made writable.
++ */
++static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
++			      __be32 addr, __be32 new_addr)
++{
++	struct tcphdr *tcph;
++
++	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
++	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
++		return -1;
++
++	tcph = (void *)(skb_network_header(skb) + thoff);
++	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
++
++	return 0;
++}
++
++/*
++ * UDP counterpart of nf_flow_nat_ip_tcp(). A zero checksum field is left
++ * untouched unless the checksum is still to be filled in
++ * (CHECKSUM_PARTIAL); a result of zero is stored as CSUM_MANGLED_0.
++ * Returns 0 on success, -1 on failure.
++ */
++static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
++			      __be32 addr, __be32 new_addr)
++{
++	struct udphdr *udph;
++
++	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
++	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
++		return -1;
++
++	udph = (void *)(skb_network_header(skb) + thoff);
++	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
++		inet_proto_csum_replace4(&udph->check, skb, addr,
++					 new_addr, true);
++		if (!udph->check)
++			udph->check = CSUM_MANGLED_0;
++	}
++
++	return 0;
++}
++
++/*
++ * Fix up the layer 4 checksum according to the IPv4 protocol field.
++ * Returns 0 on success and for protocols other than TCP/UDP, -1 on failure.
++ * Do not return NF_DROP here: that verdict is 0, which the "< 0" checks in
++ * the callers would take for success.
++ */
++static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
++				  unsigned int thoff, __be32 addr,
++				  __be32 new_addr)
++{
++	switch (iph->protocol) {
++	case IPPROTO_TCP:
++		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
++			return -1;
++		break;
++	case IPPROTO_UDP:
++		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
++			return -1;
++		break;
++	}
++
++	return 0;
++}
++
++/*
++ * Source NAT: rewrite the source address in the original direction, or the
++ * destination address in the reply direction, with the address stored in
++ * the opposite tuple, then fix the IPv4 header and layer 4 checksums.
++ */
++static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++			   struct iphdr *iph, unsigned int thoff,
++			   enum flow_offload_tuple_dir dir)
++{
++	__be32 addr, new_addr;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		addr = iph->saddr;
++		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
++		iph->saddr = new_addr;
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		addr = iph->daddr;
++		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
++		iph->daddr = new_addr;
++		break;
++	default:
++		return -1;
++	}
++	csum_replace4(&iph->check, addr, new_addr);
++
++	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++}
++
++/*
++ * Destination NAT, the mirror image of nf_flow_snat_ip(). The IPv4 header
++ * checksum covers both addresses, so it must be updated here as well.
++ */
++static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++			   struct iphdr *iph, unsigned int thoff,
++			   enum flow_offload_tuple_dir dir)
++{
++	__be32 addr, new_addr;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		addr = iph->daddr;
++		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
++		iph->daddr = new_addr;
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		addr = iph->saddr;
++		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
++		iph->saddr = new_addr;
++		break;
++	default:
++		return -1;
++	}
++	csum_replace4(&iph->check, addr, new_addr);
++
++	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++}
++
++/*
++ * Apply the port and address translations selected by the flow flags. The
++ * port helpers may move the packet headers, so the IPv4 header is looked
++ * up again with ip_hdr() rather than reusing a pointer taken before them.
++ */
++static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++			  enum flow_offload_tuple_dir dir)
++{
++	struct iphdr *iph = ip_hdr(skb);
++	unsigned int thoff = iph->ihl * 4;
++
++	if (flow->flags & FLOW_OFFLOAD_SNAT &&
++	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
++	     nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
++		return -1;
++
++	iph = ip_hdr(skb);
++	if (flow->flags & FLOW_OFFLOAD_DNAT &&
++	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
++	     nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
++		return -1;
++
++	return 0;
++}
++
++/* The header carries IPv4 options when it is longer than struct iphdr. */
++static bool ip_has_options(unsigned int thoff)
++{
++	return thoff != sizeof(struct iphdr);
++}
++
++/*
++ * Fill @tuple from @skb as received on @dev. Returns 0 on success, -1 for
++ * packets that are left to the classic forwarding path: fragments, packets
++ * with IPv4 options, protocols other than TCP/UDP, an expiring TTL, or
++ * headers that cannot be pulled.
++ */
++static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
++			    struct flow_offload_tuple *tuple)
++{
++	struct flow_ports *ports;
++	unsigned int thoff;
++	struct iphdr *iph;
++
++	if (!pskb_may_pull(skb, sizeof(*iph)))
++		return -1;
++
++	iph = ip_hdr(skb);
++	thoff = iph->ihl * 4;
++
++	if (ip_is_fragment(iph) ||
++	    unlikely(ip_has_options(thoff)))
++		return -1;
++
++	if (iph->protocol != IPPROTO_TCP &&
++	    iph->protocol != IPPROTO_UDP)
++		return -1;
++
++	/* TTL would expire here: leave it to the classic forwarding path. */
++	if (iph->ttl <= 1)
++		return -1;
++
++	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++		return -1;
++
++	/* pskb_may_pull() may have reallocated the header, reload it. */
++	iph = ip_hdr(skb);
++	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
++
++	tuple->src_v4.s_addr	= iph->saddr;
++	tuple->dst_v4.s_addr	= iph->daddr;
++	tuple->src_port		= ports->source;
++	tuple->dst_port		= ports->dest;
++	tuple->l3proto		= AF_INET;
++	tuple->l4proto		= iph->protocol;
++	tuple->iifidx		= dev->ifindex;
++
++	return 0;
++}
++
++/* Based on ip_exceeds_mtu(). */
++static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
++{
++	if (skb->len <= mtu)
++		return false;
++
++	if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
++		return false;
++
++	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
++		return false;
++
++	return true;
++}
++
++/* Check @skb against the forwarding MTU of the route @rt. */
++static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
++{
++	u32 mtu;
++
++	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
++	if (__nf_flow_exceeds_mtu(skb, mtu))
++		return true;
++
++	return false;
++}
++
++/*
++ * Flow table hook: packets matching an offloaded flow get NAT applied and
++ * their TTL decremented, then go straight to the neighbour layer
++ * (NF_STOLEN). Everything else is handed back with NF_ACCEPT; NF_DROP is
++ * returned when the packet cannot be made writable or mangled.
++ */
++static unsigned int
++nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
++			const struct nf_hook_state *state)
++{
++	struct flow_offload_tuple_rhash *tuplehash;
++	struct nf_flowtable *flow_table = priv;
++	struct flow_offload_tuple tuple = {};
++	enum flow_offload_tuple_dir dir;
++	struct flow_offload *flow;
++	struct net_device *outdev;
++	const struct rtable *rt;
++	struct iphdr *iph;
++	__be32 nexthop;
++
++	if (skb->protocol != htons(ETH_P_IP))
++		return NF_ACCEPT;
++
++	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
++		return NF_ACCEPT;
++
++	tuplehash = flow_offload_lookup(flow_table, &tuple);
++	if (tuplehash == NULL)
++		return NF_ACCEPT;
++
++	outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
++	if (!outdev)
++		return NF_ACCEPT;
++
++	dir = tuplehash->tuple.dir;
++	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
++
++	rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
++	if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
++		return NF_ACCEPT;
++
++	if (skb_try_make_writable(skb, sizeof(*iph)))
++		return NF_DROP;
++
++	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
++	    nf_flow_nat_ip(flow, skb, dir) < 0)
++		return NF_DROP;
++
++	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++	iph = ip_hdr(skb);
++	ip_decrease_ttl(iph);
++
++	skb->dev = outdev;
++	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
++	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
++
++	return NF_STOLEN;
++}
++
++/* IPv4 flow table type (un)registered with nf_tables by this module. */
++static struct nf_flowtable_type flowtable_ipv4 = {
++	.family		= NFPROTO_IPV4,
++	.params		= &nf_flow_offload_rhash_params,
++	.gc		= nf_flow_offload_work_gc,
++	.hook		= nf_flow_offload_ip_hook,
++	.owner		= THIS_MODULE,
++};
++
++static int __init nf_flow_ipv4_module_init(void)
++{
++	nft_register_flowtable_type(&flowtable_ipv4);
++
++	return 0;
++}
++
++static void __exit nf_flow_ipv4_module_exit(void)
++{
++	nft_unregister_flowtable_type(&flowtable_ipv4);
++}
++
++module_init(nf_flow_ipv4_module_init);
++module_exit(nf_flow_ipv4_module_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_ALIAS_NF_FLOWTABLE(AF_INET);