From: Felix Fietkau
Date: Wed, 10 Feb 2021 19:39:23 +0100
Subject: [PATCH] netfilter: flowtable: rework ingress vlan matching

When dealing with bridges with VLAN filtering and DSA/switchdev offload,
the hardware could offload adding a VLAN tag configured in the bridge.
Since there doesn't seem to be an easy way to detect that, this patch
reworks the code to optionally match the last VLAN tag that would
otherwise be inserted by the bridge.
This matters when bypassing the bridge and attaching an ingress hook
on a DSA port below it.

Signed-off-by: Felix Fietkau
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -115,14 +115,15 @@ struct flow_offload_tuple {
 
 	u8				l3proto;
 	u8				l4proto;
-	struct {
-		u16			id;
-		__be16			proto;
-	} in_vlan[NF_FLOW_TABLE_VLAN_MAX];
 
 	/* All members above are keys for lookups, see flow_offload_hash(). */
 	struct { }			__hash;
 
+	struct {
+		u16			id;
+		__be16			proto;
+	} in_vlan[NF_FLOW_TABLE_VLAN_MAX], in_pvid;
+
 	u8				dir:4,
 					xmit_type:2,
 					in_vlan_num:2;
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -281,12 +281,13 @@ static bool nf_flow_skb_vlan_protocol(co
 }
 
 static void nf_flow_vlan_pop(struct sk_buff *skb,
-			     struct flow_offload_tuple_rhash *tuplehash)
+			     struct flow_offload_tuple_rhash *tuplehash,
+			     bool strip_pvid)
 {
 	struct vlan_hdr *vlan_hdr;
 	int i;
 
-	for (i = 0; i < tuplehash->tuple.in_vlan_num; i++) {
+	for (i = 0; i < tuplehash->tuple.in_vlan_num + strip_pvid; i++) {
 		if (skb_vlan_tag_present(skb)) {
 			__vlan_hwaccel_clear_tag(skb);
 			continue;
@@ -316,6 +317,31 @@ static unsigned int nf_flow_queue_xmit(s
 	return NF_STOLEN;
 }
 
+static bool
+nf_flow_offload_check_vlan(struct flow_offload_tuple *tuple,
+			   struct flow_offload_tuple *flow_tuple,
+			   bool *strip_pvid)
+{
+	int i, cur = 0;
+
+	if (flow_tuple->in_pvid.proto &&
+	    !memcmp(&tuple->in_vlan[0], &flow_tuple->in_pvid,
+		    sizeof(tuple->in_vlan[0])))
+		cur++;
+
+	*strip_pvid = cur;
+
+	for (i = 0; i < flow_tuple->in_vlan_num; i++, cur++) {
+		if (!memcmp(&tuple->in_vlan[cur], &flow_tuple->in_vlan[i],
+			    sizeof(tuple->in_vlan[0])))
+			continue;
+
+		return false;
+	}
+
+	return true;
+}
+
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
@@ -329,6 +355,7 @@ nf_flow_offload_ip_hook(void *priv, stru
 	struct rtable *rt;
 	unsigned int thoff;
 	struct iphdr *iph;
+	bool strip_pvid;
 	__be32 nexthop;
 	u32 offset = 0;
 	int ret;
@@ -344,6 +371,10 @@ nf_flow_offload_ip_hook(void *priv, stru
 	if (tuplehash == NULL)
 		return NF_ACCEPT;
 
+	if (!nf_flow_offload_check_vlan(&tuple, &tuplehash->tuple,
+					&strip_pvid))
+		return NF_ACCEPT;
+
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
@@ -368,7 +399,7 @@ nf_flow_offload_ip_hook(void *priv, stru
 		return NF_ACCEPT;
 	}
 
-	nf_flow_vlan_pop(skb, tuplehash);
+	nf_flow_vlan_pop(skb, tuplehash, strip_pvid);
 	thoff -= offset;
 
 	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
@@ -596,6 +627,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
 	struct net_device *outdev;
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
+	bool strip_pvid;
 	u32 offset = 0;
 	int ret;
 
@@ -610,6 +642,10 @@ nf_flow_offload_ipv6_hook(void *priv, st
 	if (tuplehash == NULL)
 		return NF_ACCEPT;
 
+	if (!nf_flow_offload_check_vlan(&tuple, &tuplehash->tuple,
+					&strip_pvid))
+		return NF_ACCEPT;
+
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
@@ -630,7 +666,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
 		return NF_ACCEPT;
 	}
 
-	nf_flow_vlan_pop(skb, tuplehash);
+	nf_flow_vlan_pop(skb, tuplehash, strip_pvid);
 
 	if (skb_try_make_writable(skb, sizeof(*ip6h)))
 		return NF_DROP;
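
For readers unfamiliar with the lookup side, the new nf_flow_offload_check_vlan()
treats the flow's stored in_pvid entry as an optional leading tag in front of the
in_vlan list parsed from the packet: the PVID tag may or may not be present at the
ingress hook, depending on whether the hardware already pushed it. The following
user-space sketch illustrates only that matching rule; the struct, helper and macro
names (vlan_key, check_vlan, VLAN_MAX) are simplified stand-ins made up for this
illustration, not the kernel definitions.

/* Stand-alone illustration of the optional-PVID prefix match.
 * Build with: cc -o vlan_match vlan_match.c
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct vlan_key {
	unsigned short id;
	unsigned short proto;	/* 0 means "no entry" */
};

#define VLAN_MAX 2

/* pkt[] holds the tags parsed from the packet, flow[]/flow_num the tags
 * stored in the flow entry, pvid the tag the bridge would insert. The
 * pvid may optionally appear as the first packet tag; if it does, it is
 * consumed before the remaining tags are compared one by one.
 */
static bool check_vlan(const struct vlan_key *pkt,
		       const struct vlan_key *flow, int flow_num,
		       const struct vlan_key *pvid, bool *strip_pvid)
{
	int i, cur = 0;

	if (pvid->proto && !memcmp(&pkt[0], pvid, sizeof(pkt[0])))
		cur++;		/* packet already carries the PVID tag */

	*strip_pvid = cur;

	for (i = 0; i < flow_num; i++, cur++)
		if (memcmp(&pkt[cur], &flow[i], sizeof(pkt[0])))
			return false;

	return true;
}

int main(void)
{
	struct vlan_key tagged[VLAN_MAX + 1] = { { 10, 0x8100 } };
	struct vlan_key untagged[VLAN_MAX + 1] = { { 0, 0 } };
	struct vlan_key flow[VLAN_MAX] = { { 0, 0 } };	/* flow stored no tags */
	struct vlan_key pvid = { 10, 0x8100 };		/* PVID the bridge would add */
	bool strip_pvid;

	/* Matches and strips: hardware already pushed the PVID tag. */
	printf("tagged:   match=%d strip_pvid=%d\n",
	       check_vlan(tagged, flow, 0, &pvid, &strip_pvid), strip_pvid);

	/* Also matches, nothing to strip: the PVID tag is absent. */
	printf("untagged: match=%d strip_pvid=%d\n",
	       check_vlan(untagged, flow, 0, &pvid, &strip_pvid), strip_pvid);
	return 0;
}

Both calls succeed, which is the point of the rework: the same flow entry matches
whether or not the bridge PVID tag was already added below it, and strip_pvid tells
nf_flow_vlan_pop() whether one extra tag has to be removed.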