From ad4232e3e986d89ac49f01bb2cec44d1e66d6969 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 23 Jan 2018 20:52:45 +0100 Subject: ar71xx: add unaligned access hacks for VXLAN Gives a ~5% performance gain. Signed-off-by: Matthias Schiffer --- .../patches-4.9/910-unaligned_access_hacks.patch | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch') diff --git a/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch b/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch index 92f0a67658..f26af24d14 100644 --- a/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch +++ b/target/linux/ar71xx/patches-4.9/910-unaligned_access_hacks.patch @@ -886,3 +886,95 @@ }; if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -1811,15 +1811,15 @@ static int vxlan_build_skb(struct sk_buf + goto out_free; + + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); +- vxh->vx_flags = VXLAN_HF_VNI; +- vxh->vx_vni = vxlan_vni_field(vni); ++ net_hdr_word(&vxh->vx_flags) = VXLAN_HF_VNI; ++ net_hdr_word(&vxh->vx_vni) = vxlan_vni_field(vni); + + if (type & SKB_GSO_TUNNEL_REMCSUM) { + unsigned int start; + + start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr); +- vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset); +- vxh->vx_flags |= VXLAN_HF_RCO; ++ net_hdr_word(&vxh->vx_vni) |= vxlan_compute_rco(start, skb->csum_offset); ++ net_hdr_word(&vxh->vx_flags) |= VXLAN_HF_RCO; + + if (!skb_is_gso(skb)) { + skb->ip_summed = CHECKSUM_NONE; +--- a/include/linux/etherdevice.h ++++ b/include/linux/etherdevice.h +@@ -435,7 +435,7 @@ static inline bool is_etherdev_addr(cons + * @b: Pointer to Ethernet header + * + * Compare two Ethernet headers, returns 0 if equal. +- * This assumes that the network header (i.e., IP header) is 4-byte ++ * This assumes that the network header (i.e., IP header) is 2-byte + * aligned OR the platform can handle unaligned access. This is the + * case for all packets coming into netif_receive_skb or similar + * entry points. +@@ -458,11 +458,12 @@ static inline unsigned long compare_ethe + fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); + return fold; + #else +- u32 *a32 = (u32 *)((u8 *)a + 2); +- u32 *b32 = (u32 *)((u8 *)b + 2); ++ const u16 *a16 = a; ++ const u16 *b16 = b; + +- return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | +- (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); ++ return (a16[0] ^ b16[0]) | (a16[1] ^ b16[1]) | (a16[2] ^ b16[2]) | ++ (a16[3] ^ b16[3]) | (a16[4] ^ b16[4]) | (a16[5] ^ b16[5]) | ++ (a16[6] ^ b16[6]); + #endif + } + +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -215,7 +215,7 @@ struct sk_buff **tcp_gro_receive(struct + + th2 = tcp_hdr(p); + +- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { ++ if (net_hdr_word(&th->source) ^ net_hdr_word(&th2->source)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } +@@ -233,8 +233,8 @@ found: + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); + for (i = sizeof(*th); i < thlen; i += 4) +- flush |= *(u32 *)((u8 *)th + i) ^ +- *(u32 *)((u8 *)th2 + i); ++ flush |= net_hdr_word((u8 *)th + i) ^ ++ net_hdr_word((u8 *)th2 + i); + + /* When we receive our second frame we can made a decision on if we + * continue this flow as an atomic flow with a fixed ID or if we use +--- a/net/ipv6/netfilter/ip6table_mangle.c ++++ b/net/ipv6/netfilter/ip6table_mangle.c +@@ -58,7 +58,7 @@ ip6t_mangle_out(struct sk_buff *skb, con + hop_limit = ipv6_hdr(skb)->hop_limit; + + /* flowlabel and prio (includes version, which shouldn't change either */ +- flowlabel = *((u_int32_t *)ipv6_hdr(skb)); ++ flowlabel = net_hdr_word(ipv6_hdr(skb)); + + ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); + +@@ -67,7 +67,7 @@ ip6t_mangle_out(struct sk_buff *skb, con + !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit || +- flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { ++ flowlabel != net_hdr_word(ipv6_hdr(skb)))) { + err = ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); -- cgit v1.2.3