about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Felix Fietkau <nbd@openwrt.org> 2015-03-25 14:30:46 +0000
committer Felix Fietkau <nbd@openwrt.org> 2015-03-25 14:30:46 +0000
commit e11b30a9873ac1d3a3049546d856273b97ec171e (patch)
tree 900fc2cbaa052c30a83997f578691537eeb0c14c
parent a325d7259ead56e938898b711630804043bdf25b (diff)
download upstream-e11b30a9873ac1d3a3049546d856273b97ec171e.tar.gz
upstream-e11b30a9873ac1d3a3049546d856273b97ec171e.tar.bz2
upstream-e11b30a9873ac1d3a3049546d856273b97ec171e.zip
kernel: merge upstream bgmac driver improvements
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
git-svn-id: svn://svn.openwrt.org/openwrt/trunk@44978 3c298f89-4303-0410-b956-a3cf2f4a3e73
-rw-r--r-- target/linux/generic/patches-3.18/077-01-bgmac-fix-descriptor-frame-start-end-definitions.patch 24
-rw-r--r-- target/linux/generic/patches-3.18/077-02-bgmac-implement-GRO-and-use-build_skb.patch 189
-rw-r--r-- target/linux/generic/patches-3.18/077-03-bgmac-implement-scatter-gather-support.patch 267
3 files changed, 480 insertions, 0 deletions
diff --git a/target/linux/generic/patches-3.18/077-01-bgmac-fix-descriptor-frame-start-end-definitions.patch b/target/linux/generic/patches-3.18/077-01-bgmac-fix-descriptor-frame-start-end-definitions.patch
new file mode 100644
index 0000000000..fdfae3aeff
--- /dev/null
+++ b/target/linux/generic/patches-3.18/077-01-bgmac-fix-descriptor-frame-start-end-definitions.patch
@@ -0,0 +1,24 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Mon, 23 Mar 2015 02:40:06 +0100
+Subject: [PATCH] bgmac: fix descriptor frame start/end definitions
+
+The start-of-frame and end-of-frame bits were accidentally swapped.
+In the current code it does not make any difference, since they are
+always used together.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -345,8 +345,8 @@
+
+ #define BGMAC_DESC_CTL0_EOT 0x10000000 /* End of ring */
+ #define BGMAC_DESC_CTL0_IOC 0x20000000 /* IRQ on complete */
+-#define BGMAC_DESC_CTL0_SOF 0x40000000 /* Start of frame */
+-#define BGMAC_DESC_CTL0_EOF 0x80000000 /* End of frame */
++#define BGMAC_DESC_CTL0_EOF 0x40000000 /* End of frame */
++#define BGMAC_DESC_CTL0_SOF 0x80000000 /* Start of frame */
+ #define BGMAC_DESC_CTL1_LEN 0x00001FFF
+
+ #define BGMAC_PHY_NOREGS 0x1E
diff --git a/target/linux/generic/patches-3.18/077-02-bgmac-implement-GRO-and-use-build_skb.patch b/target/linux/generic/patches-3.18/077-02-bgmac-implement-GRO-and-use-build_skb.patch
new file mode 100644
index 0000000000..3636fb619a
--- /dev/null
+++ b/target/linux/generic/patches-3.18/077-02-bgmac-implement-GRO-and-use-build_skb.patch
@@ -0,0 +1,189 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Mon, 23 Mar 2015 02:41:25 +0100
+Subject: [PATCH] bgmac: implement GRO and use build_skb
+
+This improves performance for routing and local rx
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -276,31 +276,31 @@ static int bgmac_dma_rx_skb_for_slot(str
+ struct bgmac_slot_info *slot)
+ {
+ struct device *dma_dev = bgmac->core->dma_dev;
+- struct sk_buff *skb;
+ dma_addr_t dma_addr;
+ struct bgmac_rx_header *rx;
++ void *buf;
+
+ /* Alloc skb */
+- skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
+- if (!skb)
++ buf = netdev_alloc_frag(BGMAC_RX_ALLOC_SIZE);
++ if (!buf)
+ return -ENOMEM;
+
+ /* Poison - if everything goes fine, hardware will overwrite it */
+- rx = (struct bgmac_rx_header *)skb->data;
++ rx = buf;
+ rx->len = cpu_to_le16(0xdead);
+ rx->flags = cpu_to_le16(0xbeef);
+
+ /* Map skb for the DMA */
+- dma_addr = dma_map_single(dma_dev, skb->data,
+- BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
++ dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
++ DMA_FROM_DEVICE);
+ if (dma_mapping_error(dma_dev, dma_addr)) {
+ bgmac_err(bgmac, "DMA mapping error\n");
+- dev_kfree_skb(skb);
++ put_page(virt_to_head_page(buf));
+ return -ENOMEM;
+ }
+
+ /* Update the slot */
+- slot->skb = skb;
++ slot->buf = buf;
+ slot->dma_addr = dma_addr;
+
+ return 0;
+@@ -343,8 +343,9 @@ static int bgmac_dma_rx_read(struct bgma
+ while (ring->start != ring->end) {
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct bgmac_slot_info *slot = &ring->slots[ring->start];
+- struct sk_buff *skb = slot->skb;
+- struct bgmac_rx_header *rx;
++ struct bgmac_rx_header *rx = slot->buf;
++ struct sk_buff *skb;
++ void *buf = slot->buf;
+ u16 len, flags;
+
+ /* Unmap buffer to make it accessible to the CPU */
+@@ -352,7 +353,6 @@ static int bgmac_dma_rx_read(struct bgma
+ BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+ /* Get info from the header */
+- rx = (struct bgmac_rx_header *)skb->data;
+ len = le16_to_cpu(rx->len);
+ flags = le16_to_cpu(rx->flags);
+
+@@ -393,12 +393,13 @@ static int bgmac_dma_rx_read(struct bgma
+ dma_unmap_single(dma_dev, old_dma_addr,
+ BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+
++ skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
+ skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
+ skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, bgmac->net_dev);
+- netif_receive_skb(skb);
++ napi_gro_receive(&bgmac->napi, skb);
+ handled++;
+ } while (0);
+
+@@ -434,12 +435,11 @@ static bool bgmac_dma_unaligned(struct b
+ return false;
+ }
+
+-static void bgmac_dma_ring_free(struct bgmac *bgmac,
+- struct bgmac_dma_ring *ring)
++static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
++ struct bgmac_dma_ring *ring)
+ {
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct bgmac_slot_info *slot;
+- int size;
+ int i;
+
+ for (i = 0; i < ring->num_slots; i++) {
+@@ -451,23 +451,55 @@ static void bgmac_dma_ring_free(struct b
+ dev_kfree_skb(slot->skb);
+ }
+ }
++}
+
+- if (ring->cpu_base) {
+- /* Free ring of descriptors */
+- size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+- dma_free_coherent(dma_dev, size, ring->cpu_base,
+- ring->dma_base);
++static void bgmac_dma_rx_ring_free(struct bgmac *bgmac,
++ struct bgmac_dma_ring *ring)
++{
++ struct device *dma_dev = bgmac->core->dma_dev;
++ struct bgmac_slot_info *slot;
++ int i;
++
++ for (i = 0; i < ring->num_slots; i++) {
++ slot = &ring->slots[i];
++ if (!slot->buf)
++ continue;
++
++ if (slot->dma_addr)
++ dma_unmap_single(dma_dev, slot->dma_addr,
++ BGMAC_RX_BUF_SIZE,
++ DMA_FROM_DEVICE);
++ put_page(virt_to_head_page(slot->buf));
+ }
+ }
+
++static void bgmac_dma_ring_desc_free(struct bgmac *bgmac,
++ struct bgmac_dma_ring *ring)
++{
++ struct device *dma_dev = bgmac->core->dma_dev;
++ int size;
++
++ if (!ring->cpu_base)
++ return;
++
++ /* Free ring of descriptors */
++ size = ring->num_slots * sizeof(struct bgmac_dma_desc);
++ dma_free_coherent(dma_dev, size, ring->cpu_base,
++ ring->dma_base);
++}
++
+ static void bgmac_dma_free(struct bgmac *bgmac)
+ {
+ int i;
+
+- for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
+- bgmac_dma_ring_free(bgmac, &bgmac->tx_ring[i]);
+- for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
+- bgmac_dma_ring_free(bgmac, &bgmac->rx_ring[i]);
++ for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
++ bgmac_dma_tx_ring_free(bgmac, &bgmac->tx_ring[i]);
++ bgmac_dma_ring_desc_free(bgmac, &bgmac->tx_ring[i]);
++ }
++ for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
++ bgmac_dma_rx_ring_free(bgmac, &bgmac->rx_ring[i]);
++ bgmac_dma_ring_desc_free(bgmac, &bgmac->rx_ring[i]);
++ }
+ }
+
+ static int bgmac_dma_alloc(struct bgmac *bgmac)
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -362,6 +362,8 @@
+ #define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */
+ #define BGMAC_RX_MAX_FRAME_SIZE 1536 /* Copied from b44/tg3 */
+ #define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
++#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
++ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+ #define BGMAC_BFL_ENETROBO 0x0010 /* has ephy roboswitch spi */
+ #define BGMAC_BFL_ENETADM 0x0080 /* has ADMtek switch */
+@@ -383,7 +385,10 @@
+ #define ETHER_MAX_LEN 1518
+
+ struct bgmac_slot_info {
+- struct sk_buff *skb;
++ union {
++ struct sk_buff *skb;
++ void *buf;
++ };
+ dma_addr_t dma_addr;
+ };
+
diff --git a/target/linux/generic/patches-3.18/077-03-bgmac-implement-scatter-gather-support.patch b/target/linux/generic/patches-3.18/077-03-bgmac-implement-scatter-gather-support.patch
new file mode 100644
index 0000000000..5cb21a565a
--- /dev/null
+++ b/target/linux/generic/patches-3.18/077-03-bgmac-implement-scatter-gather-support.patch
@@ -0,0 +1,267 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Mon, 23 Mar 2015 02:42:26 +0100
+Subject: [PATCH] bgmac: implement scatter/gather support
+
+Always use software checksumming, since the hardware does not have any
+checksum offload support.
+This significantly improves local TCP tx performance.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
+ }
+
++static void
++bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
++ int i, int len, u32 ctl0)
++{
++ struct bgmac_slot_info *slot;
++ struct bgmac_dma_desc *dma_desc;
++ u32 ctl1;
++
++ if (i == ring->num_slots - 1)
++ ctl0 |= BGMAC_DESC_CTL0_EOT;
++
++ ctl1 = len & BGMAC_DESC_CTL1_LEN;
++
++ slot = &ring->slots[i];
++ dma_desc = &ring->cpu_base[i];
++ dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
++ dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
++ dma_desc->ctl0 = cpu_to_le32(ctl0);
++ dma_desc->ctl1 = cpu_to_le32(ctl1);
++}
++
+ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+ struct bgmac_dma_ring *ring,
+ struct sk_buff *skb)
+ {
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct net_device *net_dev = bgmac->net_dev;
+- struct bgmac_dma_desc *dma_desc;
+- struct bgmac_slot_info *slot;
+- u32 ctl0, ctl1;
++ struct bgmac_slot_info *slot = &ring->slots[ring->end];
+ int free_slots;
++ int nr_frags;
++ u32 flags;
++ int index = ring->end;
++ int i;
+
+ if (skb->len > BGMAC_DESC_CTL1_LEN) {
+ bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+- goto err_stop_drop;
++ goto err_drop;
+ }
+
++ if (skb->ip_summed == CHECKSUM_PARTIAL)
++ skb_checksum_help(skb);
++
++ nr_frags = skb_shinfo(skb)->nr_frags;
++
+ if (ring->start <= ring->end)
+ free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+ else
+ free_slots = ring->start - ring->end;
+- if (free_slots == 1) {
++
++ if (free_slots <= nr_frags + 1) {
+ bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+ netif_stop_queue(net_dev);
+ return NETDEV_TX_BUSY;
+ }
+
+- slot = &ring->slots[ring->end];
+- slot->skb = skb;
+- slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
++ slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+- if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+- bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+- ring->mmio_base);
+- goto err_stop_drop;
+- }
++ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++ goto err_dma_head;
+
+- ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
+- if (ring->end == ring->num_slots - 1)
+- ctl0 |= BGMAC_DESC_CTL0_EOT;
+- ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
++ flags = BGMAC_DESC_CTL0_SOF;
++ if (!nr_frags)
++ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++ bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
++ flags = 0;
++
++ for (i = 0; i < nr_frags; i++) {
++ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
++ int len = skb_frag_size(frag);
++
++ index = (index + 1) % BGMAC_TX_RING_SLOTS;
++ slot = &ring->slots[index];
++ slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
++ len, DMA_TO_DEVICE);
++ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++ goto err_dma;
+
+- dma_desc = ring->cpu_base;
+- dma_desc += ring->end;
+- dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+- dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+- dma_desc->ctl0 = cpu_to_le32(ctl0);
+- dma_desc->ctl1 = cpu_to_le32(ctl1);
++ if (i == nr_frags - 1)
++ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++ bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
++ }
++
++ slot->skb = skb;
+
+ netdev_sent_queue(net_dev, skb->len);
+
+@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ /* Increase ring->end to point empty slot. We tell hardware the first
+ * slot it should *not* read.
+ */
+- if (++ring->end >= BGMAC_TX_RING_SLOTS)
+- ring->end = 0;
++ ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+ ring->index_base +
+ ring->end * sizeof(struct bgmac_dma_desc));
+
+- /* Always keep one slot free to allow detecting bugged calls. */
+- if (--free_slots == 1)
++ free_slots -= nr_frags + 1;
++ if (free_slots < 8)
+ netif_stop_queue(net_dev);
+
+ return NETDEV_TX_OK;
+
+-err_stop_drop:
+- netif_stop_queue(net_dev);
++err_dma:
++ dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
++ DMA_TO_DEVICE);
++
++ while (i > 0) {
++ int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
++ struct bgmac_slot_info *slot = &ring->slots[index];
++ u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
++ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
++
++ dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
++ }
++
++err_dma_head:
++ bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
++ ring->mmio_base);
++
++err_drop:
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
+
+ while (ring->start != empty_slot) {
+ struct bgmac_slot_info *slot = &ring->slots[ring->start];
++ u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
++ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+- if (slot->skb) {
++ if (!slot->dma_addr) {
++ bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
++ ring->start, ring->end);
++ goto next;
++ }
++
++ if (ctl1 & BGMAC_DESC_CTL0_SOF)
+ /* Unmap no longer used buffer */
+- dma_unmap_single(dma_dev, slot->dma_addr,
+- slot->skb->len, DMA_TO_DEVICE);
+- slot->dma_addr = 0;
++ dma_unmap_single(dma_dev, slot->dma_addr, len,
++ DMA_TO_DEVICE);
++ else
++ dma_unmap_page(dma_dev, slot->dma_addr, len,
++ DMA_TO_DEVICE);
+
++ if (slot->skb) {
+ bytes_compl += slot->skb->len;
+ pkts_compl++;
+
+ /* Free memory! :) */
+ dev_kfree_skb(slot->skb);
+ slot->skb = NULL;
+- } else {
+- bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+- ring->start, ring->end);
+ }
+
++next:
++ slot->dma_addr = 0;
+ if (++ring->start >= BGMAC_TX_RING_SLOTS)
+ ring->start = 0;
+ freed = true;
+ }
+
++ if (!pkts_compl)
++ return;
++
+ netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
+
+- if (freed && netif_queue_stopped(bgmac->net_dev))
++ if (netif_queue_stopped(bgmac->net_dev))
+ netif_wake_queue(bgmac->net_dev);
+ }
+
+@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
+ struct bgmac_dma_ring *ring)
+ {
+ struct device *dma_dev = bgmac->core->dma_dev;
++ struct bgmac_dma_desc *dma_desc = ring->cpu_base;
+ struct bgmac_slot_info *slot;
+ int i;
+
+ for (i = 0; i < ring->num_slots; i++) {
++ int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
++
+ slot = &ring->slots[i];
+- if (slot->skb) {
+- if (slot->dma_addr)
+- dma_unmap_single(dma_dev, slot->dma_addr,
+- slot->skb->len, DMA_TO_DEVICE);
+- dev_kfree_skb(slot->skb);
+- }
++ dev_kfree_skb(slot->skb);
++
++ if (!slot->dma_addr)
++ continue;
++
++ if (slot->skb)
++ dma_unmap_single(dma_dev, slot->dma_addr,
++ len, DMA_TO_DEVICE);
++ else
++ dma_unmap_page(dma_dev, slot->dma_addr,
++ len, DMA_TO_DEVICE);
+ }
+ }
+
+@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
+ goto err_dma_free;
+ }
+
++ net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++ net_dev->hw_features = net_dev->features;
++ net_dev->vlan_features = net_dev->features;
++
+ err = register_netdev(bgmac->net_dev);
+ if (err) {
+ bgmac_err(bgmac, "Cannot register net device\n");