about | summary | refs | log | tree | commit | diff | stats
path: root/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch')
-rw-r--r-- target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch 267
1 files changed, 267 insertions, 0 deletions
diff --git a/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch b/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch
new file mode 100644
index 0000000000..ceb25e85ad
--- /dev/null
+++ b/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch
@@ -0,0 +1,267 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Mon, 23 Mar 2015 02:42:26 +0100
+Subject: [PATCH] bgmac: implement scatter/gather support
+
+Always use software checksumming, since the hardware does not have any
+checksum offload support.
+This significantly improves local TCP tx performance.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
+ }
+
++static void /* write TX descriptor i: buffer address, length and ctl0 flags */
++bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
++ int i, int len, u32 ctl0)
++{
++ struct bgmac_slot_info *slot;
++ struct bgmac_dma_desc *dma_desc;
++ u32 ctl1;
++
++ if (i == ring->num_slots - 1) /* i is the last slot index of the ring */
++ ctl0 |= BGMAC_DESC_CTL0_EOT;
++
++ ctl1 = len & BGMAC_DESC_CTL1_LEN; /* bits outside the length mask are dropped */
++
++ slot = &ring->slots[i]; /* only read here: caller stores slot->dma_addr first */
++ dma_desc = &ring->cpu_base[i];
++ dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
++ dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
++ dma_desc->ctl0 = cpu_to_le32(ctl0); /* descriptor words are stored little-endian */
++ dma_desc->ctl1 = cpu_to_le32(ctl1);
++}
++
+ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+ struct bgmac_dma_ring *ring,
+ struct sk_buff *skb)
+ {
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct net_device *net_dev = bgmac->net_dev;
+- struct bgmac_dma_desc *dma_desc;
+- struct bgmac_slot_info *slot;
+- u32 ctl0, ctl1;
++ struct bgmac_slot_info *slot = &ring->slots[ring->end];
+ int free_slots;
++ int nr_frags;
++ u32 flags;
++ int index = ring->end;
++ int i;
+
+ if (skb->len > BGMAC_DESC_CTL1_LEN) {
+ bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+- goto err_stop_drop;
++ goto err_drop;
+ }
+
++ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
++ goto err_drop; /* software checksum failed: drop instead of sending a bad csum */
++
++ nr_frags = skb_shinfo(skb)->nr_frags;
++
+ if (ring->start <= ring->end)
+ free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+ else
+ free_slots = ring->start - ring->end;
+- if (free_slots == 1) {
++
++ if (free_slots <= nr_frags + 1) {
+ bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+ netif_stop_queue(net_dev);
+ return NETDEV_TX_BUSY;
+ }
+
+- slot = &ring->slots[ring->end];
+- slot->skb = skb;
+- slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
++ slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+- if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+- bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+- ring->mmio_base);
+- goto err_stop_drop;
+- }
++ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++ goto err_dma_head;
+
+- ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
+- if (ring->end == ring->num_slots - 1)
+- ctl0 |= BGMAC_DESC_CTL0_EOT;
+- ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
++ flags = BGMAC_DESC_CTL0_SOF;
++ if (!nr_frags)
++ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++ bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
++ flags = 0;
++
++ for (i = 0; i < nr_frags; i++) {
++ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
++ int len = skb_frag_size(frag);
++
++ index = (index + 1) % BGMAC_TX_RING_SLOTS;
++ slot = &ring->slots[index];
++ slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
++ len, DMA_TO_DEVICE);
++ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++ goto err_dma;
+
+- dma_desc = ring->cpu_base;
+- dma_desc += ring->end;
+- dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+- dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+- dma_desc->ctl0 = cpu_to_le32(ctl0);
+- dma_desc->ctl1 = cpu_to_le32(ctl1);
++ if (i == nr_frags - 1)
++ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++ bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
++ }
++
++ slot->skb = skb;
+
+ netdev_sent_queue(net_dev, skb->len);
+
+@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ /* Increase ring->end to point empty slot. We tell hardware the first
+ * slot it should *not* read.
+ */
+- if (++ring->end >= BGMAC_TX_RING_SLOTS)
+- ring->end = 0;
++ ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+ ring->index_base +
+ ring->end * sizeof(struct bgmac_dma_desc));
+
+- /* Always keep one slot free to allow detecting bugged calls. */
+- if (--free_slots == 1)
++ free_slots -= nr_frags + 1;
++ if (free_slots < 8)
+ netif_stop_queue(net_dev);
+
+ return NETDEV_TX_OK;
+
+-err_stop_drop:
+- netif_stop_queue(net_dev);
++err_dma:
++ dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
++ DMA_TO_DEVICE);
++
++ while (i > 0) {
++ int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
++ struct bgmac_slot_info *slot = &ring->slots[index];
++ u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
++ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
++
++ dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
++ }
++
++err_dma_head:
++ bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
++ ring->mmio_base);
++
++err_drop:
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
+
+ while (ring->start != empty_slot) {
+ struct bgmac_slot_info *slot = &ring->slots[ring->start];
++ u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
++ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+- if (slot->skb) {
++ if (!slot->dma_addr) {
++ bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
++ ring->start, ring->end);
++ goto next;
++ }
++
++ if (le32_to_cpu(ring->cpu_base[ring->start].ctl0) & BGMAC_DESC_CTL0_SOF) /* SOF is a ctl0 bit */
+ /* Unmap no longer used buffer */
+- dma_unmap_single(dma_dev, slot->dma_addr,
+- slot->skb->len, DMA_TO_DEVICE);
+- slot->dma_addr = 0;
++ dma_unmap_single(dma_dev, slot->dma_addr, len,
++ DMA_TO_DEVICE);
++ else
++ dma_unmap_page(dma_dev, slot->dma_addr, len,
++ DMA_TO_DEVICE);
+
++ if (slot->skb) {
+ bytes_compl += slot->skb->len;
+ pkts_compl++;
+
+ /* Free memory! :) */
+ dev_kfree_skb(slot->skb);
+ slot->skb = NULL;
+- } else {
+- bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+- ring->start, ring->end);
+ }
+
++next:
++ slot->dma_addr = 0;
+ if (++ring->start >= BGMAC_TX_RING_SLOTS)
+ ring->start = 0;
+ freed = true;
+ }
+
++ if (!pkts_compl)
++ return;
++
+ netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
+
+- if (freed && netif_queue_stopped(bgmac->net_dev))
++ if (netif_queue_stopped(bgmac->net_dev))
+ netif_wake_queue(bgmac->net_dev);
+ }
+
+@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
+ struct bgmac_dma_ring *ring)
+ {
+ struct device *dma_dev = bgmac->core->dma_dev;
++ struct bgmac_dma_desc *dma_desc = ring->cpu_base;
+ struct bgmac_slot_info *slot;
+ int i;
+
+ for (i = 0; i < ring->num_slots; i++) {
++ int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN; /* descriptors are little-endian */
++
+ slot = &ring->slots[i];
+- if (slot->skb) {
+- if (slot->dma_addr)
+- dma_unmap_single(dma_dev, slot->dma_addr,
+- slot->skb->len, DMA_TO_DEVICE);
+- dev_kfree_skb(slot->skb);
+- }
++ dev_kfree_skb(slot->skb);
++
++ if (!slot->dma_addr)
++ continue;
++
++ if (le32_to_cpu(dma_desc[i].ctl0) & BGMAC_DESC_CTL0_SOF) /* head buffer: dma_map_single() */
++ dma_unmap_single(dma_dev, slot->dma_addr,
++ len, DMA_TO_DEVICE);
++ else
++ dma_unmap_page(dma_dev, slot->dma_addr,
++ len, DMA_TO_DEVICE);
+ }
+ }
+
+@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
+ goto err_dma_free;
+ }
+
++ net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++ net_dev->hw_features = net_dev->features;
++ net_dev->vlan_features = net_dev->features;
++
+ err = register_netdev(bgmac->net_dev);
+ if (err) {
+ bgmac_err(bgmac, "Cannot register net device\n");