diff options
Diffstat (limited to 'target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch')
-rw-r--r-- | target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch b/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch
new file mode 100644
index 0000000000..ceb25e85ad
--- /dev/null
+++ b/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch
@@ -0,0 +1,267 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Mon, 23 Mar 2015 02:42:26 +0100
+Subject: [PATCH] bgmac: implement scatter/gather support
+
+Always use software checksumming, since the hardware does not have any
+checksum offload support.
+This significantly improves local TCP tx performance.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
+ 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
+ }
+ 
++static void
++bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
++		     int i, int len, u32 ctl0)
++{
++	struct bgmac_slot_info *slot;
++	struct bgmac_dma_desc *dma_desc;
++	u32 ctl1;
++
++	if (i == ring->num_slots - 1)
++		ctl0 |= BGMAC_DESC_CTL0_EOT;
++
++	ctl1 = len & BGMAC_DESC_CTL1_LEN;
++
++	slot = &ring->slots[i];
++	dma_desc = &ring->cpu_base[i];
++	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
++	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
++	dma_desc->ctl0 = cpu_to_le32(ctl0);
++	dma_desc->ctl1 = cpu_to_le32(ctl1);
++}
++
+ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+ 				    struct bgmac_dma_ring *ring,
+ 				    struct sk_buff *skb)
+ {
+ 	struct device *dma_dev = bgmac->core->dma_dev;
+ 	struct net_device *net_dev = bgmac->net_dev;
+-	struct bgmac_dma_desc *dma_desc;
+-	struct bgmac_slot_info *slot;
+-	u32 ctl0, ctl1;
++	struct bgmac_slot_info *slot = &ring->slots[ring->end];
+ 	int free_slots;
++	int nr_frags;
++	u32 flags;
++	int index = ring->end;
++	int i;
+ 
+ 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
+ 		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+-		goto err_stop_drop;
++		goto err_drop;
+ 	}
+ 
++	if (skb->ip_summed == CHECKSUM_PARTIAL)
++		skb_checksum_help(skb);
++
++	nr_frags = skb_shinfo(skb)->nr_frags;
++
+ 	if (ring->start <= ring->end)
+ 		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+ 	else
+ 		free_slots = ring->start - ring->end;
+-	if (free_slots == 1) {
++
++	if (free_slots <= nr_frags + 1) {
+ 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+ 		netif_stop_queue(net_dev);
+ 		return NETDEV_TX_BUSY;
+ 	}
+ 
+-	slot = &ring->slots[ring->end];
+-	slot->skb = skb;
+-	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
++	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
+ 					DMA_TO_DEVICE);
+-	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+-		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+-			  ring->mmio_base);
+-		goto err_stop_drop;
+-	}
++	if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++		goto err_dma_head;
+ 
+-	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
+-	if (ring->end == ring->num_slots - 1)
+-		ctl0 |= BGMAC_DESC_CTL0_EOT;
+-	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
++	flags = BGMAC_DESC_CTL0_SOF;
++	if (!nr_frags)
++		flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++	bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
++	flags = 0;
++
++	for (i = 0; i < nr_frags; i++) {
++		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
++		int len = skb_frag_size(frag);
++
++		index = (index + 1) % BGMAC_TX_RING_SLOTS;
++		slot = &ring->slots[index];
++		slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
++						  len, DMA_TO_DEVICE);
++		if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++			goto err_dma;
+ 
+-	dma_desc = ring->cpu_base;
+-	dma_desc += ring->end;
+-	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+-	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+-	dma_desc->ctl0 = cpu_to_le32(ctl0);
+-	dma_desc->ctl1 = cpu_to_le32(ctl1);
++		if (i == nr_frags - 1)
++			flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++		bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
++	}
++
++	slot->skb = skb;
+ 
+ 	netdev_sent_queue(net_dev, skb->len);
+ 
+@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ 	/* Increase ring->end to point empty slot. We tell hardware the first
+ 	 * slot it should *not* read.
+ 	 */
+-	if (++ring->end >= BGMAC_TX_RING_SLOTS)
+-		ring->end = 0;
++	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+ 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+ 		    ring->index_base +
+ 		    ring->end * sizeof(struct bgmac_dma_desc));
+ 
+-	/* Always keep one slot free to allow detecting bugged calls. */
+-	if (--free_slots == 1)
++	free_slots -= nr_frags + 1;
++	if (free_slots < 8)
+ 		netif_stop_queue(net_dev);
+ 
+ 	return NETDEV_TX_OK;
+ 
+-err_stop_drop:
+-	netif_stop_queue(net_dev);
++err_dma:
++	dma_unmap_single(dma_dev, ring->slots[ring->end].dma_addr,
++			 skb_headlen(skb), DMA_TO_DEVICE);
++	/* Frags 0..i-1 were mapped into the i slots after ring->end */
++	while (i-- > 0) {
++		int index = (ring->end + i + 1) % BGMAC_TX_RING_SLOTS;
++		struct bgmac_slot_info *slot = &ring->slots[index];
++		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
++		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
++
++		dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
++	}
++
++err_dma_head:
++	bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
++		  ring->mmio_base);
++
++err_drop:
+ 	dev_kfree_skb(skb);
+ 	return NETDEV_TX_OK;
+ }
+@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
+ 
+ 	while (ring->start != empty_slot) {
+ 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
++		struct bgmac_dma_desc *desc = &ring->cpu_base[ring->start];
++		int len = le32_to_cpu(desc->ctl1) & BGMAC_DESC_CTL1_LEN;
+ 
+-		if (slot->skb) {
++		if (!slot->dma_addr) {
++			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
++				  ring->start, ring->end);
++			goto next;
++		}
++		/* SOF (a ctl0 bit) marks the dma_map_single()'d head buffer */
++		if (le32_to_cpu(desc->ctl0) & BGMAC_DESC_CTL0_SOF)
+ 			/* Unmap no longer used buffer */
+-			dma_unmap_single(dma_dev, slot->dma_addr,
+-					 slot->skb->len, DMA_TO_DEVICE);
+-			slot->dma_addr = 0;
++			dma_unmap_single(dma_dev, slot->dma_addr, len,
++					 DMA_TO_DEVICE);
++		else
++			dma_unmap_page(dma_dev, slot->dma_addr, len,
++				       DMA_TO_DEVICE);
+ 
++		if (slot->skb) {
+ 			bytes_compl += slot->skb->len;
+ 			pkts_compl++;
+ 
+ 			/* Free memory! :) */
+ 			dev_kfree_skb(slot->skb);
+ 			slot->skb = NULL;
+-		} else {
+-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+-				  ring->start, ring->end);
+ 		}
+ 
++next:
++		slot->dma_addr = 0;
+ 		if (++ring->start >= BGMAC_TX_RING_SLOTS)
+ 			ring->start = 0;
+ 		freed = true;
+ 	}
+ 
++	if (!pkts_compl)
++		return;
++
+ 	netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
+ 
+-	if (freed && netif_queue_stopped(bgmac->net_dev))
++	if (netif_queue_stopped(bgmac->net_dev))
+ 		netif_wake_queue(bgmac->net_dev);
+ }
+ 
+@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
+ 				   struct bgmac_dma_ring *ring)
+ {
+ 	struct device *dma_dev = bgmac->core->dma_dev;
++	struct bgmac_dma_desc *dma_desc = ring->cpu_base;
+ 	struct bgmac_slot_info *slot;
+ 	int i;
+ 
+ 	for (i = 0; i < ring->num_slots; i++) {
++		int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN;
++
+ 		slot = &ring->slots[i];
+-		if (slot->skb) {
+-			if (slot->dma_addr)
+-				dma_unmap_single(dma_dev, slot->dma_addr,
+-						 slot->skb->len, DMA_TO_DEVICE);
+-			dev_kfree_skb(slot->skb);
+-		}
++		dev_kfree_skb(slot->skb);
++
++		if (!slot->dma_addr)
++			continue;
++		/* SOF slots are single-mapped, frag slots are page-mapped */
++		if (le32_to_cpu(dma_desc[i].ctl0) & BGMAC_DESC_CTL0_SOF)
++			dma_unmap_single(dma_dev, slot->dma_addr,
++					 len, DMA_TO_DEVICE);
++		else
++			dma_unmap_page(dma_dev, slot->dma_addr,
++				       len, DMA_TO_DEVICE);
+ 	}
+ }
+ 
+@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
+ 		goto err_dma_free;
+ 	}
+ 
++	net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++	net_dev->hw_features = net_dev->features;
++	net_dev->vlan_features = net_dev->features;
++
+ 	err = register_netdev(bgmac->net_dev);
+ 	if (err) {
+ 		bgmac_err(bgmac, "Cannot register net device\n");