diff options
author | Álvaro Fernández Rojas <noltari@gmail.com> | 2021-02-17 19:07:23 +0100 |
---|---|---|
committer | Álvaro Fernández Rojas <noltari@gmail.com> | 2021-02-17 20:40:16 +0100 |
commit | f323dec4f866d9ea75eae11c6762d0144d100628 (patch) | |
tree | c40f33e9a89e935b32e6a1520f02019200e7cc17 /target/linux/bcm63xx/patches-5.10/025-v5.12-bcm63xx_enet-convert-to-build_skb.patch | |
parent | 5bb9954826229782d4d285d65dfcca8a1c9e20d9 (diff) | |
download | upstream-f323dec4f866d9ea75eae11c6762d0144d100628.tar.gz upstream-f323dec4f866d9ea75eae11c6762d0144d100628.tar.bz2 upstream-f323dec4f866d9ea75eae11c6762d0144d100628.zip |
bcm63xx: add kernel 5.10 support
Runtime-tested on Comtrend AR-5387un.
Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Diffstat (limited to 'target/linux/bcm63xx/patches-5.10/025-v5.12-bcm63xx_enet-convert-to-build_skb.patch')
-rw-r--r-- | target/linux/bcm63xx/patches-5.10/025-v5.12-bcm63xx_enet-convert-to-build_skb.patch | 347 |
1 files changed, 347 insertions, 0 deletions
diff --git a/target/linux/bcm63xx/patches-5.10/025-v5.12-bcm63xx_enet-convert-to-build_skb.patch b/target/linux/bcm63xx/patches-5.10/025-v5.12-bcm63xx_enet-convert-to-build_skb.patch new file mode 100644 index 0000000000..08191d47ce --- /dev/null +++ b/target/linux/bcm63xx/patches-5.10/025-v5.12-bcm63xx_enet-convert-to-build_skb.patch @@ -0,0 +1,347 @@ +From d27de0ef5ef995df2cc5f5c006c0efcf0a62b6af Mon Sep 17 00:00:00 2001 +From: Sieng Piaw Liew <liew.s.piaw@gmail.com> +Date: Wed, 6 Jan 2021 22:42:07 +0800 +Subject: [PATCH 6/7] bcm63xx_enet: convert to build_skb + +We can increase the efficiency of rx path by using buffers to receive +packets then build SKBs around them just before passing into the network +stack. In contrast, preallocating SKBs too early reduces CPU cache +efficiency. + +Check if we're in NAPI context when refilling RX. Normally we're almost +always running in NAPI context. Dispatch to napi_alloc_frag directly +instead of relying on netdev_alloc_frag which does the same but +with the overhead of local_bh_disable/enable. + +Tested on BCM6328 320 MHz and iperf3 -M 512 to measure packet/sec +performance. Included netif_receive_skb_list and NET_IP_ALIGN +optimizations. + +Before: +[ ID] Interval Transfer Bandwidth Retr +[ 4] 0.00-10.00 sec 49.9 MBytes 41.9 Mbits/sec 197 sender +[ 4] 0.00-10.00 sec 49.3 MBytes 41.3 Mbits/sec receiver + +After: +[ ID] Interval Transfer Bandwidth Retr +[ 4] 0.00-30.00 sec 171 MBytes 47.8 Mbits/sec 272 sender +[ 4] 0.00-30.00 sec 170 MBytes 47.6 Mbits/sec receiver + +Signed-off-by: Sieng Piaw Liew <liew.s.piaw@gmail.com> +Acked-by: Florian Fainelli <f.fainelli@gmail.com> +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +--- + drivers/net/ethernet/broadcom/bcm63xx_enet.c | 111 ++++++++++--------- + drivers/net/ethernet/broadcom/bcm63xx_enet.h | 14 ++- + 2 files changed, 71 insertions(+), 54 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c ++++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c +@@ -220,7 +220,7 @@ static void bcm_enet_mdio_write_mii(stru + /* + * refill rx queue + */ +-static int bcm_enet_refill_rx(struct net_device *dev) ++static int bcm_enet_refill_rx(struct net_device *dev, bool napi_mode) + { + struct bcm_enet_priv *priv; + +@@ -228,29 +228,29 @@ static int bcm_enet_refill_rx(struct net + + while (priv->rx_desc_count < priv->rx_ring_size) { + struct bcm_enet_desc *desc; +- struct sk_buff *skb; +- dma_addr_t p; + int desc_idx; + u32 len_stat; + + desc_idx = priv->rx_dirty_desc; + desc = &priv->rx_desc_cpu[desc_idx]; + +- if (!priv->rx_skb[desc_idx]) { +- if (priv->enet_is_sw) +- skb = netdev_alloc_skb_ip_align(dev, priv->rx_skb_size); ++ if (!priv->rx_buf[desc_idx]) { ++ void *buf; ++ ++ if (likely(napi_mode)) ++ buf = napi_alloc_frag(priv->rx_frag_size); + else +- skb = netdev_alloc_skb(dev, priv->rx_skb_size); +- if (!skb) ++ buf = netdev_alloc_frag(priv->rx_frag_size); ++ if (unlikely(!buf)) + break; +- priv->rx_skb[desc_idx] = skb; +- p = dma_map_single(&priv->pdev->dev, skb->data, +- priv->rx_skb_size, +- DMA_FROM_DEVICE); +- desc->address = p; ++ priv->rx_buf[desc_idx] = buf; ++ desc->address = dma_map_single(&priv->pdev->dev, ++ buf + priv->rx_buf_offset, ++ priv->rx_buf_size, ++ DMA_FROM_DEVICE); + } + +- len_stat = priv->rx_skb_size << DMADESC_LENGTH_SHIFT; ++ len_stat = priv->rx_buf_size << DMADESC_LENGTH_SHIFT; + len_stat |= DMADESC_OWNER_MASK; + if (priv->rx_dirty_desc == priv->rx_ring_size - 1) { + len_stat |= (DMADESC_WRAP_MASK >> priv->dma_desc_shift); +@@ -290,7 +290,7 @@ static void bcm_enet_refill_rx_timer(str + struct net_device *dev = priv->net_dev; + + spin_lock(&priv->rx_lock); +- bcm_enet_refill_rx(dev); ++ bcm_enet_refill_rx(dev, false); + spin_unlock(&priv->rx_lock); + } + +@@ -320,6 +320,7 @@ static int bcm_enet_receive_queue(struct + int desc_idx; + u32 len_stat; + unsigned int len; ++ void *buf; + + desc_idx = priv->rx_curr_desc; + desc = &priv->rx_desc_cpu[desc_idx]; +@@ -365,16 +366,14 @@ static int bcm_enet_receive_queue(struct + } + + /* valid packet */ +- skb = priv->rx_skb[desc_idx]; ++ buf = priv->rx_buf[desc_idx]; + len = (len_stat & DMADESC_LENGTH_MASK) >> DMADESC_LENGTH_SHIFT; + /* don't include FCS */ + len -= 4; + + if (len < copybreak) { +- struct sk_buff *nskb; +- +- nskb = napi_alloc_skb(&priv->napi, len); +- if (!nskb) { ++ skb = napi_alloc_skb(&priv->napi, len); ++ if (unlikely(!skb)) { + /* forget packet, just rearm desc */ + dev->stats.rx_dropped++; + continue; +@@ -382,14 +381,21 @@ static int bcm_enet_receive_queue(struct + + dma_sync_single_for_cpu(kdev, desc->address, + len, DMA_FROM_DEVICE); +- memcpy(nskb->data, skb->data, len); ++ memcpy(skb->data, buf + priv->rx_buf_offset, len); + dma_sync_single_for_device(kdev, desc->address, + len, DMA_FROM_DEVICE); +- skb = nskb; + } else { +- dma_unmap_single(&priv->pdev->dev, desc->address, +- priv->rx_skb_size, DMA_FROM_DEVICE); +- priv->rx_skb[desc_idx] = NULL; ++ dma_unmap_single(kdev, desc->address, ++ priv->rx_buf_size, DMA_FROM_DEVICE); ++ priv->rx_buf[desc_idx] = NULL; ++ ++ skb = build_skb(buf, priv->rx_frag_size); ++ if (unlikely(!skb)) { ++ skb_free_frag(buf); ++ dev->stats.rx_dropped++; ++ continue; ++ } ++ skb_reserve(skb, priv->rx_buf_offset); + } + + skb_put(skb, len); +@@ -403,7 +409,7 @@ static int bcm_enet_receive_queue(struct + netif_receive_skb_list(&rx_list); + + if (processed || !priv->rx_desc_count) { +- bcm_enet_refill_rx(dev); ++ bcm_enet_refill_rx(dev, true); + + /* kick rx dma */ + enet_dmac_writel(priv, priv->dma_chan_en_mask, +@@ -860,22 +866,22 @@ static void bcm_enet_adjust_link(struct + priv->pause_tx ? "tx" : "off"); + } + +-static void bcm_enet_free_rx_skb_ring(struct device *kdev, struct bcm_enet_priv *priv) ++static void bcm_enet_free_rx_buf_ring(struct device *kdev, struct bcm_enet_priv *priv) + { + int i; + + for (i = 0; i < priv->rx_ring_size; i++) { + struct bcm_enet_desc *desc; + +- if (!priv->rx_skb[i]) ++ if (!priv->rx_buf[i]) + continue; + + desc = &priv->rx_desc_cpu[i]; +- dma_unmap_single(kdev, desc->address, priv->rx_skb_size, ++ dma_unmap_single(kdev, desc->address, priv->rx_buf_size, + DMA_FROM_DEVICE); +- kfree_skb(priv->rx_skb[i]); ++ skb_free_frag(priv->rx_buf[i]); + } +- kfree(priv->rx_skb); ++ kfree(priv->rx_buf); + } + + /* +@@ -987,10 +993,10 @@ static int bcm_enet_open(struct net_devi + priv->tx_curr_desc = 0; + spin_lock_init(&priv->tx_lock); + +- /* init & fill rx ring with skbs */ +- priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *), ++ /* init & fill rx ring with buffers */ ++ priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *), + GFP_KERNEL); +- if (!priv->rx_skb) { ++ if (!priv->rx_buf) { + ret = -ENOMEM; + goto out_free_tx_skb; + } +@@ -1007,8 +1013,8 @@ static int bcm_enet_open(struct net_devi + enet_dmac_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0, + ENETDMAC_BUFALLOC, priv->rx_chan); + +- if (bcm_enet_refill_rx(dev)) { +- dev_err(kdev, "cannot allocate rx skb queue\n"); ++ if (bcm_enet_refill_rx(dev, false)) { ++ dev_err(kdev, "cannot allocate rx buffer queue\n"); + ret = -ENOMEM; + goto out; + } +@@ -1102,7 +1108,7 @@ static int bcm_enet_open(struct net_devi + return 0; + + out: +- bcm_enet_free_rx_skb_ring(kdev, priv); ++ bcm_enet_free_rx_buf_ring(kdev, priv); + + out_free_tx_skb: + kfree(priv->tx_skb); +@@ -1208,8 +1214,8 @@ static int bcm_enet_stop(struct net_devi + /* force reclaim of all tx buffers */ + bcm_enet_tx_reclaim(dev, 1); + +- /* free the rx skb ring */ +- bcm_enet_free_rx_skb_ring(kdev, priv); ++ /* free the rx buffer ring */ ++ bcm_enet_free_rx_buf_ring(kdev, priv); + + /* free remaining allocated memory */ + kfree(priv->tx_skb); +@@ -1633,9 +1639,12 @@ static int bcm_enet_change_mtu(struct ne + * align rx buffer size to dma burst len, account FCS since + * it's appended + */ +- priv->rx_skb_size = ALIGN(actual_mtu + ETH_FCS_LEN, ++ priv->rx_buf_size = ALIGN(actual_mtu + ETH_FCS_LEN, + priv->dma_maxburst * 4); + ++ priv->rx_frag_size = SKB_DATA_ALIGN(priv->rx_buf_offset + priv->rx_buf_size) + ++ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); ++ + dev->mtu = new_mtu; + return 0; + } +@@ -1720,6 +1729,7 @@ static int bcm_enet_probe(struct platfor + + priv->enet_is_sw = false; + priv->dma_maxburst = BCMENET_DMA_MAXBURST; ++ priv->rx_buf_offset = NET_SKB_PAD; + + ret = bcm_enet_change_mtu(dev, dev->mtu); + if (ret) +@@ -2137,7 +2147,7 @@ static int bcm_enetsw_open(struct net_de + priv->tx_skb = kcalloc(priv->tx_ring_size, sizeof(struct sk_buff *), + GFP_KERNEL); + if (!priv->tx_skb) { +- dev_err(kdev, "cannot allocate rx skb queue\n"); ++ dev_err(kdev, "cannot allocate tx skb queue\n"); + ret = -ENOMEM; + goto out_free_tx_ring; + } +@@ -2147,11 +2157,11 @@ static int bcm_enetsw_open(struct net_de + priv->tx_curr_desc = 0; + spin_lock_init(&priv->tx_lock); + +- /* init & fill rx ring with skbs */ +- priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *), ++ /* init & fill rx ring with buffers */ ++ priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *), + GFP_KERNEL); +- if (!priv->rx_skb) { +- dev_err(kdev, "cannot allocate rx skb queue\n"); ++ if (!priv->rx_buf) { ++ dev_err(kdev, "cannot allocate rx buffer queue\n"); + ret = -ENOMEM; + goto out_free_tx_skb; + } +@@ -2198,8 +2208,8 @@ static int bcm_enetsw_open(struct net_de + enet_dma_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0, + ENETDMA_BUFALLOC_REG(priv->rx_chan)); + +- if (bcm_enet_refill_rx(dev)) { +- dev_err(kdev, "cannot allocate rx skb queue\n"); ++ if (bcm_enet_refill_rx(dev, false)) { ++ dev_err(kdev, "cannot allocate rx buffer queue\n"); + ret = -ENOMEM; + goto out; + } +@@ -2298,7 +2308,7 @@ static int bcm_enetsw_open(struct net_de + return 0; + + out: +- bcm_enet_free_rx_skb_ring(kdev, priv); ++ bcm_enet_free_rx_buf_ring(kdev, priv); + + out_free_tx_skb: + kfree(priv->tx_skb); +@@ -2348,8 +2358,8 @@ static int bcm_enetsw_stop(struct net_de + /* force reclaim of all tx buffers */ + bcm_enet_tx_reclaim(dev, 1); + +- /* free the rx skb ring */ +- bcm_enet_free_rx_skb_ring(kdev, priv); ++ /* free the rx buffer ring */ ++ bcm_enet_free_rx_buf_ring(kdev, priv); + + /* free remaining allocated memory */ + kfree(priv->tx_skb); +@@ -2648,6 +2658,7 @@ static int bcm_enetsw_probe(struct platf + priv->rx_ring_size = BCMENET_DEF_RX_DESC; + priv->tx_ring_size = BCMENET_DEF_TX_DESC; + priv->dma_maxburst = BCMENETSW_DMA_MAXBURST; ++ priv->rx_buf_offset = NET_SKB_PAD + NET_IP_ALIGN; + + pd = dev_get_platdata(&pdev->dev); + if (pd) { +--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h ++++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h +@@ -230,11 +230,17 @@ struct bcm_enet_priv { + /* next dirty rx descriptor to refill */ + int rx_dirty_desc; + +- /* size of allocated rx skbs */ +- unsigned int rx_skb_size; ++ /* size of allocated rx buffers */ ++ unsigned int rx_buf_size; + +- /* list of skb given to hw for rx */ +- struct sk_buff **rx_skb; ++ /* allocated rx buffer offset */ ++ unsigned int rx_buf_offset; ++ ++ /* size of allocated rx frag */ ++ unsigned int rx_frag_size; ++ ++ /* list of buffer given to hw for rx */ ++ void **rx_buf; + + /* used when rx skb allocation failed, so we defer rx queue + * refill */ |