Diffstat (limited to 'target/linux/generic/pending-5.15/704-05-v6.4-net-mvneta-allocate-TSO-header-DMA-memory-in-chunks.patch')
-rw-r--r--  target/linux/generic/pending-5.15/704-05-v6.4-net-mvneta-allocate-TSO-header-DMA-memory-in-chunks.patch | 179
1 file changed, 179 insertions, 0 deletions
diff --git a/target/linux/generic/pending-5.15/704-05-v6.4-net-mvneta-allocate-TSO-header-DMA-memory-in-chunks.patch b/target/linux/generic/pending-5.15/704-05-v6.4-net-mvneta-allocate-TSO-header-DMA-memory-in-chunks.patch
new file mode 100644
index 0000000000..395a0bf5d2
--- /dev/null
+++ b/target/linux/generic/pending-5.15/704-05-v6.4-net-mvneta-allocate-TSO-header-DMA-memory-in-chunks.patch
@@ -0,0 +1,179 @@
+From 58d50fb089da553023df5a05f5ae86feaacc7f24 Mon Sep 17 00:00:00 2001
+From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
+Date: Mon, 3 Apr 2023 19:30:40 +0100
+Subject: [PATCH 5/5] net: mvneta: allocate TSO header DMA memory in chunks
+
+Now that we no longer need to check whether the DMA address is within
+the TSO header DMA memory range for the queue, we can allocate the TSO
+header DMA memory in chunks rather than one contiguous order-6 chunk,
+which can stress the kernel's memory subsystems to allocate.
+
+Instead, use order-1 (8k) allocations, which will result in 32 order-1
+pages containing 32 TSO headers.
+
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+---
+ drivers/net/ethernet/marvell/mvneta.c | 88 +++++++++++++++++++++------
+ 1 file changed, 70 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -314,6 +314,15 @@
+
+ #define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
++/* The size of a TSO header page */
++#define MVNETA_TSO_PAGE_SIZE (2 * PAGE_SIZE)
++
++/* Number of TSO headers per page. This should be a power of 2 */
++#define MVNETA_TSO_PER_PAGE (MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE)
++
++/* Maximum number of TSO header pages */
++#define MVNETA_MAX_TSO_PAGES (MVNETA_MAX_TXD / MVNETA_TSO_PER_PAGE)
++
+ /* descriptor aligned size */
+ #define MVNETA_DESC_ALIGNED_SIZE        32
+
+@@ -656,10 +665,10 @@ struct mvneta_tx_queue {
+         int next_desc_to_proc;
+
+         /* DMA buffers for TSO headers */
+-        char *tso_hdrs;
++        char *tso_hdrs[MVNETA_MAX_TSO_PAGES];
+
+         /* DMA address of TSO headers */
+-        dma_addr_t tso_hdrs_phys;
++        dma_addr_t tso_hdrs_phys[MVNETA_MAX_TSO_PAGES];
+
+         /* Affinity mask for CPUs*/
+         cpumask_t affinity_mask;
+@@ -2592,24 +2601,71 @@ err_drop_frame:
+         return rx_done;
+ }
+
++static void mvneta_free_tso_hdrs(struct mvneta_port *pp,
++                                 struct mvneta_tx_queue *txq)
++{
++        struct device *dev = pp->dev->dev.parent;
++        int i;
++
++        for (i = 0; i < MVNETA_MAX_TSO_PAGES; i++) {
++                if (txq->tso_hdrs[i]) {
++                        dma_free_coherent(dev, MVNETA_TSO_PAGE_SIZE,
++                                          txq->tso_hdrs[i],
++                                          txq->tso_hdrs_phys[i]);
++                        txq->tso_hdrs[i] = NULL;
++                }
++        }
++}
++
++static int mvneta_alloc_tso_hdrs(struct mvneta_port *pp,
++                                 struct mvneta_tx_queue *txq)
++{
++        struct device *dev = pp->dev->dev.parent;
++        int i, num;
++
++        num = DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE);
++        for (i = 0; i < num; i++) {
++                txq->tso_hdrs[i] = dma_alloc_coherent(dev, MVNETA_TSO_PAGE_SIZE,
++                                                      &txq->tso_hdrs_phys[i],
++                                                      GFP_KERNEL);
++                if (!txq->tso_hdrs[i]) {
++                        mvneta_free_tso_hdrs(pp, txq);
++                        return -ENOMEM;
++                }
++        }
++
++        return 0;
++}
++
++static char *mvneta_get_tso_hdr(struct mvneta_tx_queue *txq, dma_addr_t *dma)
++{
++        int index, offset;
++
++        index = txq->txq_put_index / MVNETA_TSO_PER_PAGE;
++        offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE;
++
++        *dma = txq->tso_hdrs_phys[index] + offset;
++
++        return txq->tso_hdrs[index] + offset;
++}
++
+ static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq,
+                                struct tso_t *tso, int size, bool is_last)
+ {
+-        int tso_offset, hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
++        int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+         struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
+         struct mvneta_tx_desc *tx_desc;
++        dma_addr_t hdr_phys;
+         char *hdr;
+
+-        tso_offset = txq->txq_put_index * TSO_HEADER_SIZE;
+-
+-        hdr = txq->tso_hdrs + tso_offset;
++        hdr = mvneta_get_tso_hdr(txq, &hdr_phys);
+         tso_build_hdr(skb, hdr, tso, size, is_last);
+
+         tx_desc = mvneta_txq_next_desc_get(txq);
+         tx_desc->data_size = hdr_len;
+         tx_desc->command = mvneta_skb_tx_csum(skb);
+         tx_desc->command |= MVNETA_TXD_F_DESC;
+-        tx_desc->buf_phys_addr = txq->tso_hdrs_phys + tso_offset;
++        tx_desc->buf_phys_addr = hdr_phys;
+         buf->type = MVNETA_TYPE_TSO;
+         buf->skb = NULL;
+
+@@ -3401,7 +3457,7 @@ static void mvneta_rxq_deinit(struct mvn
+ static int mvneta_txq_sw_init(struct mvneta_port *pp,
+                               struct mvneta_tx_queue *txq)
+ {
+-        int cpu;
++        int cpu, err;
+
+         txq->size = pp->tx_ring_size;
+
+@@ -3426,11 +3482,9 @@ static int mvneta_txq_sw_init(struct mvn
+                 return -ENOMEM;
+
+         /* Allocate DMA buffers for TSO MAC/IP/TCP headers */
+-        txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
+-                                           txq->size * TSO_HEADER_SIZE,
+-                                           &txq->tso_hdrs_phys, GFP_KERNEL);
+-        if (!txq->tso_hdrs)
+-                return -ENOMEM;
++        err = mvneta_alloc_tso_hdrs(pp, txq);
++        if (err)
++                return err;
+
+         /* Setup XPS mapping */
+         if (pp->neta_armada3700)
+@@ -3482,10 +3536,7 @@ static void mvneta_txq_sw_deinit(struct
+
+         kfree(txq->buf);
+
+-        if (txq->tso_hdrs)
+-                dma_free_coherent(pp->dev->dev.parent,
+-                                  txq->size * TSO_HEADER_SIZE,
+-                                  txq->tso_hdrs, txq->tso_hdrs_phys);
++        mvneta_free_tso_hdrs(pp, txq);
+         if (txq->descs)
+                 dma_free_coherent(pp->dev->dev.parent,
+                                   txq->size * MVNETA_DESC_ALIGNED_SIZE,
+@@ -3494,7 +3545,6 @@ static void mvneta_txq_sw_deinit(struct
+         netdev_tx_reset_queue(nq);
+
+         txq->buf = NULL;
+-        txq->tso_hdrs = NULL;
+         txq->descs = NULL;
+         txq->last_desc = 0;
+         txq->next_desc_to_proc = 0;
+@@ -5543,6 +5593,8 @@ static int __init mvneta_driver_init(voi
+ {
+         int ret;
+
++        BUILD_BUG_ON_NOT_POWER_OF_2(MVNETA_TSO_PER_PAGE);
++
+         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online",
+                                       mvneta_cpu_online,
+                                       mvneta_cpu_down_prepare);
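
For orientation, the arithmetic this patch relies on is easy to check in isolation: each order-1 (8 KiB) chunk holds MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE headers, and a descriptor's put index selects first a chunk, then a byte offset within it. The stand-alone user-space C sketch below mirrors the chunk-count computation in mvneta_alloc_tso_hdrs() and the lookup in mvneta_get_tso_hdr(). It assumes 4 KiB pages and the 256-byte TSO_HEADER_SIZE from include/net/tso.h; RING_SIZE is an illustrative stand-in for txq->size, not a value taken from the driver.

#include <stdio.h>

#define PAGE_SIZE       4096                               /* assumed 4 KiB pages */
#define TSO_HEADER_SIZE 256                                /* from include/net/tso.h */
#define TSO_PAGE_SIZE   (2 * PAGE_SIZE)                    /* one order-1 chunk */
#define TSO_PER_PAGE    (TSO_PAGE_SIZE / TSO_HEADER_SIZE)  /* 32 headers per chunk */
#define RING_SIZE       512                                /* illustrative txq->size */

int main(void)
{
        /* Chunks needed to cover the whole ring, matching
         * DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE). */
        int num = (RING_SIZE + TSO_PER_PAGE - 1) / TSO_PER_PAGE;
        int put_index;

        printf("%d headers per 8 KiB chunk, %d chunks for a %d-entry ring\n",
               TSO_PER_PAGE, num, RING_SIZE);

        /* Header lookup for a few put indices, as in mvneta_get_tso_hdr():
         * which chunk the header lives in, and its offset inside it. */
        for (put_index = 0; put_index < RING_SIZE; put_index += 100) {
                int index  = put_index / TSO_PER_PAGE;
                int offset = (put_index % TSO_PER_PAGE) * TSO_HEADER_SIZE;

                printf("desc %3d -> chunk %2d, offset %4d\n",
                       put_index, index, offset);
        }

        return 0;
}

Because MVNETA_TSO_PER_PAGE is a power of two, the divide and modulo in the lookup reduce to a shift and a mask on the hot path; the BUILD_BUG_ON_NOT_POWER_OF_2() added to mvneta_driver_init() appears to guard exactly that assumption.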