Diffstat (limited to 'target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch')
-rw-r--r-- | target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch | 395 |
1 file changed, 395 insertions, 0 deletions
diff --git a/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch b/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
new file mode 100644
index 0000000000..dca516e878
--- /dev/null
+++ b/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
@@ -0,0 +1,395 @@
+From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
+From: Sricharan R <sricharan@codeaurora.org>
+Date: Mon, 28 Aug 2017 20:30:24 +0530
+Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
+
+The bam dmaengine has a circular FIFO to which we
+add hw descriptors that describe the transaction.
+The FIFO has space for about 4096 hw descriptors.
+
+Currently we add one descriptor, wait for it to
+complete with an interrupt, and then add the next pending
+descriptor. In this way, the FIFO is underutilized
+since only one descriptor is processed at a time, although
+there is space in the FIFO for the BAM to process more.
+
+Instead, keep adding descriptors to the FIFO until it is full;
+that allows the BAM to continue working on the next descriptor
+immediately after signalling the completion interrupt for the
+previous descriptor.
+
+Also, when the client has not set DMA_PREP_INTERRUPT for
+a descriptor, do not configure the BAM to trigger an interrupt
+upon completion of that descriptor. This way we get an interrupt
+only for the descriptor for which DMA_PREP_INTERRUPT was
+requested, and there we signal completion of all the previously
+completed descriptors. So we still do callbacks for all requested
+descriptors, but the number of interrupts is reduced.
+
+CURRENT:
+
+     ------            -------            ---------------
+    |DES 0 |          |DESC 1 |          |DESC 2 + INT  |
+     ------            -------            ---------------
+       |                  |                     |
+       |                  |                     |
+INTERRUPT: (INT)        (INT)                 (INT)
+CALLBACK:   (CB)         (CB)                  (CB)
+
+	MTD_SPEEDTEST READ PAGE: 3560 KiB/s
+	MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
+	IOZONE READ: 2456 KB/s
+	IOZONE WRITE: 1230 KB/s
+
+	bam dma interrupts (after tests): 96508
+
+CHANGE:
+
+     ------            -------            ---------------
+    |DES 0 |          |DESC 1 |          |DESC 2 + INT  |
+     ------            -------            ---------------
+                                                |
+                                                |
+                                              (INT)
+                                       (CB for 0, 1, 2)
+
+	MTD_SPEEDTEST READ PAGE: 3860 KiB/s
+	MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
+	IOZONE READ: 2677 KB/s
+	IOZONE WRITE: 1308 KB/s
+
+	bam dma interrupts (after tests): 58806
+
+Signed-off-by: Sricharan R <sricharan@codeaurora.org>
+Reviewed-by: Andy Gross <andy.gross@linaro.org>
+Tested-by: Abhishek Sahu <absahu@codeaurora.org>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+---
+ drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 109 insertions(+), 60 deletions(-)
+
+--- a/drivers/dma/qcom/bam_dma.c
++++ b/drivers/dma/qcom/bam_dma.c
+@@ -46,6 +46,7 @@
+ #include <linux/of_address.h>
+ #include <linux/of_irq.h>
+ #include <linux/of_dma.h>
++#include <linux/circ_buf.h>
+ #include <linux/clk.h>
+ #include <linux/dmaengine.h>
+ #include <linux/pm_runtime.h>
+@@ -78,6 +79,8 @@ struct bam_async_desc {
+ 
+ 	struct bam_desc_hw *curr_desc;
+ 
++	/* list node for the desc in the bam_chan list of descriptors */
++	struct list_head desc_node;
+ 	enum dma_transfer_direction dir;
+ 	size_t length;
+ 	struct bam_desc_hw desc[0];
+@@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
+ #define BAM_DESC_FIFO_SIZE	SZ_32K
+ #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
+ #define BAM_FIFO_SIZE	(SZ_32K - 8)
++#define IS_BUSY(chan)	(CIRC_SPACE(bchan->tail, bchan->head,\
++			MAX_DESCRIPTORS + 1) == 0)
+ 
+ struct bam_chan {
+ 	struct virt_dma_chan vc;
+@@ -356,8 +361,6 @@ struct bam_chan {
+ 	/* configuration from device tree */
+ 	u32 id;
+ 
+-	struct bam_async_desc *curr_txd;	/* current running dma */
+-
+ 	/* runtime configuration */
+ 	struct dma_slave_config slave;
+ 
+@@ -372,6 +375,8 @@ struct bam_chan {
+ 	unsigned int initialized;	/* is the channel hw initialized? */
+ 	unsigned int paused;		/* is the channel paused? */
+ 	unsigned int reconfigure;	/* new slave config? */
++	/* list of descriptors currently processed */
++	struct list_head desc_list;
+ 
+ 	struct list_head node;
+ };
+@@ -539,7 +544,7 @@ static void bam_free_chan(struct dma_cha
+ 
+ 	vchan_free_chan_resources(to_virt_chan(chan));
+ 
+-	if (bchan->curr_txd) {
++	if (!list_empty(&bchan->desc_list)) {
+ 		dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
+ 		goto err;
+ 	}
+@@ -632,8 +637,6 @@ static struct dma_async_tx_descriptor *b
+ 
+ 	if (flags & DMA_PREP_INTERRUPT)
+ 		async_desc->flags |= DESC_FLAG_EOT;
+-	else
+-		async_desc->flags |= DESC_FLAG_INT;
+ 
+ 	async_desc->num_desc = num_alloc;
+ 	async_desc->curr_desc = async_desc->desc;
+@@ -684,14 +687,16 @@ err_out:
+ static int bam_dma_terminate_all(struct dma_chan *chan)
+ {
+ 	struct bam_chan *bchan = to_bam_chan(chan);
++	struct bam_async_desc *async_desc, *tmp;
+ 	unsigned long flag;
+ 	LIST_HEAD(head);
+ 
+ 	/* remove all transactions, including active transaction */
+ 	spin_lock_irqsave(&bchan->vc.lock, flag);
+-	if (bchan->curr_txd) {
+-		list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
+-		bchan->curr_txd = NULL;
++	list_for_each_entry_safe(async_desc, tmp,
++				 &bchan->desc_list, desc_node) {
++		list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
++		list_del(&async_desc->desc_node);
+ 	}
+ 
+ 	vchan_get_all_descriptors(&bchan->vc, &head);
+@@ -763,9 +768,9 @@ static int bam_resume(struct dma_chan *c
+  */
+ static u32 process_channel_irqs(struct bam_device *bdev)
+ {
+-	u32 i, srcs, pipe_stts;
++	u32 i, srcs, pipe_stts, offset, avail;
+ 	unsigned long flags;
+-	struct bam_async_desc *async_desc;
++	struct bam_async_desc *async_desc, *tmp;
+ 
+ 	srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
+ 
+@@ -785,27 +790,40 @@ static u32 process_channel_irqs(struct b
+ 		writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
+ 
+ 		spin_lock_irqsave(&bchan->vc.lock, flags);
+-		async_desc = bchan->curr_txd;
+ 
+-		if (async_desc) {
+-			async_desc->num_desc -= async_desc->xfer_len;
+-			async_desc->curr_desc += async_desc->xfer_len;
+-			bchan->curr_txd = NULL;
++		offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
++				       P_SW_OFSTS_MASK;
++		offset /= sizeof(struct bam_desc_hw);
++
++		/* Number of bytes available to read */
++		avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
++
++		list_for_each_entry_safe(async_desc, tmp,
++					 &bchan->desc_list, desc_node) {
++			/* Not enough data to read */
++			if (avail < async_desc->xfer_len)
++				break;
+ 
+ 			/* manage FIFO */
+ 			bchan->head += async_desc->xfer_len;
+ 			bchan->head %= MAX_DESCRIPTORS;
+ 
++			async_desc->num_desc -= async_desc->xfer_len;
++			async_desc->curr_desc += async_desc->xfer_len;
++			avail -= async_desc->xfer_len;
++
+ 			/*
+-			 * if complete, process cookie.  Otherwise
++			 * if complete, process cookie. Otherwise
+ 			 * push back to front of desc_issued so that
+ 			 * it gets restarted by the tasklet
+ 			 */
+-			if (!async_desc->num_desc)
++			if (!async_desc->num_desc) {
+ 				vchan_cookie_complete(&async_desc->vd);
+-			else
++			} else {
+ 				list_add(&async_desc->vd.node,
+-					&bchan->vc.desc_issued);
++					 &bchan->vc.desc_issued);
++			}
++			list_del(&async_desc->desc_node);
+ 		}
+ 
+ 		spin_unlock_irqrestore(&bchan->vc.lock, flags);
+@@ -867,6 +885,7 @@ static enum dma_status bam_tx_status(str
+ 	struct dma_tx_state *txstate)
+ {
+ 	struct bam_chan *bchan = to_bam_chan(chan);
++	struct bam_async_desc *async_desc;
+ 	struct virt_dma_desc *vd;
+ 	int ret;
+ 	size_t residue = 0;
+@@ -882,11 +901,17 @@ static enum dma_status bam_tx_status(str
+ 
+ 	spin_lock_irqsave(&bchan->vc.lock, flags);
+ 	vd = vchan_find_desc(&bchan->vc, cookie);
+-	if (vd)
++	if (vd) {
+ 		residue = container_of(vd, struct bam_async_desc, vd)->length;
+-	else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
+-		for (i = 0; i < bchan->curr_txd->num_desc; i++)
+-			residue += bchan->curr_txd->curr_desc[i].size;
++	} else {
++		list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
++			if (async_desc->vd.tx.cookie != cookie)
++				continue;
++
++			for (i = 0; i < async_desc->num_desc; i++)
++				residue += async_desc->curr_desc[i].size;
++		}
++	}
+ 
+ 	spin_unlock_irqrestore(&bchan->vc.lock, flags);
+ 
+@@ -927,63 +952,86 @@ static void bam_start_dma(struct bam_cha
+ {
+ 	struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
+ 	struct bam_device *bdev = bchan->bdev;
+-	struct bam_async_desc *async_desc;
++	struct bam_async_desc *async_desc = NULL;
+ 	struct bam_desc_hw *desc;
+ 	struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
+ 					sizeof(struct bam_desc_hw));
+ 	int ret;
++	unsigned int avail;
++	struct dmaengine_desc_callback cb;
+ 
+ 	lockdep_assert_held(&bchan->vc.lock);
+ 
+ 	if (!vd)
+ 		return;
+ 
+-	list_del(&vd->node);
+-
+-	async_desc = container_of(vd, struct bam_async_desc, vd);
+-	bchan->curr_txd = async_desc;
+-
+ 	ret = pm_runtime_get_sync(bdev->dev);
+ 	if (ret < 0)
+ 		return;
+ 
+-	/* on first use, initialize the channel hardware */
+-	if (!bchan->initialized)
+-		bam_chan_init_hw(bchan, async_desc->dir);
+-
+-	/* apply new slave config changes, if necessary */
+-	if (bchan->reconfigure)
+-		bam_apply_new_config(bchan, async_desc->dir);
++	while (vd && !IS_BUSY(bchan)) {
++		list_del(&vd->node);
+ 
+-	desc = bchan->curr_txd->curr_desc;
++		async_desc = container_of(vd, struct bam_async_desc, vd);
+ 
+-	if (async_desc->num_desc > MAX_DESCRIPTORS)
+-		async_desc->xfer_len = MAX_DESCRIPTORS;
+-	else
+-		async_desc->xfer_len = async_desc->num_desc;
++		/* on first use, initialize the channel hardware */
++		if (!bchan->initialized)
++			bam_chan_init_hw(bchan, async_desc->dir);
+ 
+-	/* set any special flags on the last descriptor */
+-	if (async_desc->num_desc == async_desc->xfer_len)
+-		desc[async_desc->xfer_len - 1].flags |=
+-					cpu_to_le16(async_desc->flags);
+-	else
+-		desc[async_desc->xfer_len - 1].flags |=
+-					cpu_to_le16(DESC_FLAG_INT);
++		/* apply new slave config changes, if necessary */
++		if (bchan->reconfigure)
++			bam_apply_new_config(bchan, async_desc->dir);
++
++		desc = async_desc->curr_desc;
++		avail = CIRC_SPACE(bchan->tail, bchan->head,
++				   MAX_DESCRIPTORS + 1);
++
++		if (async_desc->num_desc > avail)
++			async_desc->xfer_len = avail;
++		else
++			async_desc->xfer_len = async_desc->num_desc;
++
++		/* set any special flags on the last descriptor */
++		if (async_desc->num_desc == async_desc->xfer_len)
++			desc[async_desc->xfer_len - 1].flags |=
++					cpu_to_le16(async_desc->flags);
+ 
+-	if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+-		u32 partial = MAX_DESCRIPTORS - bchan->tail;
++		vd = vchan_next_desc(&bchan->vc);
+ 
+-		memcpy(&fifo[bchan->tail], desc,
+-				partial * sizeof(struct bam_desc_hw));
+-		memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
++		dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
++
++		/*
++		 * An interrupt is generated at this desc, if
++		 *  - FIFO is FULL.
++		 *  - No more descriptors to add.
++		 *  - If a callback completion was requested for this DESC,
++		 *    In this case, BAM will deliver the completion callback
++		 *    for this desc and continue processing the next desc.
++		 */
++		if (((avail <= async_desc->xfer_len) || !vd ||
++		     dmaengine_desc_callback_valid(&cb)) &&
++		    !(async_desc->flags & DESC_FLAG_EOT))
++			desc[async_desc->xfer_len - 1].flags |=
++				cpu_to_le16(DESC_FLAG_INT);
++
++		if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
++			u32 partial = MAX_DESCRIPTORS - bchan->tail;
++
++			memcpy(&fifo[bchan->tail], desc,
++			       partial * sizeof(struct bam_desc_hw));
++			memcpy(fifo, &desc[partial],
++			       (async_desc->xfer_len - partial) *
+ 				sizeof(struct bam_desc_hw));
+-	} else {
+-		memcpy(&fifo[bchan->tail], desc,
+-			async_desc->xfer_len * sizeof(struct bam_desc_hw));
+-	}
++		} else {
++			memcpy(&fifo[bchan->tail], desc,
++			       async_desc->xfer_len *
++			       sizeof(struct bam_desc_hw));
++		}
+ 
+-	bchan->tail += async_desc->xfer_len;
+-	bchan->tail %= MAX_DESCRIPTORS;
++		bchan->tail += async_desc->xfer_len;
++		bchan->tail %= MAX_DESCRIPTORS;
++		list_add_tail(&async_desc->desc_node, &bchan->desc_list);
++	}
+ 
+ 	/* ensure descriptor writes and dma start not reordered */
+ 	wmb();
+@@ -1012,7 +1060,7 @@ static void dma_tasklet(unsigned long da
+ 		bchan = &bdev->channels[i];
+ 		spin_lock_irqsave(&bchan->vc.lock, flags);
+ 
+-		if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
++		if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
+ 			bam_start_dma(bchan);
+ 		spin_unlock_irqrestore(&bchan->vc.lock, flags);
+ 	}
+@@ -1033,7 +1081,7 @@ static void bam_issue_pending(struct dma
+ 	spin_lock_irqsave(&bchan->vc.lock, flags);
+ 
+ 	/* if work pending and idle, start a transaction */
+-	if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
++	if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
+ 		bam_start_dma(bchan);
+ 
+ 	spin_unlock_irqrestore(&bchan->vc.lock, flags);
+@@ -1133,6 +1181,7 @@ static void bam_channel_init(struct bam_
+ 
+ 	vchan_init(&bchan->vc, &bdev->common);
+ 	bchan->vc.desc_free = bam_dma_free_desc;
++	INIT_LIST_HEAD(&bchan->desc_list);
+ }
+ 
+ static const struct of_device_id bam_of_match[] = {
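A note for readers of this patch: the new bookkeeping is built on the kernel's CIRC_CNT()/CIRC_SPACE() helpers from <linux/circ_buf.h>. The standalone user-space sketch below (not part of the patch; the constants mirror the driver, the scenario is invented) demonstrates the arithmetic behind IS_BUSY() and the completion accounting in process_channel_irqs():

	/*
	 * Standalone sketch of the ring accounting used by the patch.
	 * CIRC_CNT()/CIRC_SPACE() are copied from <linux/circ_buf.h>.
	 */
	#include <stdio.h>

	#define CIRC_CNT(head, tail, size)   (((head) - (tail)) & ((size) - 1))
	#define CIRC_SPACE(head, tail, size) CIRC_CNT((tail), ((head) + 1), (size))

	#define BAM_DESC_FIFO_SIZE 32768	/* SZ_32K */
	#define DESC_SIZE          8		/* sizeof(struct bam_desc_hw) */
	#define MAX_DESCRIPTORS    (BAM_DESC_FIFO_SIZE / DESC_SIZE - 1)	/* 4095 */

	int main(void)
	{
		unsigned int head = 0;	/* consumer index (completed by hw) */
		unsigned int tail = 0;	/* producer index (written by driver) */

		/* producer side, as in bam_start_dma(): free slots left */
		printf("empty ring: %u free slots\n",
		       CIRC_SPACE(tail, head, MAX_DESCRIPTORS + 1));	/* 4095 */

		/* queue 100 descriptors; wrap as the driver does */
		tail = (tail + 100) % MAX_DESCRIPTORS;

		/*
		 * consumer side, as in process_channel_irqs(): the hardware
		 * offset register (BAM_P_SW_OFSTS) plays the role of 'offset'
		 */
		unsigned int offset = 40;	/* hw consumed 40 descriptors */
		printf("completed: %u descriptors\n",
		       CIRC_CNT(offset, head, MAX_DESCRIPTORS + 1));	/* 40 */

		/* IS_BUSY() from the patch: no space left to enqueue */
		printf("busy: %d\n",
		       CIRC_SPACE(tail, head, MAX_DESCRIPTORS + 1) == 0);	/* 0 */
		return 0;
	}

CIRC_SPACE() always keeps one slot empty, so an empty ring reports 4095 free slots; the driver passes MAX_DESCRIPTORS + 1 (4096) because the CIRC_* macros only work with a power-of-two size.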
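The DMA_PREP_INTERRUPT change matters most to dmaengine clients that batch transactions. Below is a hypothetical client sketch (illustrative only, not from this patch; queue_three() and my_done() are invented names, the calls are the standard dmaengine slave API) that requests an interrupt, and registers a callback, only on the last of three descriptors:

	/* Hypothetical dmaengine client; channel and sgl setup elided. */
	#include <linux/completion.h>
	#include <linux/dmaengine.h>
	#include <linux/scatterlist.h>

	static void my_done(void *param)
	{
		complete(param);	/* wake the submitter */
	}

	static int queue_three(struct dma_chan *chan, struct scatterlist sg[3],
			       struct completion *done)
	{
		struct dma_async_tx_descriptor *txd;
		dma_cookie_t cookie;
		int i;

		for (i = 0; i < 3; i++) {
			/* request an interrupt only for the final descriptor */
			unsigned long flags = (i == 2) ? DMA_PREP_INTERRUPT : 0;

			txd = dmaengine_prep_slave_sg(chan, &sg[i], 1,
						      DMA_MEM_TO_DEV, flags);
			if (!txd)
				return -ENOMEM;

			if (i == 2) {
				txd->callback = my_done;
				txd->callback_param = done;
			}

			cookie = dmaengine_submit(txd);
			if (dma_submit_error(cookie))
				return -EIO;
		}

		dma_async_issue_pending(chan);
		return 0;
	}

Before the patch, bam_prep_slave_sg() forced DESC_FLAG_INT on any descriptor prepared without DMA_PREP_INTERRUPT, so all three transactions would still have interrupted; with the patch, the first two complete silently and one interrupt signals all three, the hardware interrupting only when the FIFO is full, the queue runs empty, or a callback is pending.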