diff --git a/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch b/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
new file mode 100644
index 0000000000..dca516e878
--- /dev/null
+++ b/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
@@ -0,0 +1,395 @@
+From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
+From: Sricharan R <sricharan@codeaurora.org>
+Date: Mon, 28 Aug 2017 20:30:24 +0530
+Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
+
+The bam dmaengine has a circular FIFO to which we
+add hw descriptors that describe the transaction.
+The FIFO has space for about 4096 hw descriptors.
+
+Currently we add one descriptor, wait for its
+completion interrupt and only then add the next
+pending descriptor. This underutilizes the FIFO,
+since only one descriptor is processed at a time
+even though the FIFO has space for the BAM to
+process more.
+
+Instead, keep adding descriptors to the FIFO until
+it is full; that allows the BAM to continue with the
+next descriptor immediately after signalling the
+completion interrupt for the previous one.
+
+Also, when the client has not set DMA_PREP_INTERRUPT
+for a descriptor, do not configure the BAM to trigger
+an interrupt upon completion of that descriptor. This
+way we get an interrupt only for descriptors that
+requested DMA_PREP_INTERRUPT, and that interrupt also
+signals completion of all previously completed
+descriptors. So we still do callbacks for all
+requested descriptors; only the number of interrupts
+is reduced.
+
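+For reference, the FIFO-full and completed-descriptor checks both use
+the standard linux/circ_buf.h accounting. A minimal sketch (variable
+names as in the driver; bchan->tail is the producer index the driver
+writes at, bchan->head the consumer index advanced as the BAM
+completes work):
+
+	/* hw descriptor slots the driver may still queue */
+	space = CIRC_SPACE(bchan->tail, bchan->head, MAX_DESCRIPTORS + 1);
+
+	/* hw descriptors the BAM has completed, where offset is the
+	 * BAM_P_SW_OFSTS register value in descriptor units
+	 */
+	done = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
+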
+CURRENT:
+
+             -------     -------     ---------------
+            |DESC 0|    |DESC 1|    |DESC 2 + INT |
+             -------     -------     ---------------
+                |           |              |
+                |           |              |
+INTERRUPT:    (INT)       (INT)          (INT)
+CALLBACK:     (CB)        (CB)           (CB)
+
+ MTD_SPEEDTEST READ PAGE: 3560 KiB/s
+ MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
+ IOZONE READ: 2456 KB/s
+ IOZONE WRITE: 1230 KB/s
+
+ bam dma interrupts (after tests): 96508
+
+CHANGE:
+
+             -------     -------     ---------------
+            |DESC 0|    |DESC 1|    |DESC 2 + INT |
+             -------     -------     ---------------
+                                           |
+                                           |
+                                         (INT)
+                                         (CB for 0, 1, 2)
+
+ MTD_SPEEDTEST READ PAGE: 3860 KiB/s
+ MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
+ IOZONE READ: 2677 KB/s
+ IOZONE WRITE: 1308 KB/s
+
+ bam dma interrupts (after tests): 58806
+
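+In brief, the interrupt decision made per queued descriptor in
+bam_start_dma() reduces to the following (sketch mirroring the code
+added below):
+
+	/* interrupt on the last queued hw descriptor when the FIFO is
+	 * full, no further descriptor is pending, or a callback was
+	 * requested, unless EOT already guarantees an interrupt
+	 */
+	if ((avail <= async_desc->xfer_len || !vd ||
+	     dmaengine_desc_callback_valid(&cb)) &&
+	    !(async_desc->flags & DESC_FLAG_EOT))
+		desc[async_desc->xfer_len - 1].flags |=
+			cpu_to_le16(DESC_FLAG_INT);
+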
+Signed-off-by: Sricharan R <sricharan@codeaurora.org>
+Reviewed-by: Andy Gross <andy.gross@linaro.org>
+Tested-by: Abhishek Sahu <absahu@codeaurora.org>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+---
+ drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 109 insertions(+), 60 deletions(-)
+
+--- a/drivers/dma/qcom/bam_dma.c
++++ b/drivers/dma/qcom/bam_dma.c
+@@ -46,6 +46,7 @@
+ #include <linux/of_address.h>
+ #include <linux/of_irq.h>
+ #include <linux/of_dma.h>
++#include <linux/circ_buf.h>
+ #include <linux/clk.h>
+ #include <linux/dmaengine.h>
+ #include <linux/pm_runtime.h>
+@@ -78,6 +79,8 @@ struct bam_async_desc {
+
+ struct bam_desc_hw *curr_desc;
+
++ /* list node for the desc in the bam_chan list of descriptors */
++ struct list_head desc_node;
+ enum dma_transfer_direction dir;
+ size_t length;
+ struct bam_desc_hw desc[0];
+@@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
+ #define BAM_DESC_FIFO_SIZE SZ_32K
+ #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
+ #define BAM_FIFO_SIZE (SZ_32K - 8)
++#define IS_BUSY(chan) (CIRC_SPACE((chan)->tail, (chan)->head,\
++ MAX_DESCRIPTORS + 1) == 0)
+
+ struct bam_chan {
+ struct virt_dma_chan vc;
+@@ -356,8 +361,6 @@ struct bam_chan {
+ /* configuration from device tree */
+ u32 id;
+
+- struct bam_async_desc *curr_txd; /* current running dma */
+-
+ /* runtime configuration */
+ struct dma_slave_config slave;
+
+@@ -372,6 +375,8 @@ struct bam_chan {
+ unsigned int initialized; /* is the channel hw initialized? */
+ unsigned int paused; /* is the channel paused? */
+ unsigned int reconfigure; /* new slave config? */
++ /* list of descriptors currently processed */
++ struct list_head desc_list;
+
+ struct list_head node;
+ };
+@@ -539,7 +544,7 @@ static void bam_free_chan(struct dma_cha
+
+ vchan_free_chan_resources(to_virt_chan(chan));
+
+- if (bchan->curr_txd) {
++ if (!list_empty(&bchan->desc_list)) {
+ dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
+ goto err;
+ }
+@@ -632,8 +637,6 @@ static struct dma_async_tx_descriptor *b
+
+ if (flags & DMA_PREP_INTERRUPT)
+ async_desc->flags |= DESC_FLAG_EOT;
+- else
+- async_desc->flags |= DESC_FLAG_INT;
+
+ async_desc->num_desc = num_alloc;
+ async_desc->curr_desc = async_desc->desc;
+@@ -684,14 +687,16 @@ err_out:
+ static int bam_dma_terminate_all(struct dma_chan *chan)
+ {
+ struct bam_chan *bchan = to_bam_chan(chan);
++ struct bam_async_desc *async_desc, *tmp;
+ unsigned long flag;
+ LIST_HEAD(head);
+
+ /* remove all transactions, including active transaction */
+ spin_lock_irqsave(&bchan->vc.lock, flag);
+- if (bchan->curr_txd) {
+- list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
+- bchan->curr_txd = NULL;
++ list_for_each_entry_safe(async_desc, tmp,
++ &bchan->desc_list, desc_node) {
++ list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
++ list_del(&async_desc->desc_node);
+ }
+
+ vchan_get_all_descriptors(&bchan->vc, &head);
+@@ -763,9 +768,9 @@ static int bam_resume(struct dma_chan *c
+ */
+ static u32 process_channel_irqs(struct bam_device *bdev)
+ {
+- u32 i, srcs, pipe_stts;
++ u32 i, srcs, pipe_stts, offset, avail;
+ unsigned long flags;
+- struct bam_async_desc *async_desc;
++ struct bam_async_desc *async_desc, *tmp;
+
+ srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
+
+@@ -785,27 +790,40 @@ static u32 process_channel_irqs(struct b
+ writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
+
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+- async_desc = bchan->curr_txd;
+
+- if (async_desc) {
+- async_desc->num_desc -= async_desc->xfer_len;
+- async_desc->curr_desc += async_desc->xfer_len;
+- bchan->curr_txd = NULL;
++ offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
++ P_SW_OFSTS_MASK;
++ offset /= sizeof(struct bam_desc_hw);
++
++ /* Number of bytes available to read */
++ avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
++
++ list_for_each_entry_safe(async_desc, tmp,
++ &bchan->desc_list, desc_node) {
++ /* Not enough data to read */
++ if (avail < async_desc->xfer_len)
++ break;
+
+ /* manage FIFO */
+ bchan->head += async_desc->xfer_len;
+ bchan->head %= MAX_DESCRIPTORS;
+
++ async_desc->num_desc -= async_desc->xfer_len;
++ async_desc->curr_desc += async_desc->xfer_len;
++ avail -= async_desc->xfer_len;
++
+ /*
+- * if complete, process cookie. Otherwise
++ * if complete, process cookie. Otherwise
+ * push back to front of desc_issued so that
+ * it gets restarted by the tasklet
+ */
+- if (!async_desc->num_desc)
++ if (!async_desc->num_desc) {
+ vchan_cookie_complete(&async_desc->vd);
+- else
++ } else {
+ list_add(&async_desc->vd.node,
+- &bchan->vc.desc_issued);
++ &bchan->vc.desc_issued);
++ }
++ list_del(&async_desc->desc_node);
+ }
+
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+@@ -867,6 +885,7 @@ static enum dma_status bam_tx_status(str
+ struct dma_tx_state *txstate)
+ {
+ struct bam_chan *bchan = to_bam_chan(chan);
++ struct bam_async_desc *async_desc;
+ struct virt_dma_desc *vd;
+ int ret;
+ size_t residue = 0;
+@@ -882,11 +901,17 @@ static enum dma_status bam_tx_status(str
+
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+ vd = vchan_find_desc(&bchan->vc, cookie);
+- if (vd)
++ if (vd) {
+ residue = container_of(vd, struct bam_async_desc, vd)->length;
+- else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
+- for (i = 0; i < bchan->curr_txd->num_desc; i++)
+- residue += bchan->curr_txd->curr_desc[i].size;
++ } else {
++ list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
++ if (async_desc->vd.tx.cookie != cookie)
++ continue;
++
++ for (i = 0; i < async_desc->num_desc; i++)
++ residue += async_desc->curr_desc[i].size;
++ }
++ }
+
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+
+@@ -927,63 +952,86 @@ static void bam_start_dma(struct bam_cha
+ {
+ struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
+ struct bam_device *bdev = bchan->bdev;
+- struct bam_async_desc *async_desc;
++ struct bam_async_desc *async_desc = NULL;
+ struct bam_desc_hw *desc;
+ struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
+ sizeof(struct bam_desc_hw));
+ int ret;
++ unsigned int avail;
++ struct dmaengine_desc_callback cb;
+
+ lockdep_assert_held(&bchan->vc.lock);
+
+ if (!vd)
+ return;
+
+- list_del(&vd->node);
+-
+- async_desc = container_of(vd, struct bam_async_desc, vd);
+- bchan->curr_txd = async_desc;
+-
+ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return;
+
+- /* on first use, initialize the channel hardware */
+- if (!bchan->initialized)
+- bam_chan_init_hw(bchan, async_desc->dir);
+-
+- /* apply new slave config changes, if necessary */
+- if (bchan->reconfigure)
+- bam_apply_new_config(bchan, async_desc->dir);
++ while (vd && !IS_BUSY(bchan)) {
++ list_del(&vd->node);
+
+- desc = bchan->curr_txd->curr_desc;
++ async_desc = container_of(vd, struct bam_async_desc, vd);
+
+- if (async_desc->num_desc > MAX_DESCRIPTORS)
+- async_desc->xfer_len = MAX_DESCRIPTORS;
+- else
+- async_desc->xfer_len = async_desc->num_desc;
++ /* on first use, initialize the channel hardware */
++ if (!bchan->initialized)
++ bam_chan_init_hw(bchan, async_desc->dir);
+
+- /* set any special flags on the last descriptor */
+- if (async_desc->num_desc == async_desc->xfer_len)
+- desc[async_desc->xfer_len - 1].flags |=
+- cpu_to_le16(async_desc->flags);
+- else
+- desc[async_desc->xfer_len - 1].flags |=
+- cpu_to_le16(DESC_FLAG_INT);
++ /* apply new slave config changes, if necessary */
++ if (bchan->reconfigure)
++ bam_apply_new_config(bchan, async_desc->dir);
++
++ desc = async_desc->curr_desc;
++ avail = CIRC_SPACE(bchan->tail, bchan->head,
++ MAX_DESCRIPTORS + 1);
++
++ if (async_desc->num_desc > avail)
++ async_desc->xfer_len = avail;
++ else
++ async_desc->xfer_len = async_desc->num_desc;
++
++ /* set any special flags on the last descriptor */
++ if (async_desc->num_desc == async_desc->xfer_len)
++ desc[async_desc->xfer_len - 1].flags |=
++ cpu_to_le16(async_desc->flags);
+
+- if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+- u32 partial = MAX_DESCRIPTORS - bchan->tail;
++ vd = vchan_next_desc(&bchan->vc);
+
+- memcpy(&fifo[bchan->tail], desc,
+- partial * sizeof(struct bam_desc_hw));
+- memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
++ dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
++
++ /*
++ * An interrupt is generated at this desc if:
++ * - the FIFO is full,
++ * - there are no more descriptors to add, or
++ * - a completion callback was requested for this desc;
++ * in that case BAM delivers the completion callback
++ * for this desc and continues with the next one.
++ */
++ if (((avail <= async_desc->xfer_len) || !vd ||
++ dmaengine_desc_callback_valid(&cb)) &&
++ !(async_desc->flags & DESC_FLAG_EOT))
++ desc[async_desc->xfer_len - 1].flags |=
++ cpu_to_le16(DESC_FLAG_INT);
++
++ if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
++ u32 partial = MAX_DESCRIPTORS - bchan->tail;
++
++ memcpy(&fifo[bchan->tail], desc,
++ partial * sizeof(struct bam_desc_hw));
++ memcpy(fifo, &desc[partial],
++ (async_desc->xfer_len - partial) *
+ sizeof(struct bam_desc_hw));
+- } else {
+- memcpy(&fifo[bchan->tail], desc,
+- async_desc->xfer_len * sizeof(struct bam_desc_hw));
+- }
++ } else {
++ memcpy(&fifo[bchan->tail], desc,
++ async_desc->xfer_len *
++ sizeof(struct bam_desc_hw));
++ }
+
+- bchan->tail += async_desc->xfer_len;
+- bchan->tail %= MAX_DESCRIPTORS;
++ bchan->tail += async_desc->xfer_len;
++ bchan->tail %= MAX_DESCRIPTORS;
++ list_add_tail(&async_desc->desc_node, &bchan->desc_list);
++ }
+
+ /* ensure descriptor writes and dma start not reordered */
+ wmb();
+@@ -1012,7 +1060,7 @@ static void dma_tasklet(unsigned long da
+ bchan = &bdev->channels[i];
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+
+- if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
++ if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
+ bam_start_dma(bchan);
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+ }
+@@ -1033,7 +1081,7 @@ static void bam_issue_pending(struct dma
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+
+ /* if work pending and idle, start a transaction */
+- if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
++ if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
+ bam_start_dma(bchan);
+
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+@@ -1133,6 +1181,7 @@ static void bam_channel_init(struct bam_
+
+ vchan_init(&bchan->vc, &bdev->common);
+ bchan->vc.desc_free = bam_dma_free_desc;
++ INIT_LIST_HEAD(&bchan->desc_list);
+ }
+
+ static const struct of_device_id bam_of_match[] = {
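
A minimal client-side sketch (hypothetical consumer, not part of this
patch): with this change only descriptors prepared with
DMA_PREP_INTERRUPT raise a BAM interrupt, and that interrupt also
retires the callbacks of every previously completed descriptor.

	/* assumes chan, sgl and nents were set up elsewhere */
	struct dma_async_tx_descriptor *txd;

	txd = dmaengine_prep_slave_sg(chan, sgl, nents,
				      DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
	txd->callback = done_cb;	/* one interrupt covers the batch */
	dmaengine_submit(txd);
	dma_async_issue_pending(chan);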