ipq40xx: add target

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Mathias Kresin <dev@kresin.me>
Signed-off-by: John Crispin <john@phrozen.org>
author: John Crispin <john@phrozen.org> 2018-02-21 20:40:50 +0100
committer: Mathias Kresin <dev@kresin.me> 2018-03-14 19:04:50 +0100
commit: 54b275c8ed3ad20c447fd46deec83384822ac79d (patch)
tree: 4198c9f77e467b316940cb78297d3030e67d67ea /target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
parent: b7f115f22a9d79bd45bfe27cfb8d491dac49feb4 (diff)
download: upstream-54b275c8ed3ad20c447fd46deec83384822ac79d.tar.gz
upstream-54b275c8ed3ad20c447fd46deec83384822ac79d.tar.bz2
upstream-54b275c8ed3ad20c447fd46deec83384822ac79d.zip
1 files changed, 395 insertions, 0 deletions
diff --git a/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch b/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
new file mode 100644
index 0000000000..dca516e878
--- /dev/null
+++ b/target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
@@ -0,0 +1,395 @@
+From 6b4faeac05bc0b91616b921191cb054d1376f3b4 Mon Sep 17 00:00:00 2001
+From: Sricharan R <sricharan@codeaurora.org>
+Date: Mon, 28 Aug 2017 20:30:24 +0530
+Subject: [PATCH] dmaengine: qcom-bam: Process multiple pending descriptors
+
+The bam dmaengine has a circular FIFO to which we
+add hw descriptors that describe the transaction.
+The FIFO has space for about 4096 hw descriptors.
+
+Currently we add one descriptor and wait for it to
+complete with interrupt and then add the next pending
+descriptor. In this way, the FIFO is underutilized
+since only one descriptor is processed at a time, although
+there is space in FIFO for the BAM to process more.
+
+Instead keep adding descriptors to FIFO till it's full,
+that allows BAM to continue to work on the next descriptor
+immediately after signalling completion interrupt for the
+previous descriptor.
+
+Also, when the client has not set DMA_PREP_INTERRUPT for
+a descriptor, do not configure BAM to trigger an interrupt
+upon completion of that descriptor. This way we get an interrupt
+only for the descriptor for which DMA_PREP_INTERRUPT was
+requested, and there we signal completion of all the previously
+completed descriptors. So we still do callbacks for all requested
+descriptors; only the number of interrupts is reduced.
+
+CURRENT:
+
+            ------      -------   ---------------
+            |DES 0|     |DESC 1|  |DESC 2 + INT |
+            ------      -------   ---------------
+               |           |            |
+               |           |            |
+INTERRUPT:   (INT)       (INT)	      (INT)
+CALLBACK:     (CB)        (CB)         (CB)
+
+		MTD_SPEEDTEST READ PAGE: 3560 KiB/s
+		MTD_SPEEDTEST WRITE PAGE: 2664 KiB/s
+		IOZONE READ: 2456 KB/s
+		IOZONE WRITE: 1230 KB/s
+
+	bam dma interrupts (after tests): 96508
+
+CHANGE:
+
+        ------  -------   ---------------
+        |DES 0| |DESC 1|  |DESC 2 + INT |
+        ------  -------   ---------------
+				|
+				|
+          		      (INT)
+			      (CB for 0, 1, 2)
+
+		MTD_SPEEDTEST READ PAGE: 3860 KiB/s
+		MTD_SPEEDTEST WRITE PAGE: 2837 KiB/s
+		IOZONE READ: 2677 KB/s
+		IOZONE WRITE: 1308 KB/s
+
+	bam dma interrupts (after tests): 58806
+
+Signed-off-by: Sricharan R <sricharan@codeaurora.org>
+Reviewed-by: Andy Gross <andy.gross@linaro.org>
+Tested-by: Abhishek Sahu <absahu@codeaurora.org>
+Signed-off-by: Vinod Koul <vinod.koul@intel.com>
+---
+ drivers/dma/qcom/bam_dma.c | 169 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 109 insertions(+), 60 deletions(-)
+
+--- a/drivers/dma/qcom/bam_dma.c
++++ b/drivers/dma/qcom/bam_dma.c
+@@ -46,6 +46,7 @@
+ #include <linux/of_address.h>
+ #include <linux/of_irq.h>
+ #include <linux/of_dma.h>
++#include <linux/circ_buf.h>
+ #include <linux/clk.h>
+ #include <linux/dmaengine.h>
+ #include <linux/pm_runtime.h>
+@@ -78,6 +79,8 @@ struct bam_async_desc {
+ 
+ 	struct bam_desc_hw *curr_desc;
+ 
++	/* list node for the desc in the bam_chan list of descriptors */
++	struct list_head desc_node;
+ 	enum dma_transfer_direction dir;
+ 	size_t length;
+ 	struct bam_desc_hw desc[0];
+@@ -347,6 +350,8 @@ static const struct reg_offset_data bam_
+ #define BAM_DESC_FIFO_SIZE	SZ_32K
+ #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
+ #define BAM_FIFO_SIZE	(SZ_32K - 8)
++#define IS_BUSY(chan)	(CIRC_SPACE((chan)->tail, (chan)->head,\
++			 MAX_DESCRIPTORS + 1) == 0)	/* FIFO full */
+ 
+ struct bam_chan {
+ 	struct virt_dma_chan vc;
+@@ -356,8 +361,6 @@ struct bam_chan {
+ 	/* configuration from device tree */
+ 	u32 id;
+ 
+-	struct bam_async_desc *curr_txd;	/* current running dma */
+-
+ 	/* runtime configuration */
+ 	struct dma_slave_config slave;
+ 
+@@ -372,6 +375,8 @@ struct bam_chan {
+ 	unsigned int initialized;	/* is the channel hw initialized? */
+ 	unsigned int paused;		/* is the channel paused? */
+ 	unsigned int reconfigure;	/* new slave config? */
++	/* list of descriptors currently processed */
++	struct list_head desc_list;
+ 
+ 	struct list_head node;
+ };
+@@ -539,7 +544,7 @@ static void bam_free_chan(struct dma_cha
+ 
+ 	vchan_free_chan_resources(to_virt_chan(chan));
+ 
+-	if (bchan->curr_txd) {
++	if (!list_empty(&bchan->desc_list)) {
+ 		dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
+ 		goto err;
+ 	}
+@@ -632,8 +637,6 @@ static struct dma_async_tx_descriptor *b
+ 
+ 	if (flags & DMA_PREP_INTERRUPT)
+ 		async_desc->flags |= DESC_FLAG_EOT;
+-	else
+-		async_desc->flags |= DESC_FLAG_INT;
+ 
+ 	async_desc->num_desc = num_alloc;
+ 	async_desc->curr_desc = async_desc->desc;
+@@ -684,14 +687,16 @@ err_out:
+ static int bam_dma_terminate_all(struct dma_chan *chan)
+ {
+ 	struct bam_chan *bchan = to_bam_chan(chan);
++	struct bam_async_desc *async_desc, *tmp;
+ 	unsigned long flag;
+ 	LIST_HEAD(head);
+ 
+ 	/* remove all transactions, including active transaction */
+ 	spin_lock_irqsave(&bchan->vc.lock, flag);
+-	if (bchan->curr_txd) {
+-		list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued);
+-		bchan->curr_txd = NULL;
++	list_for_each_entry_safe(async_desc, tmp,
++				 &bchan->desc_list, desc_node) {
++		list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
++		list_del(&async_desc->desc_node);
+ 	}
+ 
+ 	vchan_get_all_descriptors(&bchan->vc, &head);
+@@ -763,9 +768,9 @@ static int bam_resume(struct dma_chan *c
+  */
+ static u32 process_channel_irqs(struct bam_device *bdev)
+ {
+-	u32 i, srcs, pipe_stts;
++	u32 i, srcs, pipe_stts, offset, avail;
+ 	unsigned long flags;
+-	struct bam_async_desc *async_desc;
++	struct bam_async_desc *async_desc, *tmp;
+ 
+ 	srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
+ 
+@@ -785,27 +790,40 @@ static u32 process_channel_irqs(struct b
+ 		writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
+ 
+ 		spin_lock_irqsave(&bchan->vc.lock, flags);
+-		async_desc = bchan->curr_txd;
+ 
+-		if (async_desc) {
+-			async_desc->num_desc -= async_desc->xfer_len;
+-			async_desc->curr_desc += async_desc->xfer_len;
+-			bchan->curr_txd = NULL;
++		offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
++				       P_SW_OFSTS_MASK;
++		offset /= sizeof(struct bam_desc_hw);
++
++		/* Number of descriptors available to read */
++		avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
++
++		list_for_each_entry_safe(async_desc, tmp,
++					 &bchan->desc_list, desc_node) {
++			/* Not enough data to read */
++			if (avail < async_desc->xfer_len)
++				break;
+ 
+ 			/* manage FIFO */
+ 			bchan->head += async_desc->xfer_len;
+ 			bchan->head %= MAX_DESCRIPTORS;
+ 
++			async_desc->num_desc -= async_desc->xfer_len;
++			async_desc->curr_desc += async_desc->xfer_len;
++			avail -= async_desc->xfer_len;
++
+ 			/*
+-			 * if complete, process cookie.  Otherwise
++			 * if complete, process cookie. Otherwise
+ 			 * push back to front of desc_issued so that
+ 			 * it gets restarted by the tasklet
+ 			 */
+-			if (!async_desc->num_desc)
++			if (!async_desc->num_desc) {
+ 				vchan_cookie_complete(&async_desc->vd);
+-			else
++			} else {
+ 				list_add(&async_desc->vd.node,
+-					&bchan->vc.desc_issued);
++					 &bchan->vc.desc_issued);
++			}
++			list_del(&async_desc->desc_node);
+ 		}
+ 
+ 		spin_unlock_irqrestore(&bchan->vc.lock, flags);
+@@ -867,6 +885,7 @@ static enum dma_status bam_tx_status(str
+ 		struct dma_tx_state *txstate)
+ {
+ 	struct bam_chan *bchan = to_bam_chan(chan);
++	struct bam_async_desc *async_desc;
+ 	struct virt_dma_desc *vd;
+ 	int ret;
+ 	size_t residue = 0;
+@@ -882,11 +901,17 @@ static enum dma_status bam_tx_status(str
+ 
+ 	spin_lock_irqsave(&bchan->vc.lock, flags);
+ 	vd = vchan_find_desc(&bchan->vc, cookie);
+-	if (vd)
++	if (vd) {
+ 		residue = container_of(vd, struct bam_async_desc, vd)->length;
+-	else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
+-		for (i = 0; i < bchan->curr_txd->num_desc; i++)
+-			residue += bchan->curr_txd->curr_desc[i].size;
++	} else {
++		list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
++			if (async_desc->vd.tx.cookie != cookie)
++				continue;
++
++			for (i = 0; i < async_desc->num_desc; i++)
++				residue += async_desc->curr_desc[i].size;
++		}
++	}
+ 
+ 	spin_unlock_irqrestore(&bchan->vc.lock, flags);
+ 
+@@ -927,63 +952,86 @@ static void bam_start_dma(struct bam_cha
+ {
+ 	struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
+ 	struct bam_device *bdev = bchan->bdev;
+-	struct bam_async_desc *async_desc;
++	struct bam_async_desc *async_desc = NULL;
+ 	struct bam_desc_hw *desc;
+ 	struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
+ 					sizeof(struct bam_desc_hw));
+ 	int ret;
++	unsigned int avail;
++	struct dmaengine_desc_callback cb;
+ 
+ 	lockdep_assert_held(&bchan->vc.lock);
+ 
+ 	if (!vd)
+ 		return;
+ 
+-	list_del(&vd->node);
+-
+-	async_desc = container_of(vd, struct bam_async_desc, vd);
+-	bchan->curr_txd = async_desc;
+-
+ 	ret = pm_runtime_get_sync(bdev->dev);
+ 	if (ret < 0)
+ 		return;
+ 
+-	/* on first use, initialize the channel hardware */
+-	if (!bchan->initialized)
+-		bam_chan_init_hw(bchan, async_desc->dir);
+-
+-	/* apply new slave config changes, if necessary */
+-	if (bchan->reconfigure)
+-		bam_apply_new_config(bchan, async_desc->dir);
++	while (vd && !IS_BUSY(bchan)) {
++		list_del(&vd->node);
+ 
+-	desc = bchan->curr_txd->curr_desc;
++		async_desc = container_of(vd, struct bam_async_desc, vd);
+ 
+-	if (async_desc->num_desc > MAX_DESCRIPTORS)
+-		async_desc->xfer_len = MAX_DESCRIPTORS;
+-	else
+-		async_desc->xfer_len = async_desc->num_desc;
++		/* on first use, initialize the channel hardware */
++		if (!bchan->initialized)
++			bam_chan_init_hw(bchan, async_desc->dir);
+ 
+-	/* set any special flags on the last descriptor */
+-	if (async_desc->num_desc == async_desc->xfer_len)
+-		desc[async_desc->xfer_len - 1].flags |=
+-					cpu_to_le16(async_desc->flags);
+-	else
+-		desc[async_desc->xfer_len - 1].flags |=
+-					cpu_to_le16(DESC_FLAG_INT);
++		/* apply new slave config changes, if necessary */
++		if (bchan->reconfigure)
++			bam_apply_new_config(bchan, async_desc->dir);
++
++		desc = async_desc->curr_desc;
++		avail = CIRC_SPACE(bchan->tail, bchan->head,
++				   MAX_DESCRIPTORS + 1);
++
++		if (async_desc->num_desc > avail)
++			async_desc->xfer_len = avail;
++		else
++			async_desc->xfer_len = async_desc->num_desc;
++
++		/* set any special flags on the last descriptor */
++		if (async_desc->num_desc == async_desc->xfer_len)
++			desc[async_desc->xfer_len - 1].flags |=
++						cpu_to_le16(async_desc->flags);
+ 
+-	if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+-		u32 partial = MAX_DESCRIPTORS - bchan->tail;
++		vd = vchan_next_desc(&bchan->vc);
+ 
+-		memcpy(&fifo[bchan->tail], desc,
+-				partial * sizeof(struct bam_desc_hw));
+-		memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
++		dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
++
++		/*
++		 * An interrupt is generated at this desc, if
++		 *  - FIFO is FULL.
++		 *  - No more descriptors to add.
++		 *  - If a callback completion was requested for this DESC,
++		 *     In this case, BAM will deliver the completion callback
++		 *     for this desc and continue processing the next desc.
++		 */
++		if (((avail <= async_desc->xfer_len) || !vd ||
++		     dmaengine_desc_callback_valid(&cb)) &&
++		    !(async_desc->flags & DESC_FLAG_EOT))
++			desc[async_desc->xfer_len - 1].flags |=
++				cpu_to_le16(DESC_FLAG_INT);
++
++		if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
++			u32 partial = MAX_DESCRIPTORS - bchan->tail;
++
++			memcpy(&fifo[bchan->tail], desc,
++			       partial * sizeof(struct bam_desc_hw));
++			memcpy(fifo, &desc[partial],
++			       (async_desc->xfer_len - partial) *
+ 				sizeof(struct bam_desc_hw));
+-	} else {
+-		memcpy(&fifo[bchan->tail], desc,
+-			async_desc->xfer_len * sizeof(struct bam_desc_hw));
+-	}
++		} else {
++			memcpy(&fifo[bchan->tail], desc,
++			       async_desc->xfer_len *
++			       sizeof(struct bam_desc_hw));
++		}
+ 
+-	bchan->tail += async_desc->xfer_len;
+-	bchan->tail %= MAX_DESCRIPTORS;
++		bchan->tail += async_desc->xfer_len;
++		bchan->tail %= MAX_DESCRIPTORS;
++		list_add_tail(&async_desc->desc_node, &bchan->desc_list);
++	}
+ 
+ 	/* ensure descriptor writes and dma start not reordered */
+ 	wmb();
+@@ -1012,7 +1060,7 @@ static void dma_tasklet(unsigned long da
+ 		bchan = &bdev->channels[i];
+ 		spin_lock_irqsave(&bchan->vc.lock, flags);
+ 
+-		if (!list_empty(&bchan->vc.desc_issued) && !bchan->curr_txd)
++		if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
+ 			bam_start_dma(bchan);
+ 		spin_unlock_irqrestore(&bchan->vc.lock, flags);
+ 	}
+@@ -1033,7 +1081,7 @@ static void bam_issue_pending(struct dma
+ 	spin_lock_irqsave(&bchan->vc.lock, flags);
+ 
+ 	/* if work pending and idle, start a transaction */
+-	if (vchan_issue_pending(&bchan->vc) && !bchan->curr_txd)
++	if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
+ 		bam_start_dma(bchan);
+ 
+ 	spin_unlock_irqrestore(&bchan->vc.lock, flags);
+@@ -1133,6 +1181,7 @@ static void bam_channel_init(struct bam_
+ 
+ 	vchan_init(&bchan->vc, &bdev->common);
+ 	bchan->vc.desc_free = bam_dma_free_desc;
++	INIT_LIST_HEAD(&bchan->desc_list);
+ }
+ 
+ static const struct of_device_id bam_of_match[] = {
author	John Crispin <john@phrozen.org>	2018-02-21 20:40:50 +0100
committer	Mathias Kresin <dev@kresin.me>	2018-03-14 19:04:50 +0100
commit	54b275c8ed3ad20c447fd46deec83384822ac79d (patch)
tree	4198c9f77e467b316940cb78297d3030e67d67ea /target/linux/ipq40xx/patches-4.14/040-dmaengine-qcom-bam-Process-multiple-pending-descript.patch
parent	b7f115f22a9d79bd45bfe27cfb8d491dac49feb4 (diff)
download	upstream-54b275c8ed3ad20c447fd46deec83384822ac79d.tar.gz upstream-54b275c8ed3ad20c447fd46deec83384822ac79d.tar.bz2 upstream-54b275c8ed3ad20c447fd46deec83384822ac79d.zip