author:    Christian Lamparter <chunkeey@googlemail.com>  2016-07-20 15:44:52 +0200
committer: Felix Fietkau <nbd@nbd.name>  2016-07-22 09:48:12 +0200
commit:    ea91ee13a72c1afdf2bceef528c807e3dddf63a9 (patch)
tree:      0765fa1cc8be5a6ec19029c9b863a19afde30050
parent:    a57d6e2d47688cfa392d6ea7f36ae6f9d84affc5 (diff)
download:  upstream-ea91ee13a72c1afdf2bceef528c807e3dddf63a9.tar.gz
           upstream-ea91ee13a72c1afdf2bceef528c807e3dddf63a9.tar.bz2
           upstream-ea91ee13a72c1afdf2bceef528c807e3dddf63a9.zip
apm821xx: dw_dmac: backport fixes and cleanups from 4.7
This patch fixes the dw_dmac DMA engine, which is used by the SATA controllers in the MyBook Live series and the WNDR4700. The code was backported from the upstream kernel and can be dropped completely on 4.7+.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
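The headline interface change in the backported series ("dmaengine: dw: pass platform data via struct dw_dma_chip", the last patch below) moves the platform data pointer out of dw_dma_probe()'s argument list and into struct dw_dma_chip. A minimal glue-driver sketch of the reworked call follows; the helper name and its surrounding driver are invented for illustration:

    /*
     * Sketch only: platform data now rides along in chip->pdata and may be
     * NULL, in which case the core autodetects the hardware parameters.
     */
    #include <linux/device.h>
    #include <linux/dma/dw.h>

    static int example_attach_dw_dma(struct device *dev, void __iomem *regs,
                                     int irq, struct clk *clk,
                                     const struct dw_dma_platform_data *pdata)
    {
            struct dw_dma_chip *chip;

            chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
            if (!chip)
                    return -ENOMEM;

            chip->dev = dev;
            chip->regs = regs;
            chip->irq = irq;
            chip->clk = clk;
            chip->pdata = pdata;    /* was the second argument of dw_dma_probe() */

            return dw_dma_probe(chip);
    }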
Diffstat (limited to 'target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch')
 -rw-r--r--  target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch | 1522
 1 file changed, 1522 insertions(+), 0 deletions(-)
diff --git a/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch
new file mode 100644
index 0000000000..96b11a82b6
--- /dev/null
+++ b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch
@@ -0,0 +1,1522 @@
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Subject: [PATCH v6 0/4] Fixes / cleanups in dw_dmac (affects a few subsystems)
+Date: Mon, 25 Apr 2016 15:35:05 +0300
+
+This patch series (v3: http://www.spinics.net/lists/kernel/msg2215303.html)
+contains a number of mostly minor fixes and cleanups for the DW DMA driver. A
+couple of them affect the DT binding, so existing device trees may need to be
+updated to maintain compatibility (the old format is still supported, though).
+The rest should be relatively straightforward.
+
+This version has been tested on the following bare metal platforms:
+- ATNGW100 (avr32 based platform) with dmatest
+- Sam460ex (powerpc 44x based platform) with SATA
+- Intel Braswell with UART
+- Intel Galileo (Intel Quark based platform) with UART
+
+(The SATA driver and Intel Galileo UART support are based on this series and
+ were just recently published for review.)
+
+Vinod, there are a few patch sets developed on top of this one, so the idea is
+to keep this in an immutable branch / tag.
+
+Changes since v5:
+- fixed an issue found by kbuildbot
+
+Changes since v4:
+- send proper set of patches
+- add changelog
+
+Changes since v3:
+- add patch 1 to check value of dma-masters property
+- drop the upstreamed patches
+- update patch 2 to keep an array for data-width property as well
+
+Changes since v2:
+- add patch 1 to fix master selection which was broken for long time
+- remove "use field-by-field initialization" patch since, as Mans mentioned, it
+  has mostly no value and might even be more error prone
+- rebase on top of recent linux-next
+- wide testing on several platforms
+
+Changes since v1:
+- zeroing struct dw_dma_slave before use
+- fall back to old data_width property if data-width is not found
+- append tags for a few patches
+- correct title of cover letter
+- rebase on top of recent linux-next
+
+Andy Shevchenko (4):
+ dmaengine: dw: platform: check nr_masters to be non-zero
+ dmaengine: dw: revisit data_width property
+ dmaengine: dw: keep entire platform data in struct dw_dma
+ dmaengine: dw: pass platform data via struct dw_dma_chip
+
+ Documentation/devicetree/bindings/dma/snps-dma.txt | 6 +-
+ arch/arc/boot/dts/abilis_tb10x.dtsi | 2 +-
+ arch/arm/boot/dts/spear13xx.dtsi | 4 +-
+ drivers/ata/sata_dwc_460ex.c | 2 +-
+ drivers/dma/dw/core.c | 75 ++++++++--------------
+ drivers/dma/dw/pci.c | 5 +-
+ drivers/dma/dw/platform.c | 32 +++++----
+ drivers/dma/dw/regs.h | 5 +-
+ include/linux/dma/dw.h | 5 +-
+ include/linux/platform_data/dma-dw.h | 4 +-
+ sound/soc/intel/common/sst-firmware.c | 2 +-
+ 11 files changed, 64 insertions(+), 78 deletions(-)
+
+--- a/drivers/dma/dw/core.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/core.c 2016-05-21 22:47:08.665465180 +0200
+@@ -45,22 +45,19 @@
+ DW_DMA_MSIZE_16; \
+ u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \
+ DW_DMA_MSIZE_16; \
++ u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \
++ _dwc->p_master : _dwc->m_master; \
++ u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \
++ _dwc->p_master : _dwc->m_master; \
+ \
+ (DWC_CTLL_DST_MSIZE(_dmsize) \
+ | DWC_CTLL_SRC_MSIZE(_smsize) \
+ | DWC_CTLL_LLP_D_EN \
+ | DWC_CTLL_LLP_S_EN \
+- | DWC_CTLL_DMS(_dwc->dst_master) \
+- | DWC_CTLL_SMS(_dwc->src_master)); \
++ | DWC_CTLL_DMS(_dms) \
++ | DWC_CTLL_SMS(_sms)); \
+ })
+
+-/*
+- * Number of descriptors to allocate for each channel. This should be
+- * made configurable somehow; preferably, the clients (at least the
+- * ones using slave transfers) should be able to give us a hint.
+- */
+-#define NR_DESCS_PER_CHANNEL 64
+-
+ /* The set of bus widths supported by the DMA controller */
+ #define DW_DMA_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+@@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active(
+ return to_dw_desc(dwc->active_list.next);
+ }
+
+-static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
++static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+ {
+- struct dw_desc *desc, *_desc;
+- struct dw_desc *ret = NULL;
+- unsigned int i = 0;
+- unsigned long flags;
++ struct dw_desc *desc = txd_to_dw_desc(tx);
++ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
++ dma_cookie_t cookie;
++ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+- list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+- i++;
+- if (async_tx_test_ack(&desc->txd)) {
+- list_del(&desc->desc_node);
+- ret = desc;
+- break;
+- }
+- dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
+- }
++ cookie = dma_cookie_assign(tx);
++
++ /*
++ * REVISIT: We should attempt to chain as many descriptors as
++ * possible, perhaps even appending to those already submitted
++ * for DMA. But this is hard to do in a race-free manner.
++ */
++
++ list_add_tail(&desc->desc_node, &dwc->queue);
+ spin_unlock_irqrestore(&dwc->lock, flags);
++ dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n",
++ __func__, desc->txd.cookie);
+
+- dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
++ return cookie;
++}
+
+- return ret;
++static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
++{
++ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ struct dw_desc *desc;
++ dma_addr_t phys;
++
++ desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);
++ if (!desc)
++ return NULL;
++
++ dwc->descs_allocated++;
++ INIT_LIST_HEAD(&desc->tx_list);
++ dma_async_tx_descriptor_init(&desc->txd, &dwc->chan);
++ desc->txd.tx_submit = dwc_tx_submit;
++ desc->txd.flags = DMA_CTRL_ACK;
++ desc->txd.phys = phys;
++ return desc;
+ }
+
+-/*
+- * Move a descriptor, including any children, to the free list.
+- * `desc' must not be on any lists.
+- */
+ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+ {
+- unsigned long flags;
++ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ struct dw_desc *child, *_next;
+
+- if (desc) {
+- struct dw_desc *child;
++ if (unlikely(!desc))
++ return;
+
+- spin_lock_irqsave(&dwc->lock, flags);
+- list_for_each_entry(child, &desc->tx_list, desc_node)
+- dev_vdbg(chan2dev(&dwc->chan),
+- "moving child desc %p to freelist\n",
+- child);
+- list_splice_init(&desc->tx_list, &dwc->free_list);
+- dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
+- list_add(&desc->desc_node, &dwc->free_list);
+- spin_unlock_irqrestore(&dwc->lock, flags);
++ list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) {
++ list_del(&child->desc_node);
++ dma_pool_free(dw->desc_pool, child, child->txd.phys);
++ dwc->descs_allocated--;
+ }
++
++ dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
++ dwc->descs_allocated--;
+ }
+
+ static void dwc_initialize(struct dw_dma_chan *dwc)
+@@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma
+ u32 cfghi = DWC_CFGH_FIFO_MODE;
+ u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+
+- if (dwc->initialized == true)
++ if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
+ return;
+
+ cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
+@@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma
+ channel_set_bit(dw, MASK.XFER, dwc->mask);
+ channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+- dwc->initialized = true;
++ set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+ }
+
+ /*----------------------------------------------------------------------*/
+
+-static inline unsigned int dwc_fast_ffs(unsigned long long v)
+-{
+- /*
+- * We can be a lot more clever here, but this should take care
+- * of the most common optimization.
+- */
+- if (!(v & 7))
+- return 3;
+- else if (!(v & 3))
+- return 2;
+- else if (!(v & 1))
+- return 1;
+- return 0;
+-}
+-
+ static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+ {
+ dev_err(chan2dev(&dwc->chan),
+@@ -197,12 +193,12 @@ static inline void dwc_do_single_block(s
+ * Software emulation of LLP mode relies on interrupts to continue
+ * multi block transfer.
+ */
+- ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN;
++ ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN;
+
+- channel_writel(dwc, SAR, desc->lli.sar);
+- channel_writel(dwc, DAR, desc->lli.dar);
++ channel_writel(dwc, SAR, lli_read(desc, sar));
++ channel_writel(dwc, DAR, lli_read(desc, dar));
+ channel_writel(dwc, CTL_LO, ctllo);
+- channel_writel(dwc, CTL_HI, desc->lli.ctlhi);
++ channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi));
+ channel_set_bit(dw, CH_EN, dwc->mask);
+
+ /* Move pointer to next descriptor */
+@@ -213,6 +209,7 @@ static inline void dwc_do_single_block(s
+ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+ {
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ u8 lms = DWC_LLP_LMS(dwc->m_master);
+ unsigned long was_soft_llp;
+
+ /* ASSERT: channel is idle */
+@@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_ch
+
+ dwc_initialize(dwc);
+
+- dwc->residue = first->total_len;
++ first->residue = first->total_len;
+ dwc->tx_node_active = &first->tx_list;
+
+ /* Submit first block */
+@@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_ch
+
+ dwc_initialize(dwc);
+
+- channel_writel(dwc, LLP, first->txd.phys);
+- channel_writel(dwc, CTL_LO,
+- DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
++ channel_writel(dwc, LLP, first->txd.phys | lms);
++ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+ channel_set_bit(dw, CH_EN, dwc->mask);
+ }
+@@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_ch
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ async_tx_ack(&child->txd);
+ async_tx_ack(&desc->txd);
+-
+- list_splice_init(&desc->tx_list, &dwc->free_list);
+- list_move(&desc->desc_node, &dwc->free_list);
+-
+- dma_descriptor_unmap(txd);
++ dwc_desc_put(dwc, desc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ if (callback)
+@@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct
+
+ head = &desc->tx_list;
+ if (active != head) {
+- /* Update desc to reflect last sent one */
+- if (active != head->next)
+- desc = to_dw_desc(active->prev);
+-
+- dwc->residue -= desc->len;
++ /* Update residue to reflect last sent descriptor */
++ if (active == head->next)
++ desc->residue -= desc->len;
++ else
++ desc->residue -= to_dw_desc(active->prev)->len;
+
+ child = to_dw_desc(active);
+
+@@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct
+ clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+ }
+
+- dwc->residue = 0;
+-
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ dwc_complete_all(dw, dwc);
+@@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct
+ }
+
+ if (list_empty(&dwc->active_list)) {
+- dwc->residue = 0;
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+@@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct
+
+ list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+ /* Initial residue value */
+- dwc->residue = desc->total_len;
++ desc->residue = desc->total_len;
+
+ /* Check first descriptors addr */
+- if (desc->txd.phys == llp) {
++ if (desc->txd.phys == DWC_LLP_LOC(llp)) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ /* Check first descriptors llp */
+- if (desc->lli.llp == llp) {
++ if (lli_read(desc, llp) == llp) {
+ /* This one is currently in progress */
+- dwc->residue -= dwc_get_sent(dwc);
++ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+- dwc->residue -= desc->len;
++ desc->residue -= desc->len;
+ list_for_each_entry(child, &desc->tx_list, desc_node) {
+- if (child->lli.llp == llp) {
++ if (lli_read(child, llp) == llp) {
+ /* Currently in progress */
+- dwc->residue -= dwc_get_sent(dwc);
++ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+- dwc->residue -= child->len;
++ desc->residue -= child->len;
+ }
+
+ /*
+@@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ }
+
+-static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
++static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc)
+ {
+ dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+- lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo);
++ lli_read(desc, sar),
++ lli_read(desc, dar),
++ lli_read(desc, llp),
++ lli_read(desc, ctlhi),
++ lli_read(desc, ctllo));
+ }
+
+ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+@@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_d
+ */
+ dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n"
+ " cookie: %d\n", bad_desc->txd.cookie);
+- dwc_dump_lli(dwc, &bad_desc->lli);
++ dwc_dump_lli(dwc, bad_desc);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+- dwc_dump_lli(dwc, &child->lli);
++ dwc_dump_lli(dwc, child);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_
+ */
+ if (unlikely(status_err & dwc->mask) ||
+ unlikely(status_xfer & dwc->mask)) {
+- int i;
++ unsigned int i;
+
+ dev_err(chan2dev(&dwc->chan),
+ "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n",
+@@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ for (i = 0; i < dwc->cdesc->periods; i++)
+- dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
++ dwc_dump_lli(dwc, dwc->cdesc->desc[i]);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ }
+@@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long
+ u32 status_block;
+ u32 status_xfer;
+ u32 status_err;
+- int i;
++ unsigned int i;
+
+ status_block = dma_readl(dw, RAW.BLOCK);
+ status_xfer = dma_readl(dw, RAW.XFER);
+@@ -616,12 +609,17 @@ static void dw_dma_tasklet(unsigned long
+ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+ {
+ struct dw_dma *dw = dev_id;
+- u32 status = dma_readl(dw, STATUS_INT);
++ u32 status;
++
++ /* Check if we have any interrupt from the DMAC which is not in use */
++ if (!dw->in_use)
++ return IRQ_NONE;
+
++ status = dma_readl(dw, STATUS_INT);
+ dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
+
+ /* Check if we have any interrupt from the DMAC */
+- if (!status || !dw->in_use)
++ if (!status)
+ return IRQ_NONE;
+
+ /*
+@@ -653,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int
+
+ /*----------------------------------------------------------------------*/
+
+-static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+-{
+- struct dw_desc *desc = txd_to_dw_desc(tx);
+- struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
+- dma_cookie_t cookie;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&dwc->lock, flags);
+- cookie = dma_cookie_assign(tx);
+-
+- /*
+- * REVISIT: We should attempt to chain as many descriptors as
+- * possible, perhaps even appending to those already submitted
+- * for DMA. But this is hard to do in a race-free manner.
+- */
+-
+- dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie);
+- list_add_tail(&desc->desc_node, &dwc->queue);
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- return cookie;
+-}
+-
+ static struct dma_async_tx_descriptor *
+ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+@@ -688,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+ struct dw_desc *prev;
+ size_t xfer_count;
+ size_t offset;
++ u8 m_master = dwc->m_master;
+ unsigned int src_width;
+ unsigned int dst_width;
+- unsigned int data_width;
++ unsigned int data_width = dw->pdata->data_width[m_master];
+ u32 ctllo;
++ u8 lms = DWC_LLP_LMS(m_master);
+
+ dev_vdbg(chan2dev(chan),
+ "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__,
+@@ -704,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+
+ dwc->direction = DMA_MEM_TO_MEM;
+
+- data_width = min_t(unsigned int, dw->data_width[dwc->src_master],
+- dw->data_width[dwc->dst_master]);
+-
+- src_width = dst_width = min_t(unsigned int, data_width,
+- dwc_fast_ffs(src | dest | len));
++ src_width = dst_width = __ffs(data_width | src | dest | len);
+
+ ctllo = DWC_DEFAULT_CTLLO(chan)
+ | DWC_CTLL_DST_WIDTH(dst_width)
+@@ -726,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = src + offset;
+- desc->lli.dar = dest + offset;
+- desc->lli.ctllo = ctllo;
+- desc->lli.ctlhi = xfer_count;
++ lli_write(desc, sar, src + offset);
++ lli_write(desc, dar, dest + offset);
++ lli_write(desc, ctllo, ctllo);
++ lli_write(desc, ctlhi, xfer_count);
+ desc->len = xfer_count << src_width;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+- prev->lli.ctllo |= DWC_CTLL_INT_EN;
++ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->txd.flags = flags;
+ first->total_len = len;
+
+@@ -768,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ struct dw_desc *prev;
+ struct dw_desc *first;
+ u32 ctllo;
++ u8 m_master = dwc->m_master;
++ u8 lms = DWC_LLP_LMS(m_master);
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+- unsigned int data_width;
++ unsigned int data_width = dw->pdata->data_width[m_master];
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+@@ -797,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+ DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
+- data_width = dw->data_width[dwc->src_master];
+-
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, dlen, mem;
+@@ -806,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+- mem_width = min_t(unsigned int,
+- data_width, dwc_fast_ffs(mem | len));
++ mem_width = __ffs(data_width | mem | len);
+
+ slave_sg_todev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = mem;
+- desc->lli.dar = reg;
+- desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
++ lli_write(desc, sar, mem);
++ lli_write(desc, dar, reg);
++ lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
+ if ((len >> mem_width) > dwc->block_size) {
+ dlen = dwc->block_size << mem_width;
+ mem += dlen;
+@@ -826,15 +797,14 @@ slave_sg_todev_fill_desc:
+ len = 0;
+ }
+
+- desc->lli.ctlhi = dlen >> mem_width;
++ lli_write(desc, ctlhi, dlen >> mem_width);
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ total_len += dlen;
+@@ -854,8 +824,6 @@ slave_sg_todev_fill_desc:
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+ DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
+- data_width = dw->data_width[dwc->dst_master];
+-
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, dlen, mem;
+@@ -863,17 +831,16 @@ slave_sg_todev_fill_desc:
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+- mem_width = min_t(unsigned int,
+- data_width, dwc_fast_ffs(mem | len));
++ mem_width = __ffs(data_width | mem | len);
+
+ slave_sg_fromdev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = reg;
+- desc->lli.dar = mem;
+- desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
++ lli_write(desc, sar, reg);
++ lli_write(desc, dar, mem);
++ lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
+ if ((len >> reg_width) > dwc->block_size) {
+ dlen = dwc->block_size << reg_width;
+ mem += dlen;
+@@ -882,15 +849,14 @@ slave_sg_fromdev_fill_desc:
+ dlen = len;
+ len = 0;
+ }
+- desc->lli.ctlhi = dlen >> reg_width;
++ lli_write(desc, ctlhi, dlen >> reg_width);
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ total_len += dlen;
+@@ -905,9 +871,10 @@ slave_sg_fromdev_fill_desc:
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+- prev->lli.ctllo |= DWC_CTLL_INT_EN;
++ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->total_len = total_len;
+
+ return &first->txd;
+@@ -932,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan
+ dwc->src_id = dws->src_id;
+ dwc->dst_id = dws->dst_id;
+
+- dwc->src_master = dws->src_master;
+- dwc->dst_master = dws->dst_master;
++ dwc->m_master = dws->m_master;
++ dwc->p_master = dws->p_master;
+
+ return true;
+ }
+@@ -986,7 +953,7 @@ static int dwc_pause(struct dma_chan *ch
+ while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
+ udelay(2);
+
+- dwc->paused = true;
++ set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -999,7 +966,7 @@ static inline void dwc_chan_resume(struc
+
+ channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+
+- dwc->paused = false;
++ clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+ }
+
+ static int dwc_resume(struct dma_chan *chan)
+@@ -1007,12 +974,10 @@ static int dwc_resume(struct dma_chan *c
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ unsigned long flags;
+
+- if (!dwc->paused)
+- return 0;
+-
+ spin_lock_irqsave(&dwc->lock, flags);
+
+- dwc_chan_resume(dwc);
++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
++ dwc_chan_resume(dwc);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -1048,16 +1013,37 @@ static int dwc_terminate_all(struct dma_
+ return 0;
+ }
+
+-static inline u32 dwc_get_residue(struct dw_dma_chan *dwc)
++static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c)
++{
++ struct dw_desc *desc;
++
++ list_for_each_entry(desc, &dwc->active_list, desc_node)
++ if (desc->txd.cookie == c)
++ return desc;
++
++ return NULL;
++}
++
++static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie)
+ {
++ struct dw_desc *desc;
+ unsigned long flags;
+ u32 residue;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+- residue = dwc->residue;
+- if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
+- residue -= dwc_get_sent(dwc);
++ desc = dwc_find_desc(dwc, cookie);
++ if (desc) {
++ if (desc == dwc_first_active(dwc)) {
++ residue = desc->residue;
++ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
++ residue -= dwc_get_sent(dwc);
++ } else {
++ residue = desc->total_len;
++ }
++ } else {
++ residue = 0;
++ }
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return residue;
+@@ -1078,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan,
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+- if (ret != DMA_COMPLETE)
+- dma_set_residue(txstate, dwc_get_residue(dwc));
++ if (ret == DMA_COMPLETE)
++ return ret;
++
++ dma_set_residue(txstate, dwc_get_residue(dwc, cookie));
+
+- if (dwc->paused && ret == DMA_IN_PROGRESS)
++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS)
+ return DMA_PAUSED;
+
+ return ret;
+@@ -1102,7 +1090,7 @@ static void dwc_issue_pending(struct dma
+
+ static void dw_dma_off(struct dw_dma *dw)
+ {
+- int i;
++ unsigned int i;
+
+ dma_writel(dw, CFG, 0);
+
+@@ -1116,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw
+ cpu_relax();
+
+ for (i = 0; i < dw->dma.chancnt; i++)
+- dw->chan[i].initialized = false;
++ clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
+ }
+
+ static void dw_dma_on(struct dw_dma *dw)
+@@ -1128,9 +1116,6 @@ static int dwc_alloc_chan_resources(stru
+ {
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+- struct dw_desc *desc;
+- int i;
+- unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+@@ -1161,48 +1146,13 @@ static int dwc_alloc_chan_resources(stru
+ dw_dma_on(dw);
+ dw->in_use |= dwc->mask;
+
+- spin_lock_irqsave(&dwc->lock, flags);
+- i = dwc->descs_allocated;
+- while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+- dma_addr_t phys;
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys);
+- if (!desc)
+- goto err_desc_alloc;
+-
+- memset(desc, 0, sizeof(struct dw_desc));
+-
+- INIT_LIST_HEAD(&desc->tx_list);
+- dma_async_tx_descriptor_init(&desc->txd, chan);
+- desc->txd.tx_submit = dwc_tx_submit;
+- desc->txd.flags = DMA_CTRL_ACK;
+- desc->txd.phys = phys;
+-
+- dwc_desc_put(dwc, desc);
+-
+- spin_lock_irqsave(&dwc->lock, flags);
+- i = ++dwc->descs_allocated;
+- }
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
+-
+- return i;
+-
+-err_desc_alloc:
+- dev_info(chan2dev(chan), "only allocated %d descriptors\n", i);
+-
+- return i;
++ return 0;
+ }
+
+ static void dwc_free_chan_resources(struct dma_chan *chan)
+ {
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+- struct dw_desc *desc, *_desc;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+@@ -1215,17 +1165,15 @@ static void dwc_free_chan_resources(stru
+ BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+ spin_lock_irqsave(&dwc->lock, flags);
+- list_splice_init(&dwc->free_list, &list);
+- dwc->descs_allocated = 0;
+
+ /* Clear custom channel configuration */
+ dwc->src_id = 0;
+ dwc->dst_id = 0;
+
+- dwc->src_master = 0;
+- dwc->dst_master = 0;
++ dwc->m_master = 0;
++ dwc->p_master = 0;
+
+- dwc->initialized = false;
++ clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+
+ /* Disable interrupts */
+ channel_clear_bit(dw, MASK.XFER, dwc->mask);
+@@ -1239,11 +1187,6 @@ static void dwc_free_chan_resources(stru
+ if (!dw->in_use)
+ dw_dma_off(dw);
+
+- list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+- dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+- dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
+- }
+-
+ dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
+ }
+
+@@ -1321,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+ struct dw_cyclic_desc *retval = NULL;
+ struct dw_desc *desc;
+ struct dw_desc *last = NULL;
++ u8 lms = DWC_LLP_LMS(dwc->m_master);
+ unsigned long was_cyclic;
+ unsigned int reg_width;
+ unsigned int periods;
+@@ -1374,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+
+ retval = ERR_PTR(-ENOMEM);
+
+- if (periods > NR_DESCS_PER_CHANNEL)
+- goto out_err;
+-
+ cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
+ if (!cdesc)
+ goto out_err;
+@@ -1392,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+- desc->lli.dar = sconfig->dst_addr;
+- desc->lli.sar = buf_addr + (period_len * i);
+- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+- | DWC_CTLL_DST_WIDTH(reg_width)
+- | DWC_CTLL_SRC_WIDTH(reg_width)
+- | DWC_CTLL_DST_FIX
+- | DWC_CTLL_SRC_INC
+- | DWC_CTLL_INT_EN);
+-
+- desc->lli.ctllo |= sconfig->device_fc ?
+- DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+- DWC_CTLL_FC(DW_DMA_FC_D_M2P);
++ lli_write(desc, dar, sconfig->dst_addr);
++ lli_write(desc, sar, buf_addr + period_len * i);
++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
++ | DWC_CTLL_DST_WIDTH(reg_width)
++ | DWC_CTLL_SRC_WIDTH(reg_width)
++ | DWC_CTLL_DST_FIX
++ | DWC_CTLL_SRC_INC
++ | DWC_CTLL_INT_EN));
++
++ lli_set(desc, ctllo, sconfig->device_fc ?
++ DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
++ DWC_CTLL_FC(DW_DMA_FC_D_M2P));
+
+ break;
+ case DMA_DEV_TO_MEM:
+- desc->lli.dar = buf_addr + (period_len * i);
+- desc->lli.sar = sconfig->src_addr;
+- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+- | DWC_CTLL_SRC_WIDTH(reg_width)
+- | DWC_CTLL_DST_WIDTH(reg_width)
+- | DWC_CTLL_DST_INC
+- | DWC_CTLL_SRC_FIX
+- | DWC_CTLL_INT_EN);
+-
+- desc->lli.ctllo |= sconfig->device_fc ?
+- DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+- DWC_CTLL_FC(DW_DMA_FC_D_P2M);
++ lli_write(desc, dar, buf_addr + period_len * i);
++ lli_write(desc, sar, sconfig->src_addr);
++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
++ | DWC_CTLL_SRC_WIDTH(reg_width)
++ | DWC_CTLL_DST_WIDTH(reg_width)
++ | DWC_CTLL_DST_INC
++ | DWC_CTLL_SRC_FIX
++ | DWC_CTLL_INT_EN));
++
++ lli_set(desc, ctllo, sconfig->device_fc ?
++ DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
++ DWC_CTLL_FC(DW_DMA_FC_D_P2M));
+
+ break;
+ default:
+ break;
+ }
+
+- desc->lli.ctlhi = (period_len >> reg_width);
++ lli_write(desc, ctlhi, period_len >> reg_width);
+ cdesc->desc[i] = desc;
+
+ if (last)
+- last->lli.llp = desc->txd.phys;
++ lli_write(last, llp, desc->txd.phys | lms);
+
+ last = desc;
+ }
+
+ /* Let's make a cyclic list */
+- last->lli.llp = cdesc->desc[0]->txd.phys;
++ lli_write(last, llp, cdesc->desc[0]->txd.phys | lms);
+
+ dev_dbg(chan2dev(&dwc->chan),
+ "cyclic prepared buf %pad len %zu period %zu periods %d\n",
+@@ -1466,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_cyclic_desc *cdesc = dwc->cdesc;
+- int i;
++ unsigned int i;
+ unsigned long flags;
+
+ dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__);
+@@ -1490,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan
+ kfree(cdesc->desc);
+ kfree(cdesc);
+
++ dwc->cdesc = NULL;
++
+ clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+ }
+ EXPORT_SYMBOL(dw_dma_cyclic_free);
+
+ /*----------------------------------------------------------------------*/
+
+-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
++int dw_dma_probe(struct dw_dma_chip *chip)
+ {
++ struct dw_dma_platform_data *pdata;
+ struct dw_dma *dw;
+ bool autocfg = false;
+ unsigned int dw_params;
+- unsigned int max_blk_size = 0;
++ unsigned int i;
+ int err;
+- int i;
+
+ dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
++ dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
++ if (!dw->pdata)
++ return -ENOMEM;
++
+ dw->regs = chip->regs;
+ chip->dw = dw;
+
+ pm_runtime_get_sync(chip->dev);
+
+- if (!pdata) {
+- dw_params = dma_read_byaddr(chip->regs, DW_PARAMS);
++ if (!chip->pdata) {
++ dw_params = dma_readl(dw, DW_PARAMS);
+ dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params);
+
+ autocfg = dw_params >> DW_PARAMS_EN & 1;
+@@ -1524,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ goto err_pdata;
+ }
+
+- pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL);
+- if (!pdata) {
+- err = -ENOMEM;
+- goto err_pdata;
+- }
++ /* Reassign the platform data pointer */
++ pdata = dw->pdata;
+
+ /* Get hardware configuration parameters */
+ pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
+ pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+ for (i = 0; i < pdata->nr_masters; i++) {
+ pdata->data_width[i] =
+- (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2;
++ 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3);
+ }
+- max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
++ pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
+
+ /* Fill platform data with the default values */
+ pdata->is_private = true;
+ pdata->is_memcpy = true;
+ pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
+ pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
+- } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
++ } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
+ err = -EINVAL;
+ goto err_pdata;
++ } else {
++ memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata));
++
++ /* Reassign the platform data pointer */
++ pdata = dw->pdata;
+ }
+
+ dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan),
+@@ -1556,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ goto err_pdata;
+ }
+
+- /* Get hardware configuration parameters */
+- dw->nr_masters = pdata->nr_masters;
+- for (i = 0; i < dw->nr_masters; i++)
+- dw->data_width[i] = pdata->data_width[i];
+-
+ /* Calculate all channel mask before DMA setup */
+ dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+@@ -1607,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
+
+ INIT_LIST_HEAD(&dwc->active_list);
+ INIT_LIST_HEAD(&dwc->queue);
+- INIT_LIST_HEAD(&dwc->free_list);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+
+@@ -1615,11 +1558,9 @@ int dw_dma_probe(struct dw_dma_chip *chi
+
+ /* Hardware configuration */
+ if (autocfg) {
+- unsigned int dwc_params;
+ unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
+- void __iomem *addr = chip->regs + r * sizeof(u32);
+-
+- dwc_params = dma_read_byaddr(addr, DWC_PARAMS);
++ void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
++ unsigned int dwc_params = dma_readl_native(addr);
+
+ dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
+ dwc_params);
+@@ -1630,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ * up to 0x0a for 4095.
+ */
+ dwc->block_size =
+- (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
++ (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
+ dwc->nollp =
+ (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
+ } else {
+ dwc->block_size = pdata->block_size;
+
+ /* Check if channel supports multi block transfer */
+- channel_writel(dwc, LLP, 0xfffffffc);
+- dwc->nollp =
+- (channel_readl(dwc, LLP) & 0xfffffffc) == 0;
++ channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff));
++ dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0;
+ channel_writel(dwc, LLP, 0);
+ }
+ }
+--- a/drivers/dma/dw/pci.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/pci.c 2016-05-21 22:47:08.665465180 +0200
+@@ -17,8 +17,8 @@
+
+ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
+ {
++ const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+ struct dw_dma_chip *chip;
+- struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+@@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev *
+ chip->dev = &pdev->dev;
+ chip->regs = pcim_iomap_table(pdev)[0];
+ chip->irq = pdev->irq;
++ chip->pdata = pdata;
+
+- ret = dw_dma_probe(chip, pdata);
++ ret = dw_dma_probe(chip);
+ if (ret)
+ return ret;
+
+@@ -108,6 +109,10 @@ static const struct pci_device_id dw_pci
+
+ /* Haswell */
+ { PCI_VDEVICE(INTEL, 0x9c60) },
++
++ /* Broadwell */
++ { PCI_VDEVICE(INTEL, 0x9ce0) },
++
+ { }
+ };
+ MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
+--- a/drivers/dma/dw/platform.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/platform.c 2016-05-21 22:47:08.665465180 +0200
+@@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(
+
+ slave.src_id = dma_spec->args[0];
+ slave.dst_id = dma_spec->args[0];
+- slave.src_master = dma_spec->args[1];
+- slave.dst_master = dma_spec->args[2];
++ slave.m_master = dma_spec->args[1];
++ slave.p_master = dma_spec->args[2];
+
+ if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS ||
+ slave.dst_id >= DW_DMA_MAX_NR_REQUESTS ||
+- slave.src_master >= dw->nr_masters ||
+- slave.dst_master >= dw->nr_masters))
++ slave.m_master >= dw->pdata->nr_masters ||
++ slave.p_master >= dw->pdata->nr_masters))
+ return NULL;
+
+ dma_cap_zero(cap);
+@@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dm
+ .dma_dev = dma_spec->dev,
+ .src_id = dma_spec->slave_id,
+ .dst_id = dma_spec->slave_id,
+- .src_master = 1,
+- .dst_master = 0,
++ .m_master = 0,
++ .p_master = 1,
+ };
+
+ return dw_dma_filter(chan, &slave);
+@@ -103,18 +103,28 @@ dw_dma_parse_dt(struct platform_device *
+ struct device_node *np = pdev->dev.of_node;
+ struct dw_dma_platform_data *pdata;
+ u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
++ u32 nr_masters;
++ u32 nr_channels;
+
+ if (!np) {
+ dev_err(&pdev->dev, "Missing DT data\n");
+ return NULL;
+ }
+
++ if (of_property_read_u32(np, "dma-masters", &nr_masters))
++ return NULL;
++ if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
++ return NULL;
++
++ if (of_property_read_u32(np, "dma-channels", &nr_channels))
++ return NULL;
++
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return NULL;
+
+- if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
+- return NULL;
++ pdata->nr_masters = nr_masters;
++ pdata->nr_channels = nr_channels;
+
+ if (of_property_read_bool(np, "is_private"))
+ pdata->is_private = true;
+@@ -128,17 +138,13 @@ dw_dma_parse_dt(struct platform_device *
+ if (!of_property_read_u32(np, "block_size", &tmp))
+ pdata->block_size = tmp;
+
+- if (!of_property_read_u32(np, "dma-masters", &tmp)) {
+- if (tmp > DW_DMA_MAX_NR_MASTERS)
+- return NULL;
+-
+- pdata->nr_masters = tmp;
+- }
+-
+- if (!of_property_read_u32_array(np, "data_width", arr,
+- pdata->nr_masters))
+- for (tmp = 0; tmp < pdata->nr_masters; tmp++)
++ if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
++ for (tmp = 0; tmp < nr_masters; tmp++)
+ pdata->data_width[tmp] = arr[tmp];
++ } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
++ for (tmp = 0; tmp < nr_masters; tmp++)
++ pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
++ }
+
+ return pdata;
+ }
+@@ -155,8 +161,7 @@ static int dw_probe(struct platform_devi
+ struct dw_dma_chip *chip;
+ struct device *dev = &pdev->dev;
+ struct resource *mem;
+- const struct acpi_device_id *id;
+- struct dw_dma_platform_data *pdata;
++ const struct dw_dma_platform_data *pdata;
+ int err;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+@@ -179,13 +184,9 @@ static int dw_probe(struct platform_devi
+ pdata = dev_get_platdata(dev);
+ if (!pdata)
+ pdata = dw_dma_parse_dt(pdev);
+- if (!pdata && has_acpi_companion(dev)) {
+- id = acpi_match_device(dev->driver->acpi_match_table, dev);
+- if (id)
+- pdata = (struct dw_dma_platform_data *)id->driver_data;
+- }
+
+ chip->dev = dev;
++ chip->pdata = pdata;
+
+ chip->clk = devm_clk_get(chip->dev, "hclk");
+ if (IS_ERR(chip->clk))
+@@ -196,7 +197,7 @@ static int dw_probe(struct platform_devi
+
+ pm_runtime_enable(&pdev->dev);
+
+- err = dw_dma_probe(chip, pdata);
++ err = dw_dma_probe(chip);
+ if (err)
+ goto err_dw_dma_probe;
+
+@@ -239,7 +240,19 @@ static void dw_shutdown(struct platform_
+ {
+ struct dw_dma_chip *chip = platform_get_drvdata(pdev);
+
++ /*
++ * We have to call dw_dma_disable() to stop any ongoing transfer. On
++ * some platforms we can't do that since DMA device is powered off.
++ * Moreover we have no possibility to check if the platform is affected
++ * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put()
++ * unconditionally. On the other hand we can't use
++ * pm_runtime_suspended() because runtime PM framework is not fully
++ * used by the driver.
++ */
++ pm_runtime_get_sync(chip->dev);
+ dw_dma_disable(chip);
++ pm_runtime_put_sync_suspend(chip->dev);
++
+ clk_disable_unprepare(chip->clk);
+ }
+
+@@ -252,17 +265,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_tab
+ #endif
+
+ #ifdef CONFIG_ACPI
+-static struct dw_dma_platform_data dw_dma_acpi_pdata = {
+- .nr_channels = 8,
+- .is_private = true,
+- .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+- .chan_priority = CHAN_PRIORITY_ASCENDING,
+- .block_size = 4095,
+- .nr_masters = 2,
+-};
+-
+ static const struct acpi_device_id dw_dma_acpi_id_table[] = {
+- { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata },
++ { "INTL9C60", 0 },
+ { }
+ };
+ MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
+--- a/drivers/dma/dw/regs.h 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/regs.h 2016-05-21 22:47:08.665465180 +0200
+@@ -114,10 +114,6 @@ struct dw_dma_regs {
+ #define dma_writel_native writel
+ #endif
+
+-/* To access the registers in early stage of probe */
+-#define dma_read_byaddr(addr, name) \
+- dma_readl_native((addr) + offsetof(struct dw_dma_regs, name))
+-
+ /* Bitfields in DW_PARAMS */
+ #define DW_PARAMS_NR_CHAN 8 /* number of channels */
+ #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */
+@@ -143,6 +139,10 @@ enum dw_dma_msize {
+ DW_DMA_MSIZE_256,
+ };
+
++/* Bitfields in LLP */
++#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */
++#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */
++
+ /* Bitfields in CTL_LO */
+ #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */
+ #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */
+@@ -150,7 +150,7 @@ enum dw_dma_msize {
+ #define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */
+ #define DWC_CTLL_DST_DEC (1<<7)
+ #define DWC_CTLL_DST_FIX (2<<7)
+-#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */
++#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */
+ #define DWC_CTLL_SRC_DEC (1<<9)
+ #define DWC_CTLL_SRC_FIX (2<<9)
+ #define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */
+@@ -216,6 +216,8 @@ enum dw_dma_msize {
+ enum dw_dmac_flags {
+ DW_DMA_IS_CYCLIC = 0,
+ DW_DMA_IS_SOFT_LLP = 1,
++ DW_DMA_IS_PAUSED = 2,
++ DW_DMA_IS_INITIALIZED = 3,
+ };
+
+ struct dw_dma_chan {
+@@ -224,8 +226,6 @@ struct dw_dma_chan {
+ u8 mask;
+ u8 priority;
+ enum dma_transfer_direction direction;
+- bool paused;
+- bool initialized;
+
+ /* software emulation of the LLP transfers */
+ struct list_head *tx_node_active;
+@@ -236,8 +236,6 @@ struct dw_dma_chan {
+ unsigned long flags;
+ struct list_head active_list;
+ struct list_head queue;
+- struct list_head free_list;
+- u32 residue;
+ struct dw_cyclic_desc *cdesc;
+
+ unsigned int descs_allocated;
+@@ -249,8 +247,8 @@ struct dw_dma_chan {
+ /* custom slave configuration */
+ u8 src_id;
+ u8 dst_id;
+- u8 src_master;
+- u8 dst_master;
++ u8 m_master;
++ u8 p_master;
+
+ /* configuration passed via .device_config */
+ struct dma_slave_config dma_sconfig;
+@@ -283,9 +281,8 @@ struct dw_dma {
+ u8 all_chan_mask;
+ u8 in_use;
+
+- /* hardware configuration */
+- unsigned char nr_masters;
+- unsigned char data_width[DW_DMA_MAX_NR_MASTERS];
++ /* platform data */
++ struct dw_dma_platform_data *pdata;
+ };
+
+ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+@@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(s
+ return container_of(ddev, struct dw_dma, dma);
+ }
+
++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
++typedef __be32 __dw32;
++#else
++typedef __le32 __dw32;
++#endif
++
+ /* LLI == Linked List Item; a.k.a. DMA block descriptor */
+ struct dw_lli {
+ /* values that are not changed by hardware */
+- u32 sar;
+- u32 dar;
+- u32 llp; /* chain to next lli */
+- u32 ctllo;
++ __dw32 sar;
++ __dw32 dar;
++ __dw32 llp; /* chain to next lli */
++ __dw32 ctllo;
+ /* values that may get written back: */
+- u32 ctlhi;
++ __dw32 ctlhi;
+ /* sstat and dstat can snapshot peripheral register state.
+ * silicon config may discard either or both...
+ */
+- u32 sstat;
+- u32 dstat;
++ __dw32 sstat;
++ __dw32 dstat;
+ };
+
+ struct dw_desc {
+ /* FIRST values the hardware uses */
+ struct dw_lli lli;
+
++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v))
++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v))
++#define lli_read(d, reg) be32_to_cpu((d)->lli.reg)
++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v))
++#else
++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v))
++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v))
++#define lli_read(d, reg) le32_to_cpu((d)->lli.reg)
++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v))
++#endif
++
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+ size_t total_len;
++ u32 residue;
+ };
+
+ #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node)
+--- a/include/linux/dma/dw.h
++++ b/include/linux/dma/dw.h
+@@ -27,6 +27,7 @@ struct dw_dma;
+ * @regs: memory mapped I/O space
+ * @clk: hclk clock
+ * @dw: struct dw_dma that is filed by dw_dma_probe()
++ * @pdata: pointer to platform data
+ */
+ struct dw_dma_chip {
+ struct device *dev;
+@@ -34,10 +35,12 @@ struct dw_dma_chip {
+ void __iomem *regs;
+ struct clk *clk;
+ struct dw_dma *dw;
++
++ const struct dw_dma_platform_data *pdata;
+ };
+
+ /* Export to the platform drivers */
+-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata);
++int dw_dma_probe(struct dw_dma_chip *chip);
+ int dw_dma_remove(struct dw_dma_chip *chip);
+
+ /* DMA API extensions */
+diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
+index 03b6095..d15d8ba 100644
+--- a/include/linux/platform_data/dma-dw.h
++++ b/include/linux/platform_data/dma-dw.h
+@@ -21,15 +21,15 @@
+ * @dma_dev: required DMA master device
+ * @src_id: src request line
+ * @dst_id: dst request line
+- * @src_master: src master for transfers on allocated channel.
+- * @dst_master: dest master for transfers on allocated channel.
++ * @m_master: memory master for transfers on allocated channel
++ * @p_master: peripheral master for transfers on allocated channel
+ */
+ struct dw_dma_slave {
+ struct device *dma_dev;
+ u8 src_id;
+ u8 dst_id;
+- u8 src_master;
+- u8 dst_master;
++ u8 m_master;
++ u8 p_master;
+ };
+
+ /**
+@@ -43,7 +43,7 @@ struct dw_dma_slave {
+ * @block_size: Maximum block size supported by the controller
+ * @nr_masters: Number of AHB masters supported by the controller
+ * @data_width: Maximum data width supported by hardware per AHB master
+- * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
++ * (in bytes, power of 2)
+ */
+ struct dw_dma_platform_data {
+ unsigned int nr_channels;
+@@ -55,7 +55,7 @@ struct dw_dma_platform_data {
+ #define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */
+ #define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */
+ unsigned char chan_priority;
+- unsigned short block_size;
++ unsigned int block_size;
+ unsigned char nr_masters;
+ unsigned char data_width[DW_DMA_MAX_NR_MASTERS];
+ };
+--
+2.8.1
+
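A closing note on the "data-width" rework ("dmaengine: dw: revisit data_width property"): bus widths are now stored in bytes as powers of two, both when read from the new "data-width" DT property and when autodetected from DW_PARAMS, while the legacy "data_width" property keeps its log2 encoding (0 = 8 bits, ..., 5 = 256 bits). The fallback path in platform.c converts with BIT(value & 0x07); a small sketch of that conversion, with an invented helper name:

    /*
     * Hypothetical helper mirroring the legacy "data_width" fallback:
     * log2 encoding -> bus width in bytes (0 -> 1 byte, ..., 5 -> 32 bytes).
     */
    static inline unsigned int dw_legacy_data_width_to_bytes(unsigned int encoded)
    {
            return 1U << (encoded & 0x07);
    }

Storing widths in bytes is what lets core.c compute the transfer width in one step, e.g. __ffs(data_width | src | dest | len): the lowest set bit across the addresses and length gives their common alignment, and OR-ing in the byte-sized bus width caps the result at what the master can actually do.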