aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch')
-rw-r--r--target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch1522
1 files changed, 1522 insertions, 0 deletions
diff --git a/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch
new file mode 100644
index 0000000000..96b11a82b6
--- /dev/null
+++ b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch
@@ -0,0 +1,1522 @@
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Subject: [PATCH v6 0/4] Fixes / cleanups in dw_dmac (affects on few subsystems)
+Date: Mon, 25 Apr 2016 15:35:05 +0300
+
+This patch series (v3: http://www.spinics.net/lists/kernel/msg2215303.html)
+contains a number of mostly minor fixes and cleanups for the DW DMA driver. A
+couple of them affect the DT binding so these may need to be updated to
+maintain compatibility (old format is still supported though). The rest should
+be relatively straight-forward.
+
+This version has been tested on the following bare metal platforms:
+- ATNGW100 (avr32 based platform) with dmatest
+- Sam460ex (powerpc 44x based platform) with SATA
+- Intel Braswell with UART
+- Intel Galileo (Intel Quark based platform) with UART
+
+(SATA driver and Intel Galileo UART support are based on this series and were
+ just published recently for review)
+
+Vinod, there are a few patch sets developed on top of this one, so the idea is
+to keep this in an immutable branch / tag.
+
+Changes since v5:
+- fixed an issue found by kbuildbot
+
+Changes since v4:
+- send proper set of patches
+- add changelog
+
+Changes since v3:
+- add patch 1 to check value of dma-masters property
+- drop the upstreamed patches
+- update patch 2 to keep an array for data-width property as well
+
+Changes since v2:
+- add patch 1 to fix master selection which was broken for a long time
+- remove "use field-by-field initialization" patch since, as Mans mentioned, it
+ has mostly no value and might even be more error prone
+- rebase on top of recent linux-next
+- wide testing on several platforms
+
+Changes since v1:
+- zeroing struct dw_dma_slave before use
+- fall back to old data_width property if data-width is not found
+- append tags for few patches
+- correct title of cover letter
+- rebase on top of recent linux-next
+
+Andy Shevchenko (4):
+ dmaengine: dw: platform: check nr_masters to be non-zero
+ dmaengine: dw: revisit data_width property
+ dmaengine: dw: keep entire platform data in struct dw_dma
+ dmaengine: dw: pass platform data via struct dw_dma_chip
+
+ Documentation/devicetree/bindings/dma/snps-dma.txt | 6 +-
+ arch/arc/boot/dts/abilis_tb10x.dtsi | 2 +-
+ arch/arm/boot/dts/spear13xx.dtsi | 4 +-
+ drivers/ata/sata_dwc_460ex.c | 2 +-
+ drivers/dma/dw/core.c | 75 ++++++++--------------
+ drivers/dma/dw/pci.c | 5 +-
+ drivers/dma/dw/platform.c | 32 +++++----
+ drivers/dma/dw/regs.h | 5 +-
+ include/linux/dma/dw.h | 5 +-
+ include/linux/platform_data/dma-dw.h | 4 +-
+ sound/soc/intel/common/sst-firmware.c | 2 +-
+ 11 files changed, 64 insertions(+), 78 deletions(-)
+
+--- a/drivers/dma/dw/core.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/core.c 2016-05-21 22:47:08.665465180 +0200
+@@ -45,22 +45,19 @@
+ DW_DMA_MSIZE_16; \
+ u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \
+ DW_DMA_MSIZE_16; \
++ u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \
++ _dwc->p_master : _dwc->m_master; \
++ u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \
++ _dwc->p_master : _dwc->m_master; \
+ \
+ (DWC_CTLL_DST_MSIZE(_dmsize) \
+ | DWC_CTLL_SRC_MSIZE(_smsize) \
+ | DWC_CTLL_LLP_D_EN \
+ | DWC_CTLL_LLP_S_EN \
+- | DWC_CTLL_DMS(_dwc->dst_master) \
+- | DWC_CTLL_SMS(_dwc->src_master)); \
++ | DWC_CTLL_DMS(_dms) \
++ | DWC_CTLL_SMS(_sms)); \
+ })
+
+-/*
+- * Number of descriptors to allocate for each channel. This should be
+- * made configurable somehow; preferably, the clients (at least the
+- * ones using slave transfers) should be able to give us a hint.
+- */
+-#define NR_DESCS_PER_CHANNEL 64
+-
+ /* The set of bus widths supported by the DMA controller */
+ #define DW_DMA_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+@@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active(
+ return to_dw_desc(dwc->active_list.next);
+ }
+
+-static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
++static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+ {
+- struct dw_desc *desc, *_desc;
+- struct dw_desc *ret = NULL;
+- unsigned int i = 0;
+- unsigned long flags;
++ struct dw_desc *desc = txd_to_dw_desc(tx);
++ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
++ dma_cookie_t cookie;
++ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+- list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+- i++;
+- if (async_tx_test_ack(&desc->txd)) {
+- list_del(&desc->desc_node);
+- ret = desc;
+- break;
+- }
+- dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc);
+- }
++ cookie = dma_cookie_assign(tx);
++
++ /*
++ * REVISIT: We should attempt to chain as many descriptors as
++ * possible, perhaps even appending to those already submitted
++ * for DMA. But this is hard to do in a race-free manner.
++ */
++
++ list_add_tail(&desc->desc_node, &dwc->queue);
+ spin_unlock_irqrestore(&dwc->lock, flags);
++ dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n",
++ __func__, desc->txd.cookie);
+
+- dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i);
++ return cookie;
++}
+
+- return ret;
++static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
++{
++ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ struct dw_desc *desc;
++ dma_addr_t phys;
++
++ desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);
++ if (!desc)
++ return NULL;
++
++ dwc->descs_allocated++;
++ INIT_LIST_HEAD(&desc->tx_list);
++ dma_async_tx_descriptor_init(&desc->txd, &dwc->chan);
++ desc->txd.tx_submit = dwc_tx_submit;
++ desc->txd.flags = DMA_CTRL_ACK;
++ desc->txd.phys = phys;
++ return desc;
+ }
+
+-/*
+- * Move a descriptor, including any children, to the free list.
+- * `desc' must not be on any lists.
+- */
+ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+ {
+- unsigned long flags;
++ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ struct dw_desc *child, *_next;
+
+- if (desc) {
+- struct dw_desc *child;
++ if (unlikely(!desc))
++ return;
+
+- spin_lock_irqsave(&dwc->lock, flags);
+- list_for_each_entry(child, &desc->tx_list, desc_node)
+- dev_vdbg(chan2dev(&dwc->chan),
+- "moving child desc %p to freelist\n",
+- child);
+- list_splice_init(&desc->tx_list, &dwc->free_list);
+- dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
+- list_add(&desc->desc_node, &dwc->free_list);
+- spin_unlock_irqrestore(&dwc->lock, flags);
++ list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) {
++ list_del(&child->desc_node);
++ dma_pool_free(dw->desc_pool, child, child->txd.phys);
++ dwc->descs_allocated--;
+ }
++
++ dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
++ dwc->descs_allocated--;
+ }
+
+ static void dwc_initialize(struct dw_dma_chan *dwc)
+@@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma
+ u32 cfghi = DWC_CFGH_FIFO_MODE;
+ u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+
+- if (dwc->initialized == true)
++ if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
+ return;
+
+ cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
+@@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma
+ channel_set_bit(dw, MASK.XFER, dwc->mask);
+ channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+- dwc->initialized = true;
++ set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+ }
+
+ /*----------------------------------------------------------------------*/
+
+-static inline unsigned int dwc_fast_ffs(unsigned long long v)
+-{
+- /*
+- * We can be a lot more clever here, but this should take care
+- * of the most common optimization.
+- */
+- if (!(v & 7))
+- return 3;
+- else if (!(v & 3))
+- return 2;
+- else if (!(v & 1))
+- return 1;
+- return 0;
+-}
+-
+ static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+ {
+ dev_err(chan2dev(&dwc->chan),
+@@ -197,12 +193,12 @@ static inline void dwc_do_single_block(s
+ * Software emulation of LLP mode relies on interrupts to continue
+ * multi block transfer.
+ */
+- ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN;
++ ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN;
+
+- channel_writel(dwc, SAR, desc->lli.sar);
+- channel_writel(dwc, DAR, desc->lli.dar);
++ channel_writel(dwc, SAR, lli_read(desc, sar));
++ channel_writel(dwc, DAR, lli_read(desc, dar));
+ channel_writel(dwc, CTL_LO, ctllo);
+- channel_writel(dwc, CTL_HI, desc->lli.ctlhi);
++ channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi));
+ channel_set_bit(dw, CH_EN, dwc->mask);
+
+ /* Move pointer to next descriptor */
+@@ -213,6 +209,7 @@ static inline void dwc_do_single_block(s
+ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+ {
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
++ u8 lms = DWC_LLP_LMS(dwc->m_master);
+ unsigned long was_soft_llp;
+
+ /* ASSERT: channel is idle */
+@@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_ch
+
+ dwc_initialize(dwc);
+
+- dwc->residue = first->total_len;
++ first->residue = first->total_len;
+ dwc->tx_node_active = &first->tx_list;
+
+ /* Submit first block */
+@@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_ch
+
+ dwc_initialize(dwc);
+
+- channel_writel(dwc, LLP, first->txd.phys);
+- channel_writel(dwc, CTL_LO,
+- DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
++ channel_writel(dwc, LLP, first->txd.phys | lms);
++ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+ channel_set_bit(dw, CH_EN, dwc->mask);
+ }
+@@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_ch
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ async_tx_ack(&child->txd);
+ async_tx_ack(&desc->txd);
+-
+- list_splice_init(&desc->tx_list, &dwc->free_list);
+- list_move(&desc->desc_node, &dwc->free_list);
+-
+- dma_descriptor_unmap(txd);
++ dwc_desc_put(dwc, desc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ if (callback)
+@@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct
+
+ head = &desc->tx_list;
+ if (active != head) {
+- /* Update desc to reflect last sent one */
+- if (active != head->next)
+- desc = to_dw_desc(active->prev);
+-
+- dwc->residue -= desc->len;
++ /* Update residue to reflect last sent descriptor */
++ if (active == head->next)
++ desc->residue -= desc->len;
++ else
++ desc->residue -= to_dw_desc(active->prev)->len;
+
+ child = to_dw_desc(active);
+
+@@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct
+ clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+ }
+
+- dwc->residue = 0;
+-
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ dwc_complete_all(dw, dwc);
+@@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct
+ }
+
+ if (list_empty(&dwc->active_list)) {
+- dwc->residue = 0;
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+@@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct
+
+ list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+ /* Initial residue value */
+- dwc->residue = desc->total_len;
++ desc->residue = desc->total_len;
+
+ /* Check first descriptors addr */
+- if (desc->txd.phys == llp) {
++ if (desc->txd.phys == DWC_LLP_LOC(llp)) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ /* Check first descriptors llp */
+- if (desc->lli.llp == llp) {
++ if (lli_read(desc, llp) == llp) {
+ /* This one is currently in progress */
+- dwc->residue -= dwc_get_sent(dwc);
++ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+- dwc->residue -= desc->len;
++ desc->residue -= desc->len;
+ list_for_each_entry(child, &desc->tx_list, desc_node) {
+- if (child->lli.llp == llp) {
++ if (lli_read(child, llp) == llp) {
+ /* Currently in progress */
+- dwc->residue -= dwc_get_sent(dwc);
++ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+- dwc->residue -= child->len;
++ desc->residue -= child->len;
+ }
+
+ /*
+@@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ }
+
+-static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
++static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc)
+ {
+ dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+- lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo);
++ lli_read(desc, sar),
++ lli_read(desc, dar),
++ lli_read(desc, llp),
++ lli_read(desc, ctlhi),
++ lli_read(desc, ctllo));
+ }
+
+ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+@@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_d
+ */
+ dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n"
+ " cookie: %d\n", bad_desc->txd.cookie);
+- dwc_dump_lli(dwc, &bad_desc->lli);
++ dwc_dump_lli(dwc, bad_desc);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+- dwc_dump_lli(dwc, &child->lli);
++ dwc_dump_lli(dwc, child);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_
+ */
+ if (unlikely(status_err & dwc->mask) ||
+ unlikely(status_xfer & dwc->mask)) {
+- int i;
++ unsigned int i;
+
+ dev_err(chan2dev(&dwc->chan),
+ "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n",
+@@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ for (i = 0; i < dwc->cdesc->periods; i++)
+- dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
++ dwc_dump_lli(dwc, dwc->cdesc->desc[i]);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ }
+@@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long
+ u32 status_block;
+ u32 status_xfer;
+ u32 status_err;
+- int i;
++ unsigned int i;
+
+ status_block = dma_readl(dw, RAW.BLOCK);
+ status_xfer = dma_readl(dw, RAW.XFER);
+@@ -616,12 +609,17 @@ static void dw_dma_tasklet(unsigned long
+ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+ {
+ struct dw_dma *dw = dev_id;
+- u32 status = dma_readl(dw, STATUS_INT);
++ u32 status;
++
++ /* Check if we have any interrupt from the DMAC which is not in use */
++ if (!dw->in_use)
++ return IRQ_NONE;
+
++ status = dma_readl(dw, STATUS_INT);
+ dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
+
+ /* Check if we have any interrupt from the DMAC */
+- if (!status || !dw->in_use)
++ if (!status)
+ return IRQ_NONE;
+
+ /*
+@@ -653,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int
+
+ /*----------------------------------------------------------------------*/
+
+-static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+-{
+- struct dw_desc *desc = txd_to_dw_desc(tx);
+- struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
+- dma_cookie_t cookie;
+- unsigned long flags;
+-
+- spin_lock_irqsave(&dwc->lock, flags);
+- cookie = dma_cookie_assign(tx);
+-
+- /*
+- * REVISIT: We should attempt to chain as many descriptors as
+- * possible, perhaps even appending to those already submitted
+- * for DMA. But this is hard to do in a race-free manner.
+- */
+-
+- dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie);
+- list_add_tail(&desc->desc_node, &dwc->queue);
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- return cookie;
+-}
+-
+ static struct dma_async_tx_descriptor *
+ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+@@ -688,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+ struct dw_desc *prev;
+ size_t xfer_count;
+ size_t offset;
++ u8 m_master = dwc->m_master;
+ unsigned int src_width;
+ unsigned int dst_width;
+- unsigned int data_width;
++ unsigned int data_width = dw->pdata->data_width[m_master];
+ u32 ctllo;
++ u8 lms = DWC_LLP_LMS(m_master);
+
+ dev_vdbg(chan2dev(chan),
+ "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__,
+@@ -704,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+
+ dwc->direction = DMA_MEM_TO_MEM;
+
+- data_width = min_t(unsigned int, dw->data_width[dwc->src_master],
+- dw->data_width[dwc->dst_master]);
+-
+- src_width = dst_width = min_t(unsigned int, data_width,
+- dwc_fast_ffs(src | dest | len));
++ src_width = dst_width = __ffs(data_width | src | dest | len);
+
+ ctllo = DWC_DEFAULT_CTLLO(chan)
+ | DWC_CTLL_DST_WIDTH(dst_width)
+@@ -726,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *cha
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = src + offset;
+- desc->lli.dar = dest + offset;
+- desc->lli.ctllo = ctllo;
+- desc->lli.ctlhi = xfer_count;
++ lli_write(desc, sar, src + offset);
++ lli_write(desc, dar, dest + offset);
++ lli_write(desc, ctllo, ctllo);
++ lli_write(desc, ctlhi, xfer_count);
+ desc->len = xfer_count << src_width;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+- prev->lli.ctllo |= DWC_CTLL_INT_EN;
++ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->txd.flags = flags;
+ first->total_len = len;
+
+@@ -768,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ struct dw_desc *prev;
+ struct dw_desc *first;
+ u32 ctllo;
++ u8 m_master = dwc->m_master;
++ u8 lms = DWC_LLP_LMS(m_master);
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+- unsigned int data_width;
++ unsigned int data_width = dw->pdata->data_width[m_master];
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+@@ -797,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+ DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
+- data_width = dw->data_width[dwc->src_master];
+-
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, dlen, mem;
+@@ -806,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan,
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+- mem_width = min_t(unsigned int,
+- data_width, dwc_fast_ffs(mem | len));
++ mem_width = __ffs(data_width | mem | len);
+
+ slave_sg_todev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = mem;
+- desc->lli.dar = reg;
+- desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
++ lli_write(desc, sar, mem);
++ lli_write(desc, dar, reg);
++ lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
+ if ((len >> mem_width) > dwc->block_size) {
+ dlen = dwc->block_size << mem_width;
+ mem += dlen;
+@@ -826,15 +797,14 @@ slave_sg_todev_fill_desc:
+ len = 0;
+ }
+
+- desc->lli.ctlhi = dlen >> mem_width;
++ lli_write(desc, ctlhi, dlen >> mem_width);
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ total_len += dlen;
+@@ -854,8 +824,6 @@ slave_sg_todev_fill_desc:
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+ DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
+- data_width = dw->data_width[dwc->dst_master];
+-
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, dlen, mem;
+@@ -863,17 +831,16 @@ slave_sg_todev_fill_desc:
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+- mem_width = min_t(unsigned int,
+- data_width, dwc_fast_ffs(mem | len));
++ mem_width = __ffs(data_width | mem | len);
+
+ slave_sg_fromdev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+- desc->lli.sar = reg;
+- desc->lli.dar = mem;
+- desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
++ lli_write(desc, sar, reg);
++ lli_write(desc, dar, mem);
++ lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
+ if ((len >> reg_width) > dwc->block_size) {
+ dlen = dwc->block_size << reg_width;
+ mem += dlen;
+@@ -882,15 +849,14 @@ slave_sg_fromdev_fill_desc:
+ dlen = len;
+ len = 0;
+ }
+- desc->lli.ctlhi = dlen >> reg_width;
++ lli_write(desc, ctlhi, dlen >> reg_width);
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+- prev->lli.llp = desc->txd.phys;
+- list_add_tail(&desc->desc_node,
+- &first->tx_list);
++ lli_write(prev, llp, desc->txd.phys | lms);
++ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ total_len += dlen;
+@@ -905,9 +871,10 @@ slave_sg_fromdev_fill_desc:
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+- prev->lli.ctllo |= DWC_CTLL_INT_EN;
++ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->total_len = total_len;
+
+ return &first->txd;
+@@ -932,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan
+ dwc->src_id = dws->src_id;
+ dwc->dst_id = dws->dst_id;
+
+- dwc->src_master = dws->src_master;
+- dwc->dst_master = dws->dst_master;
++ dwc->m_master = dws->m_master;
++ dwc->p_master = dws->p_master;
+
+ return true;
+ }
+@@ -986,7 +953,7 @@ static int dwc_pause(struct dma_chan *ch
+ while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
+ udelay(2);
+
+- dwc->paused = true;
++ set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -999,7 +966,7 @@ static inline void dwc_chan_resume(struc
+
+ channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+
+- dwc->paused = false;
++ clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+ }
+
+ static int dwc_resume(struct dma_chan *chan)
+@@ -1007,12 +974,10 @@ static int dwc_resume(struct dma_chan *c
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ unsigned long flags;
+
+- if (!dwc->paused)
+- return 0;
+-
+ spin_lock_irqsave(&dwc->lock, flags);
+
+- dwc_chan_resume(dwc);
++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
++ dwc_chan_resume(dwc);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+@@ -1048,16 +1013,37 @@ static int dwc_terminate_all(struct dma_
+ return 0;
+ }
+
+-static inline u32 dwc_get_residue(struct dw_dma_chan *dwc)
++static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c)
++{
++ struct dw_desc *desc;
++
++ list_for_each_entry(desc, &dwc->active_list, desc_node)
++ if (desc->txd.cookie == c)
++ return desc;
++
++ return NULL;
++}
++
++static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie)
+ {
++ struct dw_desc *desc;
+ unsigned long flags;
+ u32 residue;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+- residue = dwc->residue;
+- if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
+- residue -= dwc_get_sent(dwc);
++ desc = dwc_find_desc(dwc, cookie);
++ if (desc) {
++ if (desc == dwc_first_active(dwc)) {
++ residue = desc->residue;
++ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
++ residue -= dwc_get_sent(dwc);
++ } else {
++ residue = desc->total_len;
++ }
++ } else {
++ residue = 0;
++ }
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return residue;
+@@ -1078,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan,
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+- if (ret != DMA_COMPLETE)
+- dma_set_residue(txstate, dwc_get_residue(dwc));
++ if (ret == DMA_COMPLETE)
++ return ret;
++
++ dma_set_residue(txstate, dwc_get_residue(dwc, cookie));
+
+- if (dwc->paused && ret == DMA_IN_PROGRESS)
++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS)
+ return DMA_PAUSED;
+
+ return ret;
+@@ -1102,7 +1090,7 @@ static void dwc_issue_pending(struct dma
+
+ static void dw_dma_off(struct dw_dma *dw)
+ {
+- int i;
++ unsigned int i;
+
+ dma_writel(dw, CFG, 0);
+
+@@ -1116,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw
+ cpu_relax();
+
+ for (i = 0; i < dw->dma.chancnt; i++)
+- dw->chan[i].initialized = false;
++ clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
+ }
+
+ static void dw_dma_on(struct dw_dma *dw)
+@@ -1128,9 +1116,6 @@ static int dwc_alloc_chan_resources(stru
+ {
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+- struct dw_desc *desc;
+- int i;
+- unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+@@ -1161,48 +1146,13 @@ static int dwc_alloc_chan_resources(stru
+ dw_dma_on(dw);
+ dw->in_use |= dwc->mask;
+
+- spin_lock_irqsave(&dwc->lock, flags);
+- i = dwc->descs_allocated;
+- while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+- dma_addr_t phys;
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys);
+- if (!desc)
+- goto err_desc_alloc;
+-
+- memset(desc, 0, sizeof(struct dw_desc));
+-
+- INIT_LIST_HEAD(&desc->tx_list);
+- dma_async_tx_descriptor_init(&desc->txd, chan);
+- desc->txd.tx_submit = dwc_tx_submit;
+- desc->txd.flags = DMA_CTRL_ACK;
+- desc->txd.phys = phys;
+-
+- dwc_desc_put(dwc, desc);
+-
+- spin_lock_irqsave(&dwc->lock, flags);
+- i = ++dwc->descs_allocated;
+- }
+-
+- spin_unlock_irqrestore(&dwc->lock, flags);
+-
+- dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
+-
+- return i;
+-
+-err_desc_alloc:
+- dev_info(chan2dev(chan), "only allocated %d descriptors\n", i);
+-
+- return i;
++ return 0;
+ }
+
+ static void dwc_free_chan_resources(struct dma_chan *chan)
+ {
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+- struct dw_desc *desc, *_desc;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+@@ -1215,17 +1165,15 @@ static void dwc_free_chan_resources(stru
+ BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+ spin_lock_irqsave(&dwc->lock, flags);
+- list_splice_init(&dwc->free_list, &list);
+- dwc->descs_allocated = 0;
+
+ /* Clear custom channel configuration */
+ dwc->src_id = 0;
+ dwc->dst_id = 0;
+
+- dwc->src_master = 0;
+- dwc->dst_master = 0;
++ dwc->m_master = 0;
++ dwc->p_master = 0;
+
+- dwc->initialized = false;
++ clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+
+ /* Disable interrupts */
+ channel_clear_bit(dw, MASK.XFER, dwc->mask);
+@@ -1239,11 +1187,6 @@ static void dwc_free_chan_resources(stru
+ if (!dw->in_use)
+ dw_dma_off(dw);
+
+- list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+- dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+- dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
+- }
+-
+ dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
+ }
+
+@@ -1321,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+ struct dw_cyclic_desc *retval = NULL;
+ struct dw_desc *desc;
+ struct dw_desc *last = NULL;
++ u8 lms = DWC_LLP_LMS(dwc->m_master);
+ unsigned long was_cyclic;
+ unsigned int reg_width;
+ unsigned int periods;
+@@ -1374,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+
+ retval = ERR_PTR(-ENOMEM);
+
+- if (periods > NR_DESCS_PER_CHANNEL)
+- goto out_err;
+-
+ cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
+ if (!cdesc)
+ goto out_err;
+@@ -1392,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+- desc->lli.dar = sconfig->dst_addr;
+- desc->lli.sar = buf_addr + (period_len * i);
+- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+- | DWC_CTLL_DST_WIDTH(reg_width)
+- | DWC_CTLL_SRC_WIDTH(reg_width)
+- | DWC_CTLL_DST_FIX
+- | DWC_CTLL_SRC_INC
+- | DWC_CTLL_INT_EN);
+-
+- desc->lli.ctllo |= sconfig->device_fc ?
+- DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+- DWC_CTLL_FC(DW_DMA_FC_D_M2P);
++ lli_write(desc, dar, sconfig->dst_addr);
++ lli_write(desc, sar, buf_addr + period_len * i);
++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
++ | DWC_CTLL_DST_WIDTH(reg_width)
++ | DWC_CTLL_SRC_WIDTH(reg_width)
++ | DWC_CTLL_DST_FIX
++ | DWC_CTLL_SRC_INC
++ | DWC_CTLL_INT_EN));
++
++ lli_set(desc, ctllo, sconfig->device_fc ?
++ DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
++ DWC_CTLL_FC(DW_DMA_FC_D_M2P));
+
+ break;
+ case DMA_DEV_TO_MEM:
+- desc->lli.dar = buf_addr + (period_len * i);
+- desc->lli.sar = sconfig->src_addr;
+- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan)
+- | DWC_CTLL_SRC_WIDTH(reg_width)
+- | DWC_CTLL_DST_WIDTH(reg_width)
+- | DWC_CTLL_DST_INC
+- | DWC_CTLL_SRC_FIX
+- | DWC_CTLL_INT_EN);
+-
+- desc->lli.ctllo |= sconfig->device_fc ?
+- DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+- DWC_CTLL_FC(DW_DMA_FC_D_P2M);
++ lli_write(desc, dar, buf_addr + period_len * i);
++ lli_write(desc, sar, sconfig->src_addr);
++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan)
++ | DWC_CTLL_SRC_WIDTH(reg_width)
++ | DWC_CTLL_DST_WIDTH(reg_width)
++ | DWC_CTLL_DST_INC
++ | DWC_CTLL_SRC_FIX
++ | DWC_CTLL_INT_EN));
++
++ lli_set(desc, ctllo, sconfig->device_fc ?
++ DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
++ DWC_CTLL_FC(DW_DMA_FC_D_P2M));
+
+ break;
+ default:
+ break;
+ }
+
+- desc->lli.ctlhi = (period_len >> reg_width);
++ lli_write(desc, ctlhi, period_len >> reg_width);
+ cdesc->desc[i] = desc;
+
+ if (last)
+- last->lli.llp = desc->txd.phys;
++ lli_write(last, llp, desc->txd.phys | lms);
+
+ last = desc;
+ }
+
+ /* Let's make a cyclic list */
+- last->lli.llp = cdesc->desc[0]->txd.phys;
++ lli_write(last, llp, cdesc->desc[0]->txd.phys | lms);
+
+ dev_dbg(chan2dev(&dwc->chan),
+ "cyclic prepared buf %pad len %zu period %zu periods %d\n",
+@@ -1466,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_cyclic_desc *cdesc = dwc->cdesc;
+- int i;
++ unsigned int i;
+ unsigned long flags;
+
+ dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__);
+@@ -1490,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan
+ kfree(cdesc->desc);
+ kfree(cdesc);
+
++ dwc->cdesc = NULL;
++
+ clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+ }
+ EXPORT_SYMBOL(dw_dma_cyclic_free);
+
+ /*----------------------------------------------------------------------*/
+
+-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata)
++int dw_dma_probe(struct dw_dma_chip *chip)
+ {
++ struct dw_dma_platform_data *pdata;
+ struct dw_dma *dw;
+ bool autocfg = false;
+ unsigned int dw_params;
+- unsigned int max_blk_size = 0;
++ unsigned int i;
+ int err;
+- int i;
+
+ dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
++ dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
++ if (!dw->pdata)
++ return -ENOMEM;
++
+ dw->regs = chip->regs;
+ chip->dw = dw;
+
+ pm_runtime_get_sync(chip->dev);
+
+- if (!pdata) {
+- dw_params = dma_read_byaddr(chip->regs, DW_PARAMS);
++ if (!chip->pdata) {
++ dw_params = dma_readl(dw, DW_PARAMS);
+ dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params);
+
+ autocfg = dw_params >> DW_PARAMS_EN & 1;
+@@ -1524,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ goto err_pdata;
+ }
+
+- pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL);
+- if (!pdata) {
+- err = -ENOMEM;
+- goto err_pdata;
+- }
++ /* Reassign the platform data pointer */
++ pdata = dw->pdata;
+
+ /* Get hardware configuration parameters */
+ pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
+ pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+ for (i = 0; i < pdata->nr_masters; i++) {
+ pdata->data_width[i] =
+- (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2;
++ 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3);
+ }
+- max_blk_size = dma_readl(dw, MAX_BLK_SIZE);
++ pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
+
+ /* Fill platform data with the default values */
+ pdata->is_private = true;
+ pdata->is_memcpy = true;
+ pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
+ pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
+- } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
++ } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
+ err = -EINVAL;
+ goto err_pdata;
++ } else {
++ memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata));
++
++ /* Reassign the platform data pointer */
++ pdata = dw->pdata;
+ }
+
+ dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan),
+@@ -1556,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ goto err_pdata;
+ }
+
+- /* Get hardware configuration parameters */
+- dw->nr_masters = pdata->nr_masters;
+- for (i = 0; i < dw->nr_masters; i++)
+- dw->data_width[i] = pdata->data_width[i];
+-
+ /* Calculate all channel mask before DMA setup */
+ dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+@@ -1607,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chi
+
+ INIT_LIST_HEAD(&dwc->active_list);
+ INIT_LIST_HEAD(&dwc->queue);
+- INIT_LIST_HEAD(&dwc->free_list);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+
+@@ -1615,11 +1558,9 @@ int dw_dma_probe(struct dw_dma_chip *chi
+
+ /* Hardware configuration */
+ if (autocfg) {
+- unsigned int dwc_params;
+ unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
+- void __iomem *addr = chip->regs + r * sizeof(u32);
+-
+- dwc_params = dma_read_byaddr(addr, DWC_PARAMS);
++ void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
++ unsigned int dwc_params = dma_readl_native(addr);
+
+ dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
+ dwc_params);
+@@ -1630,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chi
+ * up to 0x0a for 4095.
+ */
+ dwc->block_size =
+- (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1;
++ (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
+ dwc->nollp =
+ (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
+ } else {
+ dwc->block_size = pdata->block_size;
+
+ /* Check if channel supports multi block transfer */
+- channel_writel(dwc, LLP, 0xfffffffc);
+- dwc->nollp =
+- (channel_readl(dwc, LLP) & 0xfffffffc) == 0;
++ channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff));
++ dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0;
+ channel_writel(dwc, LLP, 0);
+ }
+ }
+--- a/drivers/dma/dw/pci.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/pci.c 2016-05-21 22:47:08.665465180 +0200
+@@ -17,8 +17,8 @@
+
+ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
+ {
++ const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+ struct dw_dma_chip *chip;
+- struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+@@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev *
+ chip->dev = &pdev->dev;
+ chip->regs = pcim_iomap_table(pdev)[0];
+ chip->irq = pdev->irq;
++ chip->pdata = pdata;
+
+- ret = dw_dma_probe(chip, pdata);
++ ret = dw_dma_probe(chip);
+ if (ret)
+ return ret;
+
+@@ -108,6 +109,10 @@ static const struct pci_device_id dw_pci
+
+ /* Haswell */
+ { PCI_VDEVICE(INTEL, 0x9c60) },
++
++ /* Broadwell */
++ { PCI_VDEVICE(INTEL, 0x9ce0) },
++
+ { }
+ };
+ MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
+--- a/drivers/dma/dw/platform.c 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/platform.c 2016-05-21 22:47:08.665465180 +0200
+@@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate(
+
+ slave.src_id = dma_spec->args[0];
+ slave.dst_id = dma_spec->args[0];
+- slave.src_master = dma_spec->args[1];
+- slave.dst_master = dma_spec->args[2];
++ slave.m_master = dma_spec->args[1];
++ slave.p_master = dma_spec->args[2];
+
+ if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS ||
+ slave.dst_id >= DW_DMA_MAX_NR_REQUESTS ||
+- slave.src_master >= dw->nr_masters ||
+- slave.dst_master >= dw->nr_masters))
++ slave.m_master >= dw->pdata->nr_masters ||
++ slave.p_master >= dw->pdata->nr_masters))
+ return NULL;
+
+ dma_cap_zero(cap);
+@@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dm
+ .dma_dev = dma_spec->dev,
+ .src_id = dma_spec->slave_id,
+ .dst_id = dma_spec->slave_id,
+- .src_master = 1,
+- .dst_master = 0,
++ .m_master = 0,
++ .p_master = 1,
+ };
+
+ return dw_dma_filter(chan, &slave);
+@@ -103,18 +103,28 @@ dw_dma_parse_dt(struct platform_device *
+ struct device_node *np = pdev->dev.of_node;
+ struct dw_dma_platform_data *pdata;
+ u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
++ u32 nr_masters;
++ u32 nr_channels;
+
+ if (!np) {
+ dev_err(&pdev->dev, "Missing DT data\n");
+ return NULL;
+ }
+
++ if (of_property_read_u32(np, "dma-masters", &nr_masters))
++ return NULL;
++ if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
++ return NULL;
++
++ if (of_property_read_u32(np, "dma-channels", &nr_channels))
++ return NULL;
++
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return NULL;
+
+- if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
+- return NULL;
++ pdata->nr_masters = nr_masters;
++ pdata->nr_channels = nr_channels;
+
+ if (of_property_read_bool(np, "is_private"))
+ pdata->is_private = true;
+@@ -128,17 +138,13 @@ dw_dma_parse_dt(struct platform_device *
+ if (!of_property_read_u32(np, "block_size", &tmp))
+ pdata->block_size = tmp;
+
+- if (!of_property_read_u32(np, "dma-masters", &tmp)) {
+- if (tmp > DW_DMA_MAX_NR_MASTERS)
+- return NULL;
+-
+- pdata->nr_masters = tmp;
+- }
+-
+- if (!of_property_read_u32_array(np, "data_width", arr,
+- pdata->nr_masters))
+- for (tmp = 0; tmp < pdata->nr_masters; tmp++)
++ if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
++ for (tmp = 0; tmp < nr_masters; tmp++)
+ pdata->data_width[tmp] = arr[tmp];
++ } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
++ for (tmp = 0; tmp < nr_masters; tmp++)
++ pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
++ }
+
+ return pdata;
+ }
+@@ -155,8 +161,7 @@ static int dw_probe(struct platform_devi
+ struct dw_dma_chip *chip;
+ struct device *dev = &pdev->dev;
+ struct resource *mem;
+- const struct acpi_device_id *id;
+- struct dw_dma_platform_data *pdata;
++ const struct dw_dma_platform_data *pdata;
+ int err;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+@@ -179,13 +184,9 @@ static int dw_probe(struct platform_devi
+ pdata = dev_get_platdata(dev);
+ if (!pdata)
+ pdata = dw_dma_parse_dt(pdev);
+- if (!pdata && has_acpi_companion(dev)) {
+- id = acpi_match_device(dev->driver->acpi_match_table, dev);
+- if (id)
+- pdata = (struct dw_dma_platform_data *)id->driver_data;
+- }
+
+ chip->dev = dev;
++ chip->pdata = pdata;
+
+ chip->clk = devm_clk_get(chip->dev, "hclk");
+ if (IS_ERR(chip->clk))
+@@ -196,7 +197,7 @@ static int dw_probe(struct platform_devi
+
+ pm_runtime_enable(&pdev->dev);
+
+- err = dw_dma_probe(chip, pdata);
++ err = dw_dma_probe(chip);
+ if (err)
+ goto err_dw_dma_probe;
+
+@@ -239,7 +240,19 @@ static void dw_shutdown(struct platform_
+ {
+ struct dw_dma_chip *chip = platform_get_drvdata(pdev);
+
++ /*
++ * We have to call dw_dma_disable() to stop any ongoing transfer. On
++ * some platforms we can't do that since DMA device is powered off.
++ * Moreover we have no possibility to check if the platform is affected
++ * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put()
++ * unconditionally. On the other hand we can't use
++ * pm_runtime_suspended() because runtime PM framework is not fully
++ * used by the driver.
++ */
++ pm_runtime_get_sync(chip->dev);
+ dw_dma_disable(chip);
++ pm_runtime_put_sync_suspend(chip->dev);
++
+ clk_disable_unprepare(chip->clk);
+ }
+
+@@ -252,17 +265,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_tab
+ #endif
+
+ #ifdef CONFIG_ACPI
+-static struct dw_dma_platform_data dw_dma_acpi_pdata = {
+- .nr_channels = 8,
+- .is_private = true,
+- .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+- .chan_priority = CHAN_PRIORITY_ASCENDING,
+- .block_size = 4095,
+- .nr_masters = 2,
+-};
+-
+ static const struct acpi_device_id dw_dma_acpi_id_table[] = {
+- { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata },
++ { "INTL9C60", 0 },
+ { }
+ };
+ MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
+--- a/drivers/dma/dw/regs.h 2016-05-21 23:13:19.964478443 +0200
++++ b/drivers/dma/dw/regs.h 2016-05-21 22:47:08.665465180 +0200
+@@ -114,10 +114,6 @@ struct dw_dma_regs {
+ #define dma_writel_native writel
+ #endif
+
+-/* To access the registers in early stage of probe */
+-#define dma_read_byaddr(addr, name) \
+- dma_readl_native((addr) + offsetof(struct dw_dma_regs, name))
+-
+ /* Bitfields in DW_PARAMS */
+ #define DW_PARAMS_NR_CHAN 8 /* number of channels */
+ #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */
+@@ -143,6 +139,10 @@ enum dw_dma_msize {
+ DW_DMA_MSIZE_256,
+ };
+
++/* Bitfields in LLP */
++#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */
++#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */
++
+ /* Bitfields in CTL_LO */
+ #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */
+ #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */
+@@ -150,7 +150,7 @@ enum dw_dma_msize {
+ #define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */
+ #define DWC_CTLL_DST_DEC (1<<7)
+ #define DWC_CTLL_DST_FIX (2<<7)
+-#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */
++#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */
+ #define DWC_CTLL_SRC_DEC (1<<9)
+ #define DWC_CTLL_SRC_FIX (2<<9)
+ #define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */
+@@ -216,6 +216,8 @@ enum dw_dma_msize {
+ enum dw_dmac_flags {
+ DW_DMA_IS_CYCLIC = 0,
+ DW_DMA_IS_SOFT_LLP = 1,
++ DW_DMA_IS_PAUSED = 2,
++ DW_DMA_IS_INITIALIZED = 3,
+ };
+
+ struct dw_dma_chan {
+@@ -224,8 +226,6 @@ struct dw_dma_chan {
+ u8 mask;
+ u8 priority;
+ enum dma_transfer_direction direction;
+- bool paused;
+- bool initialized;
+
+ /* software emulation of the LLP transfers */
+ struct list_head *tx_node_active;
+@@ -236,8 +236,6 @@ struct dw_dma_chan {
+ unsigned long flags;
+ struct list_head active_list;
+ struct list_head queue;
+- struct list_head free_list;
+- u32 residue;
+ struct dw_cyclic_desc *cdesc;
+
+ unsigned int descs_allocated;
+@@ -249,8 +247,8 @@ struct dw_dma_chan {
+ /* custom slave configuration */
+ u8 src_id;
+ u8 dst_id;
+- u8 src_master;
+- u8 dst_master;
++ u8 m_master;
++ u8 p_master;
+
+ /* configuration passed via .device_config */
+ struct dma_slave_config dma_sconfig;
+@@ -283,9 +281,8 @@ struct dw_dma {
+ u8 all_chan_mask;
+ u8 in_use;
+
+- /* hardware configuration */
+- unsigned char nr_masters;
+- unsigned char data_width[DW_DMA_MAX_NR_MASTERS];
++ /* platform data */
++ struct dw_dma_platform_data *pdata;
+ };
+
+ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+@@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(s
+ return container_of(ddev, struct dw_dma, dma);
+ }
+
++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
++typedef __be32 __dw32;
++#else
++typedef __le32 __dw32;
++#endif
++
+ /* LLI == Linked List Item; a.k.a. DMA block descriptor */
+ struct dw_lli {
+ /* values that are not changed by hardware */
+- u32 sar;
+- u32 dar;
+- u32 llp; /* chain to next lli */
+- u32 ctllo;
++ __dw32 sar;
++ __dw32 dar;
++ __dw32 llp; /* chain to next lli */
++ __dw32 ctllo;
+ /* values that may get written back: */
+- u32 ctlhi;
++ __dw32 ctlhi;
+ /* sstat and dstat can snapshot peripheral register state.
+ * silicon config may discard either or both...
+ */
+- u32 sstat;
+- u32 dstat;
++ __dw32 sstat;
++ __dw32 dstat;
+ };
+
+ struct dw_desc {
+ /* FIRST values the hardware uses */
+ struct dw_lli lli;
+
++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO
++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v))
++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v))
++#define lli_read(d, reg) be32_to_cpu((d)->lli.reg)
++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v))
++#else
++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v))
++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v))
++#define lli_read(d, reg) le32_to_cpu((d)->lli.reg)
++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v))
++#endif
++
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+ size_t total_len;
++ u32 residue;
+ };
+
+ #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node)
+--- a/include/linux/dma/dw.h
++++ b/include/linux/dma/dw.h
+@@ -27,6 +27,7 @@ struct dw_dma;
+ * @regs: memory mapped I/O space
+ * @clk: hclk clock
+ * @dw: struct dw_dma that is filed by dw_dma_probe()
++ * @pdata: pointer to platform data
+ */
+ struct dw_dma_chip {
+ struct device *dev;
+@@ -34,10 +35,12 @@ struct dw_dma_chip {
+ void __iomem *regs;
+ struct clk *clk;
+ struct dw_dma *dw;
++
++ const struct dw_dma_platform_data *pdata;
+ };
+
+ /* Export to the platform drivers */
+-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata);
++int dw_dma_probe(struct dw_dma_chip *chip);
+ int dw_dma_remove(struct dw_dma_chip *chip);
+
+ /* DMA API extensions */
+diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
+index 03b6095..d15d8ba 100644
+--- a/include/linux/platform_data/dma-dw.h
++++ b/include/linux/platform_data/dma-dw.h
+@@ -21,15 +21,15 @@
+ * @dma_dev: required DMA master device
+ * @src_id: src request line
+ * @dst_id: dst request line
+- * @src_master: src master for transfers on allocated channel.
+- * @dst_master: dest master for transfers on allocated channel.
++ * @m_master: memory master for transfers on allocated channel
++ * @p_master: peripheral master for transfers on allocated channel
+ */
+ struct dw_dma_slave {
+ struct device *dma_dev;
+ u8 src_id;
+ u8 dst_id;
+- u8 src_master;
+- u8 dst_master;
++ u8 m_master;
++ u8 p_master;
+ };
+
+ /**
+@@ -43,7 +43,7 @@ struct dw_dma_slave {
+ * @block_size: Maximum block size supported by the controller
+ * @nr_masters: Number of AHB masters supported by the controller
+ * @data_width: Maximum data width supported by hardware per AHB master
+- * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
++ * (in bytes, power of 2)
+ */
+ struct dw_dma_platform_data {
+ unsigned int nr_channels;
+@@ -55,7 +55,7 @@ struct dw_dma_platform_data {
+ #define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */
+ #define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */
+ unsigned char chan_priority;
+- unsigned short block_size;
++ unsigned int block_size;
+ unsigned char nr_masters;
+ unsigned char data_width[DW_DMA_MAX_NR_MASTERS];
+ };
+--
+2.8.1
+