diff options
Diffstat (limited to 'target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch')
-rw-r--r-- | target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch | 1522 |
1 files changed, 1522 insertions, 0 deletions
diff --git a/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch new file mode 100644 index 0000000000..96b11a82b6 --- /dev/null +++ b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch @@ -0,0 +1,1522 @@ +From: Andy Shevchenko <andriy.shevchenko@linux.intel.com> +Subject: [PATCH v6 0/4] Fixes / cleanups in dw_dmac (affects on few subsystems) +Date: Mon, 25 Apr 2016 15:35:05 +0300 + +This patch series (v3: http://www.spinics.net/lists/kernel/msg2215303.html) +contains a number of mostly minor fixes and cleanups for the DW DMA driver. A +couple of them affect the DT binding so these may need to be updated to +maintain compatibility (old format is still supported though). The rest should +be relatively straight-forward. + +This version has been tested on the following bare metal platforms: +- ATNGW100 (avr32 based platform) with dmatest +- Sam460ex (powerpc 44x based platform) with SATA +- Intel Braswell with UART +- Intel Galileo (Intel Quark based platform) with UART + +(SATA driver and Intel Galileo UART support are based on this series and just + published recently for a review) + +Vinod, there are few patch sets developed on top of this one, so, the idea is +to keep this in an immutable branch / tag. 
+ +Changes since v5: +- fixed an issue found by kbuildbot + +Changes since v4: +- send proper set of patches +- add changelog + +Changes since v3: +- add patch 1 to check value of dma-masters property +- drop the upstreamed patches +- update patch 2 to keep an array for data-width property as well + +Changes since v2: +- add patch 1 to fix master selection which was broken for long time +- remove "use field-by-field initialization" patch since like Mans mentioned it + has mostly no value and might even be more error prone +- rebase on top of recent linux-next +- wide testing on several platforms + +Changes since v1: +- zeroing struct dw_dma_slave before use +- fall back to old data_width property if data-width is not found +- append tags for few patches +- correct title of cover letter +- rebase on top of recent linux-next + +Andy Shevchenko (4): + dmaengine: dw: platform: check nr_masters to be non-zero + dmaengine: dw: revisit data_width property + dmaengine: dw: keep entire platform data in struct dw_dma + dmaengine: dw: pass platform data via struct dw_dma_chip + + Documentation/devicetree/bindings/dma/snps-dma.txt | 6 +- + arch/arc/boot/dts/abilis_tb10x.dtsi | 2 +- + arch/arm/boot/dts/spear13xx.dtsi | 4 +- + drivers/ata/sata_dwc_460ex.c | 2 +- + drivers/dma/dw/core.c | 75 ++++++++-------------- + drivers/dma/dw/pci.c | 5 +- + drivers/dma/dw/platform.c | 32 +++++---- + drivers/dma/dw/regs.h | 5 +- + include/linux/dma/dw.h | 5 +- + include/linux/platform_data/dma-dw.h | 4 +- + sound/soc/intel/common/sst-firmware.c | 2 +- + 11 files changed, 64 insertions(+), 78 deletions(-) + +--- a/drivers/dma/dw/core.c 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/core.c 2016-05-21 22:47:08.665465180 +0200 +@@ -45,22 +45,19 @@ + DW_DMA_MSIZE_16; \ + u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \ + DW_DMA_MSIZE_16; \ ++ u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \ ++ _dwc->p_master : _dwc->m_master; \ ++ u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? 
\ ++ _dwc->p_master : _dwc->m_master; \ + \ + (DWC_CTLL_DST_MSIZE(_dmsize) \ + | DWC_CTLL_SRC_MSIZE(_smsize) \ + | DWC_CTLL_LLP_D_EN \ + | DWC_CTLL_LLP_S_EN \ +- | DWC_CTLL_DMS(_dwc->dst_master) \ +- | DWC_CTLL_SMS(_dwc->src_master)); \ ++ | DWC_CTLL_DMS(_dms) \ ++ | DWC_CTLL_SMS(_sms)); \ + }) + +-/* +- * Number of descriptors to allocate for each channel. This should be +- * made configurable somehow; preferably, the clients (at least the +- * ones using slave transfers) should be able to give us a hint. +- */ +-#define NR_DESCS_PER_CHANNEL 64 +- + /* The set of bus widths supported by the DMA controller */ + #define DW_DMA_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ +@@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active( + return to_dw_desc(dwc->active_list.next); + } + +-static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) ++static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) + { +- struct dw_desc *desc, *_desc; +- struct dw_desc *ret = NULL; +- unsigned int i = 0; +- unsigned long flags; ++ struct dw_desc *desc = txd_to_dw_desc(tx); ++ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); ++ dma_cookie_t cookie; ++ unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); +- list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { +- i++; +- if (async_tx_test_ack(&desc->txd)) { +- list_del(&desc->desc_node); +- ret = desc; +- break; +- } +- dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); +- } ++ cookie = dma_cookie_assign(tx); ++ ++ /* ++ * REVISIT: We should attempt to chain as many descriptors as ++ * possible, perhaps even appending to those already submitted ++ * for DMA. But this is hard to do in a race-free manner. 
++ */ ++ ++ list_add_tail(&desc->desc_node, &dwc->queue); + spin_unlock_irqrestore(&dwc->lock, flags); ++ dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", ++ __func__, desc->txd.cookie); + +- dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); ++ return cookie; ++} + +- return ret; ++static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) ++{ ++ struct dw_dma *dw = to_dw_dma(dwc->chan.device); ++ struct dw_desc *desc; ++ dma_addr_t phys; ++ ++ desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys); ++ if (!desc) ++ return NULL; ++ ++ dwc->descs_allocated++; ++ INIT_LIST_HEAD(&desc->tx_list); ++ dma_async_tx_descriptor_init(&desc->txd, &dwc->chan); ++ desc->txd.tx_submit = dwc_tx_submit; ++ desc->txd.flags = DMA_CTRL_ACK; ++ desc->txd.phys = phys; ++ return desc; + } + +-/* +- * Move a descriptor, including any children, to the free list. +- * `desc' must not be on any lists. +- */ + static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) + { +- unsigned long flags; ++ struct dw_dma *dw = to_dw_dma(dwc->chan.device); ++ struct dw_desc *child, *_next; + +- if (desc) { +- struct dw_desc *child; ++ if (unlikely(!desc)) ++ return; + +- spin_lock_irqsave(&dwc->lock, flags); +- list_for_each_entry(child, &desc->tx_list, desc_node) +- dev_vdbg(chan2dev(&dwc->chan), +- "moving child desc %p to freelist\n", +- child); +- list_splice_init(&desc->tx_list, &dwc->free_list); +- dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); +- list_add(&desc->desc_node, &dwc->free_list); +- spin_unlock_irqrestore(&dwc->lock, flags); ++ list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) { ++ list_del(&child->desc_node); ++ dma_pool_free(dw->desc_pool, child, child->txd.phys); ++ dwc->descs_allocated--; + } ++ ++ dma_pool_free(dw->desc_pool, desc, desc->txd.phys); ++ dwc->descs_allocated--; + } + + static void dwc_initialize(struct dw_dma_chan *dwc) +@@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma + u32 
cfghi = DWC_CFGH_FIFO_MODE; + u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); + +- if (dwc->initialized == true) ++ if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags)) + return; + + cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); +@@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma + channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.ERROR, dwc->mask); + +- dwc->initialized = true; ++ set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); + } + + /*----------------------------------------------------------------------*/ + +-static inline unsigned int dwc_fast_ffs(unsigned long long v) +-{ +- /* +- * We can be a lot more clever here, but this should take care +- * of the most common optimization. +- */ +- if (!(v & 7)) +- return 3; +- else if (!(v & 3)) +- return 2; +- else if (!(v & 1)) +- return 1; +- return 0; +-} +- + static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc) + { + dev_err(chan2dev(&dwc->chan), +@@ -197,12 +193,12 @@ static inline void dwc_do_single_block(s + * Software emulation of LLP mode relies on interrupts to continue + * multi block transfer. 
+ */ +- ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN; ++ ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN; + +- channel_writel(dwc, SAR, desc->lli.sar); +- channel_writel(dwc, DAR, desc->lli.dar); ++ channel_writel(dwc, SAR, lli_read(desc, sar)); ++ channel_writel(dwc, DAR, lli_read(desc, dar)); + channel_writel(dwc, CTL_LO, ctllo); +- channel_writel(dwc, CTL_HI, desc->lli.ctlhi); ++ channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi)); + channel_set_bit(dw, CH_EN, dwc->mask); + + /* Move pointer to next descriptor */ +@@ -213,6 +209,7 @@ static inline void dwc_do_single_block(s + static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) + { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); ++ u8 lms = DWC_LLP_LMS(dwc->m_master); + unsigned long was_soft_llp; + + /* ASSERT: channel is idle */ +@@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_ch + + dwc_initialize(dwc); + +- dwc->residue = first->total_len; ++ first->residue = first->total_len; + dwc->tx_node_active = &first->tx_list; + + /* Submit first block */ +@@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_ch + + dwc_initialize(dwc); + +- channel_writel(dwc, LLP, first->txd.phys); +- channel_writel(dwc, CTL_LO, +- DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); ++ channel_writel(dwc, LLP, first->txd.phys | lms); ++ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + channel_set_bit(dw, CH_EN, dwc->mask); + } +@@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_ch + list_for_each_entry(child, &desc->tx_list, desc_node) + async_tx_ack(&child->txd); + async_tx_ack(&desc->txd); +- +- list_splice_init(&desc->tx_list, &dwc->free_list); +- list_move(&desc->desc_node, &dwc->free_list); +- +- dma_descriptor_unmap(txd); ++ dwc_desc_put(dwc, desc); + spin_unlock_irqrestore(&dwc->lock, flags); + + if (callback) +@@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct + + head = &desc->tx_list; + if (active != head) { +- /* Update desc to 
reflect last sent one */ +- if (active != head->next) +- desc = to_dw_desc(active->prev); +- +- dwc->residue -= desc->len; ++ /* Update residue to reflect last sent descriptor */ ++ if (active == head->next) ++ desc->residue -= desc->len; ++ else ++ desc->residue -= to_dw_desc(active->prev)->len; + + child = to_dw_desc(active); + +@@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct + clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags); + } + +- dwc->residue = 0; +- + spin_unlock_irqrestore(&dwc->lock, flags); + + dwc_complete_all(dw, dwc); +@@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct + } + + if (list_empty(&dwc->active_list)) { +- dwc->residue = 0; + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } +@@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct + + list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + /* Initial residue value */ +- dwc->residue = desc->total_len; ++ desc->residue = desc->total_len; + + /* Check first descriptors addr */ +- if (desc->txd.phys == llp) { ++ if (desc->txd.phys == DWC_LLP_LOC(llp)) { + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + /* Check first descriptors llp */ +- if (desc->lli.llp == llp) { ++ if (lli_read(desc, llp) == llp) { + /* This one is currently in progress */ +- dwc->residue -= dwc_get_sent(dwc); ++ desc->residue -= dwc_get_sent(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + +- dwc->residue -= desc->len; ++ desc->residue -= desc->len; + list_for_each_entry(child, &desc->tx_list, desc_node) { +- if (child->lli.llp == llp) { ++ if (lli_read(child, llp) == llp) { + /* Currently in progress */ +- dwc->residue -= dwc_get_sent(dwc); ++ desc->residue -= dwc_get_sent(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } +- dwc->residue -= child->len; ++ desc->residue -= child->len; + } + + /* +@@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct + spin_unlock_irqrestore(&dwc->lock, flags); + } + +-static inline void 
dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) ++static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc) + { + dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", +- lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo); ++ lli_read(desc, sar), ++ lli_read(desc, dar), ++ lli_read(desc, llp), ++ lli_read(desc, ctlhi), ++ lli_read(desc, ctllo)); + } + + static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) +@@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_d + */ + dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n" + " cookie: %d\n", bad_desc->txd.cookie); +- dwc_dump_lli(dwc, &bad_desc->lli); ++ dwc_dump_lli(dwc, bad_desc); + list_for_each_entry(child, &bad_desc->tx_list, desc_node) +- dwc_dump_lli(dwc, &child->lli); ++ dwc_dump_lli(dwc, child); + + spin_unlock_irqrestore(&dwc->lock, flags); + +@@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_ + */ + if (unlikely(status_err & dwc->mask) || + unlikely(status_xfer & dwc->mask)) { +- int i; ++ unsigned int i; + + dev_err(chan2dev(&dwc->chan), + "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n", +@@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_ + dma_writel(dw, CLEAR.XFER, dwc->mask); + + for (i = 0; i < dwc->cdesc->periods; i++) +- dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); ++ dwc_dump_lli(dwc, dwc->cdesc->desc[i]); + + spin_unlock_irqrestore(&dwc->lock, flags); + } +@@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long + u32 status_block; + u32 status_xfer; + u32 status_err; +- int i; ++ unsigned int i; + + status_block = dma_readl(dw, RAW.BLOCK); + status_xfer = dma_readl(dw, RAW.XFER); +@@ -616,12 +609,17 @@ static void dw_dma_tasklet(unsigned long + static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) + { + struct dw_dma *dw = dev_id; +- u32 status = dma_readl(dw, STATUS_INT); ++ u32 status; ++ ++ /* Check if we have any interrupt from the DMAC which is not in use */ 
++ if (!dw->in_use) ++ return IRQ_NONE; + ++ status = dma_readl(dw, STATUS_INT); + dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status); + + /* Check if we have any interrupt from the DMAC */ +- if (!status || !dw->in_use) ++ if (!status) + return IRQ_NONE; + + /* +@@ -653,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int + + /*----------------------------------------------------------------------*/ + +-static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) +-{ +- struct dw_desc *desc = txd_to_dw_desc(tx); +- struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); +- dma_cookie_t cookie; +- unsigned long flags; +- +- spin_lock_irqsave(&dwc->lock, flags); +- cookie = dma_cookie_assign(tx); +- +- /* +- * REVISIT: We should attempt to chain as many descriptors as +- * possible, perhaps even appending to those already submitted +- * for DMA. But this is hard to do in a race-free manner. +- */ +- +- dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie); +- list_add_tail(&desc->desc_node, &dwc->queue); +- +- spin_unlock_irqrestore(&dwc->lock, flags); +- +- return cookie; +-} +- + static struct dma_async_tx_descriptor * + dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +@@ -688,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *cha + struct dw_desc *prev; + size_t xfer_count; + size_t offset; ++ u8 m_master = dwc->m_master; + unsigned int src_width; + unsigned int dst_width; +- unsigned int data_width; ++ unsigned int data_width = dw->pdata->data_width[m_master]; + u32 ctllo; ++ u8 lms = DWC_LLP_LMS(m_master); + + dev_vdbg(chan2dev(chan), + "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__, +@@ -704,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *cha + + dwc->direction = DMA_MEM_TO_MEM; + +- data_width = min_t(unsigned int, dw->data_width[dwc->src_master], +- dw->data_width[dwc->dst_master]); +- +- src_width = dst_width = min_t(unsigned int, data_width, +- 
dwc_fast_ffs(src | dest | len)); ++ src_width = dst_width = __ffs(data_width | src | dest | len); + + ctllo = DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(dst_width) +@@ -726,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *cha + if (!desc) + goto err_desc_get; + +- desc->lli.sar = src + offset; +- desc->lli.dar = dest + offset; +- desc->lli.ctllo = ctllo; +- desc->lli.ctlhi = xfer_count; ++ lli_write(desc, sar, src + offset); ++ lli_write(desc, dar, dest + offset); ++ lli_write(desc, ctllo, ctllo); ++ lli_write(desc, ctlhi, xfer_count); + desc->len = xfer_count << src_width; + + if (!first) { + first = desc; + } else { +- prev->lli.llp = desc->txd.phys; +- list_add_tail(&desc->desc_node, +- &first->tx_list); ++ lli_write(prev, llp, desc->txd.phys | lms); ++ list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + } + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ +- prev->lli.ctllo |= DWC_CTLL_INT_EN; ++ lli_set(prev, ctllo, DWC_CTLL_INT_EN); + + prev->lli.llp = 0; ++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + first->txd.flags = flags; + first->total_len = len; + +@@ -768,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan, + struct dw_desc *prev; + struct dw_desc *first; + u32 ctllo; ++ u8 m_master = dwc->m_master; ++ u8 lms = DWC_LLP_LMS(m_master); + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; +- unsigned int data_width; ++ unsigned int data_width = dw->pdata->data_width[m_master]; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; +@@ -797,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, + ctllo |= sconfig->device_fc ? 
DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P); + +- data_width = dw->data_width[dwc->src_master]; +- + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, dlen, mem; +@@ -806,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan, + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + +- mem_width = min_t(unsigned int, +- data_width, dwc_fast_ffs(mem | len)); ++ mem_width = __ffs(data_width | mem | len); + + slave_sg_todev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + +- desc->lli.sar = mem; +- desc->lli.dar = reg; +- desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); ++ lli_write(desc, sar, mem); ++ lli_write(desc, dar, reg); ++ lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); + if ((len >> mem_width) > dwc->block_size) { + dlen = dwc->block_size << mem_width; + mem += dlen; +@@ -826,15 +797,14 @@ slave_sg_todev_fill_desc: + len = 0; + } + +- desc->lli.ctlhi = dlen >> mem_width; ++ lli_write(desc, ctlhi, dlen >> mem_width); + desc->len = dlen; + + if (!first) { + first = desc; + } else { +- prev->lli.llp = desc->txd.phys; +- list_add_tail(&desc->desc_node, +- &first->tx_list); ++ lli_write(prev, llp, desc->txd.phys | lms); ++ list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + total_len += dlen; +@@ -854,8 +824,6 @@ slave_sg_todev_fill_desc: + ctllo |= sconfig->device_fc ? 
DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M); + +- data_width = dw->data_width[dwc->dst_master]; +- + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, dlen, mem; +@@ -863,17 +831,16 @@ slave_sg_todev_fill_desc: + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + +- mem_width = min_t(unsigned int, +- data_width, dwc_fast_ffs(mem | len)); ++ mem_width = __ffs(data_width | mem | len); + + slave_sg_fromdev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + +- desc->lli.sar = reg; +- desc->lli.dar = mem; +- desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); ++ lli_write(desc, sar, reg); ++ lli_write(desc, dar, mem); ++ lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); + if ((len >> reg_width) > dwc->block_size) { + dlen = dwc->block_size << reg_width; + mem += dlen; +@@ -882,15 +849,14 @@ slave_sg_fromdev_fill_desc: + dlen = len; + len = 0; + } +- desc->lli.ctlhi = dlen >> reg_width; ++ lli_write(desc, ctlhi, dlen >> reg_width); + desc->len = dlen; + + if (!first) { + first = desc; + } else { +- prev->lli.llp = desc->txd.phys; +- list_add_tail(&desc->desc_node, +- &first->tx_list); ++ lli_write(prev, llp, desc->txd.phys | lms); ++ list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + total_len += dlen; +@@ -905,9 +871,10 @@ slave_sg_fromdev_fill_desc: + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ +- prev->lli.ctllo |= DWC_CTLL_INT_EN; ++ lli_set(prev, ctllo, DWC_CTLL_INT_EN); + + prev->lli.llp = 0; ++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + first->total_len = total_len; + + return &first->txd; +@@ -932,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan + dwc->src_id = dws->src_id; + dwc->dst_id = dws->dst_id; + +- dwc->src_master = dws->src_master; +- dwc->dst_master = dws->dst_master; ++ dwc->m_master = dws->m_master; ++ dwc->p_master = dws->p_master; + + return true; + } +@@ -986,7 +953,7 @@ static int 
dwc_pause(struct dma_chan *ch + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) + udelay(2); + +- dwc->paused = true; ++ set_bit(DW_DMA_IS_PAUSED, &dwc->flags); + + spin_unlock_irqrestore(&dwc->lock, flags); + +@@ -999,7 +966,7 @@ static inline void dwc_chan_resume(struc + + channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); + +- dwc->paused = false; ++ clear_bit(DW_DMA_IS_PAUSED, &dwc->flags); + } + + static int dwc_resume(struct dma_chan *chan) +@@ -1007,12 +974,10 @@ static int dwc_resume(struct dma_chan *c + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; + +- if (!dwc->paused) +- return 0; +- + spin_lock_irqsave(&dwc->lock, flags); + +- dwc_chan_resume(dwc); ++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags)) ++ dwc_chan_resume(dwc); + + spin_unlock_irqrestore(&dwc->lock, flags); + +@@ -1048,16 +1013,37 @@ static int dwc_terminate_all(struct dma_ + return 0; + } + +-static inline u32 dwc_get_residue(struct dw_dma_chan *dwc) ++static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c) ++{ ++ struct dw_desc *desc; ++ ++ list_for_each_entry(desc, &dwc->active_list, desc_node) ++ if (desc->txd.cookie == c) ++ return desc; ++ ++ return NULL; ++} ++ ++static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie) + { ++ struct dw_desc *desc; + unsigned long flags; + u32 residue; + + spin_lock_irqsave(&dwc->lock, flags); + +- residue = dwc->residue; +- if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) +- residue -= dwc_get_sent(dwc); ++ desc = dwc_find_desc(dwc, cookie); ++ if (desc) { ++ if (desc == dwc_first_active(dwc)) { ++ residue = desc->residue; ++ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) ++ residue -= dwc_get_sent(dwc); ++ } else { ++ residue = desc->total_len; ++ } ++ } else { ++ residue = 0; ++ } + + spin_unlock_irqrestore(&dwc->lock, flags); + return residue; +@@ -1078,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan, + 
dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + + ret = dma_cookie_status(chan, cookie, txstate); +- if (ret != DMA_COMPLETE) +- dma_set_residue(txstate, dwc_get_residue(dwc)); ++ if (ret == DMA_COMPLETE) ++ return ret; ++ ++ dma_set_residue(txstate, dwc_get_residue(dwc, cookie)); + +- if (dwc->paused && ret == DMA_IN_PROGRESS) ++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS) + return DMA_PAUSED; + + return ret; +@@ -1102,7 +1090,7 @@ static void dwc_issue_pending(struct dma + + static void dw_dma_off(struct dw_dma *dw) + { +- int i; ++ unsigned int i; + + dma_writel(dw, CFG, 0); + +@@ -1116,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw + cpu_relax(); + + for (i = 0; i < dw->dma.chancnt; i++) +- dw->chan[i].initialized = false; ++ clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags); + } + + static void dw_dma_on(struct dw_dma *dw) +@@ -1128,9 +1116,6 @@ static int dwc_alloc_chan_resources(stru + { + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); +- struct dw_desc *desc; +- int i; +- unsigned long flags; + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + +@@ -1161,48 +1146,13 @@ static int dwc_alloc_chan_resources(stru + dw_dma_on(dw); + dw->in_use |= dwc->mask; + +- spin_lock_irqsave(&dwc->lock, flags); +- i = dwc->descs_allocated; +- while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { +- dma_addr_t phys; +- +- spin_unlock_irqrestore(&dwc->lock, flags); +- +- desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys); +- if (!desc) +- goto err_desc_alloc; +- +- memset(desc, 0, sizeof(struct dw_desc)); +- +- INIT_LIST_HEAD(&desc->tx_list); +- dma_async_tx_descriptor_init(&desc->txd, chan); +- desc->txd.tx_submit = dwc_tx_submit; +- desc->txd.flags = DMA_CTRL_ACK; +- desc->txd.phys = phys; +- +- dwc_desc_put(dwc, desc); +- +- spin_lock_irqsave(&dwc->lock, flags); +- i = ++dwc->descs_allocated; +- } +- +- spin_unlock_irqrestore(&dwc->lock, flags); +- +- dev_dbg(chan2dev(chan), "%s: 
allocated %d descriptors\n", __func__, i); +- +- return i; +- +-err_desc_alloc: +- dev_info(chan2dev(chan), "only allocated %d descriptors\n", i); +- +- return i; ++ return 0; + } + + static void dwc_free_chan_resources(struct dma_chan *chan) + { + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); +- struct dw_desc *desc, *_desc; + unsigned long flags; + LIST_HEAD(list); + +@@ -1215,17 +1165,15 @@ static void dwc_free_chan_resources(stru + BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); + + spin_lock_irqsave(&dwc->lock, flags); +- list_splice_init(&dwc->free_list, &list); +- dwc->descs_allocated = 0; + + /* Clear custom channel configuration */ + dwc->src_id = 0; + dwc->dst_id = 0; + +- dwc->src_master = 0; +- dwc->dst_master = 0; ++ dwc->m_master = 0; ++ dwc->p_master = 0; + +- dwc->initialized = false; ++ clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); + + /* Disable interrupts */ + channel_clear_bit(dw, MASK.XFER, dwc->mask); +@@ -1239,11 +1187,6 @@ static void dwc_free_chan_resources(stru + if (!dw->in_use) + dw_dma_off(dw); + +- list_for_each_entry_safe(desc, _desc, &list, desc_node) { +- dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); +- dma_pool_free(dw->desc_pool, desc, desc->txd.phys); +- } +- + dev_vdbg(chan2dev(chan), "%s: done\n", __func__); + } + +@@ -1321,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre + struct dw_cyclic_desc *retval = NULL; + struct dw_desc *desc; + struct dw_desc *last = NULL; ++ u8 lms = DWC_LLP_LMS(dwc->m_master); + unsigned long was_cyclic; + unsigned int reg_width; + unsigned int periods; +@@ -1374,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre + + retval = ERR_PTR(-ENOMEM); + +- if (periods > NR_DESCS_PER_CHANNEL) +- goto out_err; +- + cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL); + if (!cdesc) + goto out_err; +@@ -1392,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre + + switch (direction) { + case DMA_MEM_TO_DEV: +- 
desc->lli.dar = sconfig->dst_addr; +- desc->lli.sar = buf_addr + (period_len * i); +- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) +- | DWC_CTLL_DST_WIDTH(reg_width) +- | DWC_CTLL_SRC_WIDTH(reg_width) +- | DWC_CTLL_DST_FIX +- | DWC_CTLL_SRC_INC +- | DWC_CTLL_INT_EN); +- +- desc->lli.ctllo |= sconfig->device_fc ? +- DWC_CTLL_FC(DW_DMA_FC_P_M2P) : +- DWC_CTLL_FC(DW_DMA_FC_D_M2P); ++ lli_write(desc, dar, sconfig->dst_addr); ++ lli_write(desc, sar, buf_addr + period_len * i); ++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) ++ | DWC_CTLL_DST_WIDTH(reg_width) ++ | DWC_CTLL_SRC_WIDTH(reg_width) ++ | DWC_CTLL_DST_FIX ++ | DWC_CTLL_SRC_INC ++ | DWC_CTLL_INT_EN)); ++ ++ lli_set(desc, ctllo, sconfig->device_fc ? ++ DWC_CTLL_FC(DW_DMA_FC_P_M2P) : ++ DWC_CTLL_FC(DW_DMA_FC_D_M2P)); + + break; + case DMA_DEV_TO_MEM: +- desc->lli.dar = buf_addr + (period_len * i); +- desc->lli.sar = sconfig->src_addr; +- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) +- | DWC_CTLL_SRC_WIDTH(reg_width) +- | DWC_CTLL_DST_WIDTH(reg_width) +- | DWC_CTLL_DST_INC +- | DWC_CTLL_SRC_FIX +- | DWC_CTLL_INT_EN); +- +- desc->lli.ctllo |= sconfig->device_fc ? +- DWC_CTLL_FC(DW_DMA_FC_P_P2M) : +- DWC_CTLL_FC(DW_DMA_FC_D_P2M); ++ lli_write(desc, dar, buf_addr + period_len * i); ++ lli_write(desc, sar, sconfig->src_addr); ++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) ++ | DWC_CTLL_SRC_WIDTH(reg_width) ++ | DWC_CTLL_DST_WIDTH(reg_width) ++ | DWC_CTLL_DST_INC ++ | DWC_CTLL_SRC_FIX ++ | DWC_CTLL_INT_EN)); ++ ++ lli_set(desc, ctllo, sconfig->device_fc ? 
++ DWC_CTLL_FC(DW_DMA_FC_P_P2M) : ++ DWC_CTLL_FC(DW_DMA_FC_D_P2M)); + + break; + default: + break; + } + +- desc->lli.ctlhi = (period_len >> reg_width); ++ lli_write(desc, ctlhi, period_len >> reg_width); + cdesc->desc[i] = desc; + + if (last) +- last->lli.llp = desc->txd.phys; ++ lli_write(last, llp, desc->txd.phys | lms); + + last = desc; + } + + /* Let's make a cyclic list */ +- last->lli.llp = cdesc->desc[0]->txd.phys; ++ lli_write(last, llp, cdesc->desc[0]->txd.phys | lms); + + dev_dbg(chan2dev(&dwc->chan), + "cyclic prepared buf %pad len %zu period %zu periods %d\n", +@@ -1466,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_cyclic_desc *cdesc = dwc->cdesc; +- int i; ++ unsigned int i; + unsigned long flags; + + dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__); +@@ -1490,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan + kfree(cdesc->desc); + kfree(cdesc); + ++ dwc->cdesc = NULL; ++ + clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); + } + EXPORT_SYMBOL(dw_dma_cyclic_free); + + /*----------------------------------------------------------------------*/ + +-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) ++int dw_dma_probe(struct dw_dma_chip *chip) + { ++ struct dw_dma_platform_data *pdata; + struct dw_dma *dw; + bool autocfg = false; + unsigned int dw_params; +- unsigned int max_blk_size = 0; ++ unsigned int i; + int err; +- int i; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + ++ dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); ++ if (!dw->pdata) ++ return -ENOMEM; ++ + dw->regs = chip->regs; + chip->dw = dw; + + pm_runtime_get_sync(chip->dev); + +- if (!pdata) { +- dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); ++ if (!chip->pdata) { ++ dw_params = dma_readl(dw, DW_PARAMS); + dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); + + autocfg = 
dw_params >> DW_PARAMS_EN & 1; +@@ -1524,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chi + goto err_pdata; + } + +- pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); +- if (!pdata) { +- err = -ENOMEM; +- goto err_pdata; +- } ++ /* Reassign the platform data pointer */ ++ pdata = dw->pdata; + + /* Get hardware configuration parameters */ + pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; + pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; + for (i = 0; i < pdata->nr_masters; i++) { + pdata->data_width[i] = +- (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; ++ 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3); + } +- max_blk_size = dma_readl(dw, MAX_BLK_SIZE); ++ pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); + + /* Fill platform data with the default values */ + pdata->is_private = true; + pdata->is_memcpy = true; + pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; + pdata->chan_priority = CHAN_PRIORITY_ASCENDING; +- } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { ++ } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + err = -EINVAL; + goto err_pdata; ++ } else { ++ memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata)); ++ ++ /* Reassign the platform data pointer */ ++ pdata = dw->pdata; + } + + dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), +@@ -1556,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chi + goto err_pdata; + } + +- /* Get hardware configuration parameters */ +- dw->nr_masters = pdata->nr_masters; +- for (i = 0; i < dw->nr_masters; i++) +- dw->data_width[i] = pdata->data_width[i]; +- + /* Calculate all channel mask before DMA setup */ + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; + +@@ -1607,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chi + + INIT_LIST_HEAD(&dwc->active_list); + INIT_LIST_HEAD(&dwc->queue); +- INIT_LIST_HEAD(&dwc->free_list); + + channel_clear_bit(dw, CH_EN, dwc->mask); + +@@ -1615,11 +1558,9 @@ int dw_dma_probe(struct 
dw_dma_chip *chi + + /* Hardware configuration */ + if (autocfg) { +- unsigned int dwc_params; + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; +- void __iomem *addr = chip->regs + r * sizeof(u32); +- +- dwc_params = dma_read_byaddr(addr, DWC_PARAMS); ++ void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r]; ++ unsigned int dwc_params = dma_readl_native(addr); + + dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, + dwc_params); +@@ -1630,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chi + * up to 0x0a for 4095. + */ + dwc->block_size = +- (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1; ++ (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1; + dwc->nollp = + (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0; + } else { + dwc->block_size = pdata->block_size; + + /* Check if channel supports multi block transfer */ +- channel_writel(dwc, LLP, 0xfffffffc); +- dwc->nollp = +- (channel_readl(dwc, LLP) & 0xfffffffc) == 0; ++ channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff)); ++ dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0; + channel_writel(dwc, LLP, 0); + } + } +--- a/drivers/dma/dw/pci.c 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/pci.c 2016-05-21 22:47:08.665465180 +0200 +@@ -17,8 +17,8 @@ + + static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) + { ++ const struct dw_dma_platform_data *pdata = (void *)pid->driver_data; + struct dw_dma_chip *chip; +- struct dw_dma_platform_data *pdata = (void *)pid->driver_data; + int ret; + + ret = pcim_enable_device(pdev); +@@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev * + chip->dev = &pdev->dev; + chip->regs = pcim_iomap_table(pdev)[0]; + chip->irq = pdev->irq; ++ chip->pdata = pdata; + +- ret = dw_dma_probe(chip, pdata); ++ ret = dw_dma_probe(chip); + if (ret) + return ret; + +@@ -108,6 +109,10 @@ static const struct pci_device_id dw_pci + + /* Haswell */ + { PCI_VDEVICE(INTEL, 0x9c60) }, ++ ++ /* Broadwell */ ++ { PCI_VDEVICE(INTEL, 0x9ce0) }, ++ + { } + }; + 
MODULE_DEVICE_TABLE(pci, dw_pci_id_table); +--- a/drivers/dma/dw/platform.c 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/platform.c 2016-05-21 22:47:08.665465180 +0200 +@@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate( + + slave.src_id = dma_spec->args[0]; + slave.dst_id = dma_spec->args[0]; +- slave.src_master = dma_spec->args[1]; +- slave.dst_master = dma_spec->args[2]; ++ slave.m_master = dma_spec->args[1]; ++ slave.p_master = dma_spec->args[2]; + + if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS || + slave.dst_id >= DW_DMA_MAX_NR_REQUESTS || +- slave.src_master >= dw->nr_masters || +- slave.dst_master >= dw->nr_masters)) ++ slave.m_master >= dw->pdata->nr_masters || ++ slave.p_master >= dw->pdata->nr_masters)) + return NULL; + + dma_cap_zero(cap); +@@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dm + .dma_dev = dma_spec->dev, + .src_id = dma_spec->slave_id, + .dst_id = dma_spec->slave_id, +- .src_master = 1, +- .dst_master = 0, ++ .m_master = 0, ++ .p_master = 1, + }; + + return dw_dma_filter(chan, &slave); +@@ -103,18 +103,28 @@ dw_dma_parse_dt(struct platform_device * + struct device_node *np = pdev->dev.of_node; + struct dw_dma_platform_data *pdata; + u32 tmp, arr[DW_DMA_MAX_NR_MASTERS]; ++ u32 nr_masters; ++ u32 nr_channels; + + if (!np) { + dev_err(&pdev->dev, "Missing DT data\n"); + return NULL; + } + ++ if (of_property_read_u32(np, "dma-masters", &nr_masters)) ++ return NULL; ++ if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS) ++ return NULL; ++ ++ if (of_property_read_u32(np, "dma-channels", &nr_channels)) ++ return NULL; ++ + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return NULL; + +- if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels)) +- return NULL; ++ pdata->nr_masters = nr_masters; ++ pdata->nr_channels = nr_channels; + + if (of_property_read_bool(np, "is_private")) + pdata->is_private = true; +@@ -128,17 +138,13 @@ dw_dma_parse_dt(struct platform_device 
* + if (!of_property_read_u32(np, "block_size", &tmp)) + pdata->block_size = tmp; + +- if (!of_property_read_u32(np, "dma-masters", &tmp)) { +- if (tmp > DW_DMA_MAX_NR_MASTERS) +- return NULL; +- +- pdata->nr_masters = tmp; +- } +- +- if (!of_property_read_u32_array(np, "data_width", arr, +- pdata->nr_masters)) +- for (tmp = 0; tmp < pdata->nr_masters; tmp++) ++ if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) { ++ for (tmp = 0; tmp < nr_masters; tmp++) + pdata->data_width[tmp] = arr[tmp]; ++ } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) { ++ for (tmp = 0; tmp < nr_masters; tmp++) ++ pdata->data_width[tmp] = BIT(arr[tmp] & 0x07); ++ } + + return pdata; + } +@@ -155,8 +161,7 @@ static int dw_probe(struct platform_devi + struct dw_dma_chip *chip; + struct device *dev = &pdev->dev; + struct resource *mem; +- const struct acpi_device_id *id; +- struct dw_dma_platform_data *pdata; ++ const struct dw_dma_platform_data *pdata; + int err; + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); +@@ -179,13 +184,9 @@ static int dw_probe(struct platform_devi + pdata = dev_get_platdata(dev); + if (!pdata) + pdata = dw_dma_parse_dt(pdev); +- if (!pdata && has_acpi_companion(dev)) { +- id = acpi_match_device(dev->driver->acpi_match_table, dev); +- if (id) +- pdata = (struct dw_dma_platform_data *)id->driver_data; +- } + + chip->dev = dev; ++ chip->pdata = pdata; + + chip->clk = devm_clk_get(chip->dev, "hclk"); + if (IS_ERR(chip->clk)) +@@ -196,7 +197,7 @@ static int dw_probe(struct platform_devi + + pm_runtime_enable(&pdev->dev); + +- err = dw_dma_probe(chip, pdata); ++ err = dw_dma_probe(chip); + if (err) + goto err_dw_dma_probe; + +@@ -239,7 +240,19 @@ static void dw_shutdown(struct platform_ + { + struct dw_dma_chip *chip = platform_get_drvdata(pdev); + ++ /* ++ * We have to call dw_dma_disable() to stop any ongoing transfer. On ++ * some platforms we can't do that since DMA device is powered off. 
++ * Moreover we have no possibility to check if the platform is affected ++ * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put() ++ * unconditionally. On the other hand we can't use ++ * pm_runtime_suspended() because runtime PM framework is not fully ++ * used by the driver. ++ */ ++ pm_runtime_get_sync(chip->dev); + dw_dma_disable(chip); ++ pm_runtime_put_sync_suspend(chip->dev); ++ + clk_disable_unprepare(chip->clk); + } + +@@ -252,17 +265,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_tab + #endif + + #ifdef CONFIG_ACPI +-static struct dw_dma_platform_data dw_dma_acpi_pdata = { +- .nr_channels = 8, +- .is_private = true, +- .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, +- .chan_priority = CHAN_PRIORITY_ASCENDING, +- .block_size = 4095, +- .nr_masters = 2, +-}; +- + static const struct acpi_device_id dw_dma_acpi_id_table[] = { +- { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata }, ++ { "INTL9C60", 0 }, + { } + }; + MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table); +--- a/drivers/dma/dw/regs.h 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/regs.h 2016-05-21 22:47:08.665465180 +0200 +@@ -114,10 +114,6 @@ struct dw_dma_regs { + #define dma_writel_native writel + #endif + +-/* To access the registers in early stage of probe */ +-#define dma_read_byaddr(addr, name) \ +- dma_readl_native((addr) + offsetof(struct dw_dma_regs, name)) +- + /* Bitfields in DW_PARAMS */ + #define DW_PARAMS_NR_CHAN 8 /* number of channels */ + #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */ +@@ -143,6 +139,10 @@ enum dw_dma_msize { + DW_DMA_MSIZE_256, + }; + ++/* Bitfields in LLP */ ++#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */ ++#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */ ++ + /* Bitfields in CTL_LO */ + #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? 
*/ + #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ +@@ -150,7 +150,7 @@ enum dw_dma_msize { + #define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ + #define DWC_CTLL_DST_DEC (1<<7) + #define DWC_CTLL_DST_FIX (2<<7) +-#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */ ++#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */ + #define DWC_CTLL_SRC_DEC (1<<9) + #define DWC_CTLL_SRC_FIX (2<<9) + #define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ +@@ -216,6 +216,8 @@ enum dw_dma_msize { + enum dw_dmac_flags { + DW_DMA_IS_CYCLIC = 0, + DW_DMA_IS_SOFT_LLP = 1, ++ DW_DMA_IS_PAUSED = 2, ++ DW_DMA_IS_INITIALIZED = 3, + }; + + struct dw_dma_chan { +@@ -224,8 +226,6 @@ struct dw_dma_chan { + u8 mask; + u8 priority; + enum dma_transfer_direction direction; +- bool paused; +- bool initialized; + + /* software emulation of the LLP transfers */ + struct list_head *tx_node_active; +@@ -236,8 +236,6 @@ struct dw_dma_chan { + unsigned long flags; + struct list_head active_list; + struct list_head queue; +- struct list_head free_list; +- u32 residue; + struct dw_cyclic_desc *cdesc; + + unsigned int descs_allocated; +@@ -249,8 +247,8 @@ struct dw_dma_chan { + /* custom slave configuration */ + u8 src_id; + u8 dst_id; +- u8 src_master; +- u8 dst_master; ++ u8 m_master; ++ u8 p_master; + + /* configuration passed via .device_config */ + struct dma_slave_config dma_sconfig; +@@ -283,9 +281,8 @@ struct dw_dma { + u8 all_chan_mask; + u8 in_use; + +- /* hardware configuration */ +- unsigned char nr_masters; +- unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; ++ /* platform data */ ++ struct dw_dma_platform_data *pdata; + }; + + static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) +@@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(s + return container_of(ddev, struct dw_dma, dma); + } + ++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO ++typedef __be32 __dw32; ++#else ++typedef __le32 __dw32; ++#endif ++ + /* LLI == Linked List Item; a.k.a. 
DMA block descriptor */ + struct dw_lli { + /* values that are not changed by hardware */ +- u32 sar; +- u32 dar; +- u32 llp; /* chain to next lli */ +- u32 ctllo; ++ __dw32 sar; ++ __dw32 dar; ++ __dw32 llp; /* chain to next lli */ ++ __dw32 ctllo; + /* values that may get written back: */ +- u32 ctlhi; ++ __dw32 ctlhi; + /* sstat and dstat can snapshot peripheral register state. + * silicon config may discard either or both... + */ +- u32 sstat; +- u32 dstat; ++ __dw32 sstat; ++ __dw32 dstat; + }; + + struct dw_desc { + /* FIRST values the hardware uses */ + struct dw_lli lli; + ++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO ++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v)) ++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v)) ++#define lli_read(d, reg) be32_to_cpu((d)->lli.reg) ++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v)) ++#else ++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v)) ++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v)) ++#define lli_read(d, reg) le32_to_cpu((d)->lli.reg) ++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v)) ++#endif ++ + /* THEN values for driver housekeeping */ + struct list_head desc_node; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + size_t len; + size_t total_len; ++ u32 residue; + }; + + #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node) +--- a/include/linux/dma/dw.h ++++ b/include/linux/dma/dw.h +@@ -27,6 +27,7 @@ struct dw_dma; + * @regs: memory mapped I/O space + * @clk: hclk clock + * @dw: struct dw_dma that is filed by dw_dma_probe() ++ * @pdata: pointer to platform data + */ + struct dw_dma_chip { + struct device *dev; +@@ -34,10 +35,12 @@ struct dw_dma_chip { + void __iomem *regs; + struct clk *clk; + struct dw_dma *dw; ++ ++ const struct dw_dma_platform_data *pdata; + }; + + /* Export to the platform drivers */ +-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata); ++int dw_dma_probe(struct 
dw_dma_chip *chip); + int dw_dma_remove(struct dw_dma_chip *chip); + + /* DMA API extensions */ +diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h +index 03b6095..d15d8ba 100644 +--- a/include/linux/platform_data/dma-dw.h ++++ b/include/linux/platform_data/dma-dw.h +@@ -21,15 +21,15 @@ + * @dma_dev: required DMA master device + * @src_id: src request line + * @dst_id: dst request line +- * @src_master: src master for transfers on allocated channel. +- * @dst_master: dest master for transfers on allocated channel. ++ * @m_master: memory master for transfers on allocated channel ++ * @p_master: peripheral master for transfers on allocated channel + */ + struct dw_dma_slave { + struct device *dma_dev; + u8 src_id; + u8 dst_id; +- u8 src_master; +- u8 dst_master; ++ u8 m_master; ++ u8 p_master; + }; + + /** +@@ -43,7 +43,7 @@ struct dw_dma_slave { + * @block_size: Maximum block size supported by the controller + * @nr_masters: Number of AHB masters supported by the controller + * @data_width: Maximum data width supported by hardware per AHB master +- * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) ++ * (in bytes, power of 2) + */ + struct dw_dma_platform_data { + unsigned int nr_channels; +@@ -55,7 +55,7 @@ struct dw_dma_platform_data { + #define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ + #define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ + unsigned char chan_priority; +- unsigned short block_size; ++ unsigned int block_size; + unsigned char nr_masters; + unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; + }; +-- +2.8.1 + |