diff options
4 files changed, 2303 insertions, 0 deletions
diff --git a/target/linux/apm821xx/patches-4.4/010-dmaengine-Add-transfer-termination-synchronization-s.patch b/target/linux/apm821xx/patches-4.4/010-dmaengine-Add-transfer-termination-synchronization-s.patch new file mode 100644 index 0000000000..924f7970a8 --- /dev/null +++ b/target/linux/apm821xx/patches-4.4/010-dmaengine-Add-transfer-termination-synchronization-s.patch @@ -0,0 +1,143 @@ +From 7bd903c5ca47fde5ad52370a47776491813c772e Mon Sep 17 00:00:00 2001 +From: Peter Ujfalusi <peter.ujfalusi@ti.com> +Date: Mon, 14 Dec 2015 22:47:39 +0200 +Subject: [PATCH 1/3] dmaengine: core: Move and merge the code paths using + private_candidate + +Channel matching with private_candidate() is used in two paths, the error +checking is slightly different in them and they are duplicating code also. +Move the code under find_candidate() to provide consistent execution and +going to allow us to reuse this mode of channel lookup later. + +Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com> +Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com> +Reviewed-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Vinod Koul <vinod.koul@intel.com> +--- + drivers/dma/dmaengine.c | 81 +++++++++++++++++++++++++------------------------ + 1 file changed, 42 insertions(+), 39 deletions(-) + +diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c +index f2cbff9..81a36fc 100644 +--- a/drivers/dma/dmaengine.c ++++ b/drivers/dma/dmaengine.c +@@ -542,6 +542,42 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, + return NULL; + } + ++static struct dma_chan *find_candidate(struct dma_device *device, ++ const dma_cap_mask_t *mask, ++ dma_filter_fn fn, void *fn_param) ++{ ++ struct dma_chan *chan = private_candidate(mask, device, fn, fn_param); ++ int err; ++ ++ if (chan) { ++ /* Found a suitable channel, try to grab, prep, and return it. 
++ * We first set DMA_PRIVATE to disable balance_ref_count as this ++ * channel will not be published in the general-purpose ++ * allocator ++ */ ++ dma_cap_set(DMA_PRIVATE, device->cap_mask); ++ device->privatecnt++; ++ err = dma_chan_get(chan); ++ ++ if (err) { ++ if (err == -ENODEV) { ++ pr_debug("%s: %s module removed\n", __func__, ++ dma_chan_name(chan)); ++ list_del_rcu(&device->global_node); ++ } else ++ pr_debug("%s: failed to get %s: (%d)\n", ++ __func__, dma_chan_name(chan), err); ++ ++ if (--device->privatecnt == 0) ++ dma_cap_clear(DMA_PRIVATE, device->cap_mask); ++ ++ chan = ERR_PTR(err); ++ } ++ } ++ ++ return chan ? chan : ERR_PTR(-EPROBE_DEFER); ++} ++ + /** + * dma_get_slave_channel - try to get specific channel exclusively + * @chan: target channel +@@ -580,7 +616,6 @@ struct dma_chan *dma_get_any_slave_channel(struct dma_device *device) + { + dma_cap_mask_t mask; + struct dma_chan *chan; +- int err; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); +@@ -588,23 +623,11 @@ struct dma_chan *dma_get_any_slave_channel(struct dma_device *device) + /* lock against __dma_request_channel */ + mutex_lock(&dma_list_mutex); + +- chan = private_candidate(&mask, device, NULL, NULL); +- if (chan) { +- dma_cap_set(DMA_PRIVATE, device->cap_mask); +- device->privatecnt++; +- err = dma_chan_get(chan); +- if (err) { +- pr_debug("%s: failed to get %s: (%d)\n", +- __func__, dma_chan_name(chan), err); +- chan = NULL; +- if (--device->privatecnt == 0) +- dma_cap_clear(DMA_PRIVATE, device->cap_mask); +- } +- } ++ chan = find_candidate(device, &mask, NULL, NULL); + + mutex_unlock(&dma_list_mutex); + +- return chan; ++ return IS_ERR(chan) ? 
NULL : chan; + } + EXPORT_SYMBOL_GPL(dma_get_any_slave_channel); + +@@ -621,35 +644,15 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, + { + struct dma_device *device, *_d; + struct dma_chan *chan = NULL; +- int err; + + /* Find a channel */ + mutex_lock(&dma_list_mutex); + list_for_each_entry_safe(device, _d, &dma_device_list, global_node) { +- chan = private_candidate(mask, device, fn, fn_param); +- if (chan) { +- /* Found a suitable channel, try to grab, prep, and +- * return it. We first set DMA_PRIVATE to disable +- * balance_ref_count as this channel will not be +- * published in the general-purpose allocator +- */ +- dma_cap_set(DMA_PRIVATE, device->cap_mask); +- device->privatecnt++; +- err = dma_chan_get(chan); ++ chan = find_candidate(device, mask, fn, fn_param); ++ if (!IS_ERR(chan)) ++ break; + +- if (err == -ENODEV) { +- pr_debug("%s: %s module removed\n", +- __func__, dma_chan_name(chan)); +- list_del_rcu(&device->global_node); +- } else if (err) +- pr_debug("%s: failed to get %s: (%d)\n", +- __func__, dma_chan_name(chan), err); +- else +- break; +- if (--device->privatecnt == 0) +- dma_cap_clear(DMA_PRIVATE, device->cap_mask); +- chan = NULL; +- } ++ chan = NULL; + } + mutex_unlock(&dma_list_mutex); + +-- +2.8.1 + diff --git a/target/linux/apm821xx/patches-4.4/011-dmaengine-core-Introduce-new-universal-API-to-reques.patch b/target/linux/apm821xx/patches-4.4/011-dmaengine-core-Introduce-new-universal-API-to-reques.patch new file mode 100644 index 0000000000..0296714639 --- /dev/null +++ b/target/linux/apm821xx/patches-4.4/011-dmaengine-core-Introduce-new-universal-API-to-reques.patch @@ -0,0 +1,345 @@ +From a8135d0d79e9d0ad3a4ff494fceeaae838becf38 Mon Sep 17 00:00:00 2001 +From: Peter Ujfalusi <peter.ujfalusi@ti.com> +Date: Mon, 14 Dec 2015 22:47:40 +0200 +Subject: [PATCH 2/3] dmaengine: core: Introduce new, universal API to request + a channel + +The two API function can cover most, if not all current APIs used to +request a 
channel. With minimal effort dmaengine drivers, platforms and +dmaengine user drivers can be converted to use the two functions. + +struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask); + +To request any channel matching with the requested capabilities, can be +used to request channel for memcpy, memset, xor, etc where no hardware +synchronization is needed. + +struct dma_chan *dma_request_chan(struct device *dev, const char *name); +To request a slave channel. The dma_request_chan() will try to find the +channel via DT, ACPI or in case the kernel booted in non DT/ACPI mode +it will use a filter lookup table and retrieve the needed information from +the dma_slave_map provided by the DMA drivers. +This legacy mode needs changes in platform code, in dmaengine drivers and +finally the dmaengine user drivers can be converted: + +For each dmaengine driver an array of DMA device, slave and the parameter +for the filter function needs to be added: + +static const struct dma_slave_map da830_edma_map[] = { + { "davinci-mcasp.0", "rx", EDMA_FILTER_PARAM(0, 0) }, + { "davinci-mcasp.0", "tx", EDMA_FILTER_PARAM(0, 1) }, + { "davinci-mcasp.1", "rx", EDMA_FILTER_PARAM(0, 2) }, + { "davinci-mcasp.1", "tx", EDMA_FILTER_PARAM(0, 3) }, + { "davinci-mcasp.2", "rx", EDMA_FILTER_PARAM(0, 4) }, + { "davinci-mcasp.2", "tx", EDMA_FILTER_PARAM(0, 5) }, + { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 14) }, + { "spi_davinci.0", "tx", EDMA_FILTER_PARAM(0, 15) }, + { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 16) }, + { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 17) }, + { "spi_davinci.1", "rx", EDMA_FILTER_PARAM(0, 18) }, + { "spi_davinci.1", "tx", EDMA_FILTER_PARAM(0, 19) }, +}; + +This information is going to be needed by the dmaengine driver, so +modification to the platform_data is needed, and the driver map should be +added to the pdata of the DMA driver: + +da8xx_edma0_pdata.slave_map = da830_edma_map; +da8xx_edma0_pdata.slavecnt = ARRAY_SIZE(da830_edma_map); + +The
DMA driver then needs to configure the needed device -> filter_fn +mapping before it registers with dma_async_device_register() : + +ecc->dma_slave.filter_map.map = info->slave_map; +ecc->dma_slave.filter_map.mapcnt = info->slavecnt; +ecc->dma_slave.filter_map.fn = edma_filter_fn; + +When neither DT or ACPI lookup is available the dma_request_chan() will +try to match the requester's device name with the filter_map's list of +device names, when a match found it will use the information from the +dma_slave_map to get the channel with the dma_get_channel() internal +function. + +Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com> +Reviewed-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Vinod Koul <vinod.koul@intel.com> +--- + Documentation/dmaengine/client.txt | 23 +++------- + drivers/dma/dmaengine.c | 89 +++++++++++++++++++++++++++++++++----- + include/linux/dmaengine.h | 51 +++++++++++++++++++--- + 3 files changed, 127 insertions(+), 36 deletions(-) + +diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt +index 11fb87f..4b04d89 100644 +--- a/Documentation/dmaengine/client.txt ++++ b/Documentation/dmaengine/client.txt +@@ -22,25 +22,14 @@ The slave DMA usage consists of following steps: + Channel allocation is slightly different in the slave DMA context, + client drivers typically need a channel from a particular DMA + controller only and even in some cases a specific channel is desired. +- To request a channel dma_request_channel() API is used. ++ To request a channel dma_request_chan() API is used. + + Interface: +- struct dma_chan *dma_request_channel(dma_cap_mask_t mask, +- dma_filter_fn filter_fn, +- void *filter_param); +- where dma_filter_fn is defined as: +- typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); +- +- The 'filter_fn' parameter is optional, but highly recommended for +- slave and cyclic channels as they typically need to obtain a specific +- DMA channel. 
+- +- When the optional 'filter_fn' parameter is NULL, dma_request_channel() +- simply returns the first channel that satisfies the capability mask. +- +- Otherwise, the 'filter_fn' routine will be called once for each free +- channel which has a capability in 'mask'. 'filter_fn' is expected to +- return 'true' when the desired DMA channel is found. ++ struct dma_chan *dma_request_chan(struct device *dev, const char *name); ++ ++ Which will find and return the 'name' DMA channel associated with the 'dev' ++ device. The association is done via DT, ACPI or board file based ++ dma_slave_map matching table. + + A channel allocated via this interface is exclusive to the caller, + until dma_release_channel() is called. +diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c +index 81a36fc..a094dbb 100644 +--- a/drivers/dma/dmaengine.c ++++ b/drivers/dma/dmaengine.c +@@ -43,6 +43,7 @@ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + ++#include <linux/platform_device.h> + #include <linux/dma-mapping.h> + #include <linux/init.h> + #include <linux/module.h> +@@ -665,27 +666,73 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, + } + EXPORT_SYMBOL_GPL(__dma_request_channel); + ++static const struct dma_slave_map *dma_filter_match(struct dma_device *device, ++ const char *name, ++ struct device *dev) ++{ ++ int i; ++ ++ if (!device->filter.mapcnt) ++ return NULL; ++ ++ for (i = 0; i < device->filter.mapcnt; i++) { ++ const struct dma_slave_map *map = &device->filter.map[i]; ++ ++ if (!strcmp(map->devname, dev_name(dev)) && ++ !strcmp(map->slave, name)) ++ return map; ++ } ++ ++ return NULL; ++} ++ + /** +- * dma_request_slave_channel_reason - try to allocate an exclusive slave channel ++ * dma_request_chan - try to allocate an exclusive slave channel + * @dev: pointer to client device structure + * @name: slave channel name + * + * Returns pointer to appropriate DMA channel on success or an error pointer. 
+ */ +-struct dma_chan *dma_request_slave_channel_reason(struct device *dev, +- const char *name) ++struct dma_chan *dma_request_chan(struct device *dev, const char *name) + { ++ struct dma_device *d, *_d; ++ struct dma_chan *chan = NULL; ++ + /* If device-tree is present get slave info from here */ + if (dev->of_node) +- return of_dma_request_slave_channel(dev->of_node, name); ++ chan = of_dma_request_slave_channel(dev->of_node, name); + + /* If device was enumerated by ACPI get slave info from here */ +- if (ACPI_HANDLE(dev)) +- return acpi_dma_request_slave_chan_by_name(dev, name); ++ if (has_acpi_companion(dev) && !chan) ++ chan = acpi_dma_request_slave_chan_by_name(dev, name); ++ ++ if (chan) { ++ /* Valid channel found or requester need to be deferred */ ++ if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER) ++ return chan; ++ } ++ ++ /* Try to find the channel via the DMA filter map(s) */ ++ mutex_lock(&dma_list_mutex); ++ list_for_each_entry_safe(d, _d, &dma_device_list, global_node) { ++ dma_cap_mask_t mask; ++ const struct dma_slave_map *map = dma_filter_match(d, name, dev); ++ ++ if (!map) ++ continue; ++ ++ dma_cap_zero(mask); ++ dma_cap_set(DMA_SLAVE, mask); + +- return ERR_PTR(-ENODEV); ++ chan = find_candidate(d, &mask, d->filter.fn, map->param); ++ if (!IS_ERR(chan)) ++ break; ++ } ++ mutex_unlock(&dma_list_mutex); ++ ++ return chan ? 
chan : ERR_PTR(-EPROBE_DEFER); + } +-EXPORT_SYMBOL_GPL(dma_request_slave_channel_reason); ++EXPORT_SYMBOL_GPL(dma_request_chan); + + /** + * dma_request_slave_channel - try to allocate an exclusive slave channel +@@ -697,17 +744,35 @@ EXPORT_SYMBOL_GPL(dma_request_slave_channel_reason); + struct dma_chan *dma_request_slave_channel(struct device *dev, + const char *name) + { +- struct dma_chan *ch = dma_request_slave_channel_reason(dev, name); ++ struct dma_chan *ch = dma_request_chan(dev, name); + if (IS_ERR(ch)) + return NULL; + +- dma_cap_set(DMA_PRIVATE, ch->device->cap_mask); +- ch->device->privatecnt++; +- + return ch; + } + EXPORT_SYMBOL_GPL(dma_request_slave_channel); + ++/** ++ * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities ++ * @mask: capabilities that the channel must satisfy ++ * ++ * Returns pointer to appropriate DMA channel on success or an error pointer. ++ */ ++struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask) ++{ ++ struct dma_chan *chan; ++ ++ if (!mask) ++ return ERR_PTR(-ENODEV); ++ ++ chan = __dma_request_channel(mask, NULL, NULL); ++ if (!chan) ++ chan = ERR_PTR(-ENODEV); ++ ++ return chan; ++} ++EXPORT_SYMBOL_GPL(dma_request_chan_by_mask); ++ + void dma_release_channel(struct dma_chan *chan) + { + mutex_lock(&dma_list_mutex); +diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h +index c47c68e..d50a6b51 100644 +--- a/include/linux/dmaengine.h ++++ b/include/linux/dmaengine.h +@@ -607,11 +607,38 @@ enum dmaengine_alignment { + }; + + /** ++ * struct dma_slave_map - associates slave device and it's slave channel with ++ * parameter to be used by a filter function ++ * @devname: name of the device ++ * @slave: slave channel name ++ * @param: opaque parameter to pass to struct dma_filter.fn ++ */ ++struct dma_slave_map { ++ const char *devname; ++ const char *slave; ++ void *param; ++}; ++ ++/** ++ * struct dma_filter - information for slave device/channel to filter_fn/param ++ * 
mapping ++ * @fn: filter function callback ++ * @mapcnt: number of slave device/channel in the map ++ * @map: array of channel to filter mapping data ++ */ ++struct dma_filter { ++ dma_filter_fn fn; ++ int mapcnt; ++ const struct dma_slave_map *map; ++}; ++ ++/** + * struct dma_device - info on the entity supplying DMA services + * @chancnt: how many DMA channels are supported + * @privatecnt: how many DMA channels are requested by dma_request_channel + * @channels: the list of struct dma_chan + * @global_node: list_head for global dma_device_list ++ * @filter: information for device/slave to filter function/param mapping + * @cap_mask: one or more dma_capability flags + * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability +@@ -666,6 +693,7 @@ struct dma_device { + unsigned int privatecnt; + struct list_head channels; + struct list_head global_node; ++ struct dma_filter filter; + dma_cap_mask_t cap_mask; + unsigned short max_xor; + unsigned short max_pq; +@@ -1140,9 +1168,11 @@ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); + void dma_issue_pending_all(void); + struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, + dma_filter_fn fn, void *fn_param); +-struct dma_chan *dma_request_slave_channel_reason(struct device *dev, +- const char *name); + struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name); ++ ++struct dma_chan *dma_request_chan(struct device *dev, const char *name); ++struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask); ++ + void dma_release_channel(struct dma_chan *chan); + int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps); + #else +@@ -1166,16 +1196,21 @@ static inline struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, + { + return NULL; + } +-static inline struct dma_chan *dma_request_slave_channel_reason( +- struct device *dev, const char *name) +-{ 
+- return ERR_PTR(-ENODEV); +-} + static inline struct dma_chan *dma_request_slave_channel(struct device *dev, + const char *name) + { + return NULL; + } ++static inline struct dma_chan *dma_request_chan(struct device *dev, ++ const char *name) ++{ ++ return ERR_PTR(-ENODEV); ++} ++static inline struct dma_chan *dma_request_chan_by_mask( ++ const dma_cap_mask_t *mask) ++{ ++ return ERR_PTR(-ENODEV); ++} + static inline void dma_release_channel(struct dma_chan *chan) + { + } +@@ -1186,6 +1221,8 @@ static inline int dma_get_slave_caps(struct dma_chan *chan, + } + #endif + ++#define dma_request_slave_channel_reason(dev, name) dma_request_chan(dev, name) ++ + static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) + { + struct dma_slave_caps caps; +-- +2.8.1 + diff --git a/target/linux/apm821xx/patches-4.4/012-dmaengine-Add-transfer-termination-synchronization-s.patch b/target/linux/apm821xx/patches-4.4/012-dmaengine-Add-transfer-termination-synchronization-s.patch new file mode 100644 index 0000000000..8fcf8caa8a --- /dev/null +++ b/target/linux/apm821xx/patches-4.4/012-dmaengine-Add-transfer-termination-synchronization-s.patch @@ -0,0 +1,293 @@ +From b36f09c3c441a6e59eab9315032e7d546571de3f Mon Sep 17 00:00:00 2001 +From: Lars-Peter Clausen <lars@metafoo.de> +Date: Tue, 20 Oct 2015 11:46:28 +0200 +Subject: [PATCH] dmaengine: Add transfer termination synchronization support + +The DMAengine API has a long standing race condition that is inherent to +the API itself. Calling dmaengine_terminate_all() is supposed to stop and +abort any pending or active transfers that have previously been submitted. +Unfortunately it is possible that this operation races against a currently +running (or with some drivers also scheduled) completion callback. 
+ +Since the API allows dmaengine_terminate_all() to be called from atomic +context as well as from within a completion callback it is not possible to +synchronize to the execution of the completion callback from within +dmaengine_terminate_all() itself. + +This means that a user of the DMAengine API does not know when it is safe +to free resources used in the completion callback, which can result in a +use-after-free race condition. + +This patch addresses the issue by introducing an explicit synchronization +primitive to the DMAengine API called dmaengine_synchronize(). + +The existing dmaengine_terminate_all() is deprecated in favor of +dmaengine_terminate_sync() and dmaengine_terminate_async(). The former +aborts all pending and active transfers and synchronizes to the current +context, meaning it will wait until all running completion callbacks have +finished. This means it is only possible to call this function from +non-atomic context. The latter function does not synchronize, but can still +be used in atomic context or from within a complete callback. It has to be +followed up by dmaengine_synchronize() before a client can free the +resources used in a completion callback. + +In addition to this the semantics of the device_terminate_all() callback +are slightly relaxed by this patch. It is now OK for a driver to only +schedule the termination of the active transfer, but it does not necessarily +have to wait until the DMA controller has completely stopped. The driver +must ensure though that the controller has stopped and no longer accesses +any memory when the device_synchronize() callback returns. + +This was in part done since most drivers do not pay attention to this +anyway at the moment and to emphasize that this needs to be done when the +device_synchronize() callback is implemented. But it also helps with +implementing support for devices where stopping the controller can require +operations that may sleep.
+ +Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> +Signed-off-by: Vinod Koul <vinod.koul@intel.com> +--- + Documentation/dmaengine/client.txt | 38 ++++++++++++++- + Documentation/dmaengine/provider.txt | 20 +++++++- + drivers/dma/dmaengine.c | 5 +- + include/linux/dmaengine.h | 90 ++++++++++++++++++++++++++++++++++++ + 4 files changed, 148 insertions(+), 5 deletions(-) + +diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt +index 11fb87f..d9f9f46 100644 +--- a/Documentation/dmaengine/client.txt ++++ b/Documentation/dmaengine/client.txt +@@ -128,7 +128,7 @@ The slave DMA usage consists of following steps: + transaction. + + For cyclic DMA, a callback function may wish to terminate the +- DMA via dmaengine_terminate_all(). ++ DMA via dmaengine_terminate_async(). + + Therefore, it is important that DMA engine drivers drop any + locks before calling the callback function which may cause a +@@ -166,12 +166,29 @@ The slave DMA usage consists of following steps: + + Further APIs: + +-1. int dmaengine_terminate_all(struct dma_chan *chan) ++1. int dmaengine_terminate_sync(struct dma_chan *chan) ++ int dmaengine_terminate_async(struct dma_chan *chan) ++ int dmaengine_terminate_all(struct dma_chan *chan) /* DEPRECATED */ + + This causes all activity for the DMA channel to be stopped, and may + discard data in the DMA FIFO which hasn't been fully transferred. + No callback functions will be called for any incomplete transfers. + ++ Two variants of this function are available. ++ ++ dmaengine_terminate_async() might not wait until the DMA has been fully ++ stopped or until any running complete callbacks have finished. But it is ++ possible to call dmaengine_terminate_async() from atomic context or from ++ within a complete callback. dmaengine_synchronize() must be called before it ++ is safe to free the memory accessed by the DMA transfer or free resources ++ accessed from within the complete callback. 
++ ++ dmaengine_terminate_sync() will wait for the transfer and any running ++ complete callbacks to finish before it returns. But the function must not be ++ called from atomic context or from within a complete callback. ++ ++ dmaengine_terminate_all() is deprecated and should not be used in new code. ++ + 2. int dmaengine_pause(struct dma_chan *chan) + + This pauses activity on the DMA channel without data loss. +@@ -197,3 +214,20 @@ Further APIs: + a running DMA channel. It is recommended that DMA engine users + pause or stop (via dmaengine_terminate_all()) the channel before + using this API. ++ ++5. void dmaengine_synchronize(struct dma_chan *chan) ++ ++ Synchronize the termination of the DMA channel to the current context. ++ ++ This function should be used after dmaengine_terminate_async() to synchronize ++ the termination of the DMA channel to the current context. The function will ++ wait for the transfer and any running complete callbacks to finish before it ++ returns. ++ ++ If dmaengine_terminate_async() is used to stop the DMA channel this function ++ must be called before it is safe to free memory accessed by previously ++ submitted descriptors or to free any resources accessed within the complete ++ callback of previously submitted descriptors. ++ ++ The behavior of this function is undefined if dma_async_issue_pending() has ++ been called between dmaengine_terminate_async() and this function. +diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt +index 67d4ce4..122b7f4 100644 +--- a/Documentation/dmaengine/provider.txt ++++ b/Documentation/dmaengine/provider.txt +@@ -327,8 +327,24 @@ supported. 
+ + * device_terminate_all + - Aborts all the pending and ongoing transfers on the channel +- - This command should operate synchronously on the channel, +- terminating right away all the channels ++ - For aborted transfers the complete callback should not be called ++ - Can be called from atomic context or from within a complete ++ callback of a descriptor. Must not sleep. Drivers must be able ++ to handle this correctly. ++ - Termination may be asynchronous. The driver does not have to ++ wait until the currently active transfer has completely stopped. ++ See device_synchronize. ++ ++ * device_synchronize ++ - Must synchronize the termination of a channel to the current ++ context. ++ - Must make sure that memory for previously submitted ++ descriptors is no longer accessed by the DMA controller. ++ - Must make sure that all complete callbacks for previously ++ submitted descriptors have finished running and none are ++ scheduled to run. ++ - May sleep. ++ + + Misc notes (stuff that should be documented, but don't really know + where to put them) +diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c +index 3ecec14..d6fc82e 100644 +--- a/drivers/dma/dmaengine.c ++++ b/drivers/dma/dmaengine.c +@@ -265,8 +265,11 @@ static void dma_chan_put(struct dma_chan *chan) + module_put(dma_chan_to_owner(chan)); + + /* This channel is not in use anymore, free it */ +- if (!chan->client_count && chan->device->device_free_chan_resources) ++ if (!chan->client_count && chan->device->device_free_chan_resources) { ++ /* Make sure all operations have completed */ ++ dmaengine_synchronize(chan); + chan->device->device_free_chan_resources(chan); ++ } + + /* If the channel is used via a DMA request router, free the mapping */ + if (chan->router && chan->router->route_free) { +diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h +index c47c68e..4662d9a 100644 +--- a/include/linux/dmaengine.h ++++ b/include/linux/dmaengine.h +@@ -654,6 +654,8 @@ enum 
dmaengine_alignment { + * paused. Returns 0 or an error code + * @device_terminate_all: Aborts all transfers on a channel. Returns 0 + * or an error code ++ * @device_synchronize: Synchronizes the termination of a transfers to the ++ * current context. + * @device_tx_status: poll for transaction completion, the optional + * txstate parameter can be supplied with a pointer to get a + * struct with auxiliary transfer status information, otherwise the call +@@ -737,6 +739,7 @@ struct dma_device { + int (*device_pause)(struct dma_chan *chan); + int (*device_resume)(struct dma_chan *chan); + int (*device_terminate_all)(struct dma_chan *chan); ++ void (*device_synchronize)(struct dma_chan *chan); + + enum dma_status (*device_tx_status)(struct dma_chan *chan, + dma_cookie_t cookie, +@@ -828,6 +831,13 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( + src_sg, src_nents, flags); + } + ++/** ++ * dmaengine_terminate_all() - Terminate all active DMA transfers ++ * @chan: The channel for which to terminate the transfers ++ * ++ * This function is DEPRECATED use either dmaengine_terminate_sync() or ++ * dmaengine_terminate_async() instead. ++ */ + static inline int dmaengine_terminate_all(struct dma_chan *chan) + { + if (chan->device->device_terminate_all) +@@ -836,6 +846,86 @@ static inline int dmaengine_terminate_all(struct dma_chan *chan) + return -ENOSYS; + } + ++/** ++ * dmaengine_terminate_async() - Terminate all active DMA transfers ++ * @chan: The channel for which to terminate the transfers ++ * ++ * Calling this function will terminate all active and pending descriptors ++ * that have previously been submitted to the channel. It is not guaranteed ++ * though that the transfer for the active descriptor has stopped when the ++ * function returns. Furthermore it is possible the complete callback of a ++ * submitted transfer is still running when this function returns. 
++ * ++ * dmaengine_synchronize() needs to be called before it is safe to free ++ * any memory that is accessed by previously submitted descriptors or before ++ * freeing any resources accessed from within the completion callback of any ++ * previously submitted descriptors. ++ * ++ * This function can be called from atomic context as well as from within a ++ * complete callback of a descriptor submitted on the same channel. ++ * ++ * If none of the two conditions above apply consider using ++ * dmaengine_terminate_sync() instead. ++ */ ++static inline int dmaengine_terminate_async(struct dma_chan *chan) ++{ ++ if (chan->device->device_terminate_all) ++ return chan->device->device_terminate_all(chan); ++ ++ return -EINVAL; ++} ++ ++/** ++ * dmaengine_synchronize() - Synchronize DMA channel termination ++ * @chan: The channel to synchronize ++ * ++ * Synchronizes the DMA channel termination to the current context. When this ++ * function returns it is guaranteed that all transfers for previously issued ++ * descriptors have stopped and it is safe to free the memory associated ++ * with them. Furthermore it is guaranteed that all complete callback functions ++ * for a previously submitted descriptor have finished running and it is safe to ++ * free resources accessed from within the complete callbacks. ++ * ++ * The behavior of this function is undefined if dma_async_issue_pending() has ++ * been called between dmaengine_terminate_async() and this function. ++ * ++ * This function must only be called from non-atomic context and must not be ++ * called from within a complete callback of a descriptor submitted on the same ++ * channel.
++ */ ++static inline void dmaengine_synchronize(struct dma_chan *chan) ++{ ++ if (chan->device->device_synchronize) ++ chan->device->device_synchronize(chan); ++} ++ ++/** ++ * dmaengine_terminate_sync() - Terminate all active DMA transfers ++ * @chan: The channel for which to terminate the transfers ++ * ++ * Calling this function will terminate all active and pending transfers ++ * that have previously been submitted to the channel. It is similar to ++ * dmaengine_terminate_async() but guarantees that the DMA transfer has actually ++ * stopped and that all complete callbacks have finished running when the ++ * function returns. ++ * ++ * This function must only be called from non-atomic context and must not be ++ * called from within a complete callback of a descriptor submitted on the same ++ * channel. ++ */ ++static inline int dmaengine_terminate_sync(struct dma_chan *chan) ++{ ++ int ret; ++ ++ ret = dmaengine_terminate_async(chan); ++ if (ret) ++ return ret; ++ ++ dmaengine_synchronize(chan); ++ ++ return 0; ++} ++ + static inline int dmaengine_pause(struct dma_chan *chan) + { + if (chan->device->device_pause) +-- +2.8.1 + diff --git a/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch new file mode 100644 index 0000000000..96b11a82b6 --- /dev/null +++ b/target/linux/apm821xx/patches-4.4/015-dmaengine-dw-fixed.patch @@ -0,0 +1,1522 @@ +From: Andy Shevchenko <andriy.shevchenko@linux.intel.com> +Subject: [PATCH v6 0/4] Fixes / cleanups in dw_dmac (affects on few subsystems) +Date: Mon, 25 Apr 2016 15:35:05 +0300 + +This patch series (v3: http://www.spinics.net/lists/kernel/msg2215303.html) +contains a number of mostly minor fixes and cleanups for the DW DMA driver. A +couple of them affect the DT binding so these may need to be updated to +maintain compatibility (old format is still supported though). The rest should +be relatively straight-forward. 
+ +This version has been tested on the following bare metal platforms: +- ATNGW100 (avr32 based platform) with dmatest +- Sam460ex (powerpc 44x based platform) with SATA +- Intel Braswell with UART +- Intel Galileo (Intel Quark based platform) with UART + +(SATA driver and Intel Galileo UART support are based on this series and just + published recently for a review) + +Vinod, there are a few patch sets developed on top of this one, so, the idea is +to keep this in an immutable branch / tag. + +Changes since v5: +- fixed an issue found by kbuildbot + +Changes since v4: +- send proper set of patches +- add changelog + +Changes since v3: +- add patch 1 to check value of dma-masters property +- drop the upstreamed patches +- update patch 2 to keep an array for data-width property as well + +Changes since v2: +- add patch 1 to fix master selection which was broken for long time +- remove "use field-by-field initialization" patch since, as Mans mentioned, it + has mostly no value and might even be more error prone +- rebase on top of recent linux-next +- wide testing on several platforms + +Changes since v1: +- zeroing struct dw_dma_slave before use +- fall back to old data_width property if data-width is not found +- append tags for few patches +- correct title of cover letter +- rebase on top of recent linux-next + +Andy Shevchenko (4): + dmaengine: dw: platform: check nr_masters to be non-zero + dmaengine: dw: revisit data_width property + dmaengine: dw: keep entire platform data in struct dw_dma + dmaengine: dw: pass platform data via struct dw_dma_chip + + Documentation/devicetree/bindings/dma/snps-dma.txt | 6 +- + arch/arc/boot/dts/abilis_tb10x.dtsi | 2 +- + arch/arm/boot/dts/spear13xx.dtsi | 4 +- + drivers/ata/sata_dwc_460ex.c | 2 +- + drivers/dma/dw/core.c | 75 ++++++++-------------- + drivers/dma/dw/pci.c | 5 +- + drivers/dma/dw/platform.c | 32 +++++---- + drivers/dma/dw/regs.h | 5 +- + include/linux/dma/dw.h | 5 +- + include/linux/platform_data/dma-dw.h | 4 +-
+ sound/soc/intel/common/sst-firmware.c | 2 +- + 11 files changed, 64 insertions(+), 78 deletions(-) + +--- a/drivers/dma/dw/core.c 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/core.c 2016-05-21 22:47:08.665465180 +0200 +@@ -45,22 +45,19 @@ + DW_DMA_MSIZE_16; \ + u8 _dmsize = _is_slave ? _sconfig->dst_maxburst : \ + DW_DMA_MSIZE_16; \ ++ u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ? \ ++ _dwc->p_master : _dwc->m_master; \ ++ u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ? \ ++ _dwc->p_master : _dwc->m_master; \ + \ + (DWC_CTLL_DST_MSIZE(_dmsize) \ + | DWC_CTLL_SRC_MSIZE(_smsize) \ + | DWC_CTLL_LLP_D_EN \ + | DWC_CTLL_LLP_S_EN \ +- | DWC_CTLL_DMS(_dwc->dst_master) \ +- | DWC_CTLL_SMS(_dwc->src_master)); \ ++ | DWC_CTLL_DMS(_dms) \ ++ | DWC_CTLL_SMS(_sms)); \ + }) + +-/* +- * Number of descriptors to allocate for each channel. This should be +- * made configurable somehow; preferably, the clients (at least the +- * ones using slave transfers) should be able to give us a hint. 
+- */ +-#define NR_DESCS_PER_CHANNEL 64 +- + /* The set of bus widths supported by the DMA controller */ + #define DW_DMA_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ +@@ -80,51 +77,65 @@ static struct dw_desc *dwc_first_active( + return to_dw_desc(dwc->active_list.next); + } + +-static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) ++static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) + { +- struct dw_desc *desc, *_desc; +- struct dw_desc *ret = NULL; +- unsigned int i = 0; +- unsigned long flags; ++ struct dw_desc *desc = txd_to_dw_desc(tx); ++ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); ++ dma_cookie_t cookie; ++ unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); +- list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { +- i++; +- if (async_tx_test_ack(&desc->txd)) { +- list_del(&desc->desc_node); +- ret = desc; +- break; +- } +- dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); +- } ++ cookie = dma_cookie_assign(tx); ++ ++ /* ++ * REVISIT: We should attempt to chain as many descriptors as ++ * possible, perhaps even appending to those already submitted ++ * for DMA. But this is hard to do in a race-free manner. 
++ */ ++ ++ list_add_tail(&desc->desc_node, &dwc->queue); + spin_unlock_irqrestore(&dwc->lock, flags); ++ dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", ++ __func__, desc->txd.cookie); + +- dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); ++ return cookie; ++} + +- return ret; ++static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) ++{ ++ struct dw_dma *dw = to_dw_dma(dwc->chan.device); ++ struct dw_desc *desc; ++ dma_addr_t phys; ++ ++ desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys); ++ if (!desc) ++ return NULL; ++ ++ dwc->descs_allocated++; ++ INIT_LIST_HEAD(&desc->tx_list); ++ dma_async_tx_descriptor_init(&desc->txd, &dwc->chan); ++ desc->txd.tx_submit = dwc_tx_submit; ++ desc->txd.flags = DMA_CTRL_ACK; ++ desc->txd.phys = phys; ++ return desc; + } + +-/* +- * Move a descriptor, including any children, to the free list. +- * `desc' must not be on any lists. +- */ + static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) + { +- unsigned long flags; ++ struct dw_dma *dw = to_dw_dma(dwc->chan.device); ++ struct dw_desc *child, *_next; + +- if (desc) { +- struct dw_desc *child; ++ if (unlikely(!desc)) ++ return; + +- spin_lock_irqsave(&dwc->lock, flags); +- list_for_each_entry(child, &desc->tx_list, desc_node) +- dev_vdbg(chan2dev(&dwc->chan), +- "moving child desc %p to freelist\n", +- child); +- list_splice_init(&desc->tx_list, &dwc->free_list); +- dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); +- list_add(&desc->desc_node, &dwc->free_list); +- spin_unlock_irqrestore(&dwc->lock, flags); ++ list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) { ++ list_del(&child->desc_node); ++ dma_pool_free(dw->desc_pool, child, child->txd.phys); ++ dwc->descs_allocated--; + } ++ ++ dma_pool_free(dw->desc_pool, desc, desc->txd.phys); ++ dwc->descs_allocated--; + } + + static void dwc_initialize(struct dw_dma_chan *dwc) +@@ -133,7 +144,7 @@ static void dwc_initialize(struct dw_dma + u32 
cfghi = DWC_CFGH_FIFO_MODE; + u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); + +- if (dwc->initialized == true) ++ if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags)) + return; + + cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); +@@ -146,26 +157,11 @@ static void dwc_initialize(struct dw_dma + channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.ERROR, dwc->mask); + +- dwc->initialized = true; ++ set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); + } + + /*----------------------------------------------------------------------*/ + +-static inline unsigned int dwc_fast_ffs(unsigned long long v) +-{ +- /* +- * We can be a lot more clever here, but this should take care +- * of the most common optimization. +- */ +- if (!(v & 7)) +- return 3; +- else if (!(v & 3)) +- return 2; +- else if (!(v & 1)) +- return 1; +- return 0; +-} +- + static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc) + { + dev_err(chan2dev(&dwc->chan), +@@ -197,12 +193,12 @@ static inline void dwc_do_single_block(s + * Software emulation of LLP mode relies on interrupts to continue + * multi block transfer. 
+ */ +- ctllo = desc->lli.ctllo | DWC_CTLL_INT_EN; ++ ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN; + +- channel_writel(dwc, SAR, desc->lli.sar); +- channel_writel(dwc, DAR, desc->lli.dar); ++ channel_writel(dwc, SAR, lli_read(desc, sar)); ++ channel_writel(dwc, DAR, lli_read(desc, dar)); + channel_writel(dwc, CTL_LO, ctllo); +- channel_writel(dwc, CTL_HI, desc->lli.ctlhi); ++ channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi)); + channel_set_bit(dw, CH_EN, dwc->mask); + + /* Move pointer to next descriptor */ +@@ -213,6 +209,7 @@ static inline void dwc_do_single_block(s + static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) + { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); ++ u8 lms = DWC_LLP_LMS(dwc->m_master); + unsigned long was_soft_llp; + + /* ASSERT: channel is idle */ +@@ -237,7 +234,7 @@ static void dwc_dostart(struct dw_dma_ch + + dwc_initialize(dwc); + +- dwc->residue = first->total_len; ++ first->residue = first->total_len; + dwc->tx_node_active = &first->tx_list; + + /* Submit first block */ +@@ -248,9 +245,8 @@ static void dwc_dostart(struct dw_dma_ch + + dwc_initialize(dwc); + +- channel_writel(dwc, LLP, first->txd.phys); +- channel_writel(dwc, CTL_LO, +- DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); ++ channel_writel(dwc, LLP, first->txd.phys | lms); ++ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + channel_set_bit(dw, CH_EN, dwc->mask); + } +@@ -293,11 +289,7 @@ dwc_descriptor_complete(struct dw_dma_ch + list_for_each_entry(child, &desc->tx_list, desc_node) + async_tx_ack(&child->txd); + async_tx_ack(&desc->txd); +- +- list_splice_init(&desc->tx_list, &dwc->free_list); +- list_move(&desc->desc_node, &dwc->free_list); +- +- dma_descriptor_unmap(txd); ++ dwc_desc_put(dwc, desc); + spin_unlock_irqrestore(&dwc->lock, flags); + + if (callback) +@@ -368,11 +360,11 @@ static void dwc_scan_descriptors(struct + + head = &desc->tx_list; + if (active != head) { +- /* Update desc to 
reflect last sent one */ +- if (active != head->next) +- desc = to_dw_desc(active->prev); +- +- dwc->residue -= desc->len; ++ /* Update residue to reflect last sent descriptor */ ++ if (active == head->next) ++ desc->residue -= desc->len; ++ else ++ desc->residue -= to_dw_desc(active->prev)->len; + + child = to_dw_desc(active); + +@@ -387,8 +379,6 @@ static void dwc_scan_descriptors(struct + clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags); + } + +- dwc->residue = 0; +- + spin_unlock_irqrestore(&dwc->lock, flags); + + dwc_complete_all(dw, dwc); +@@ -396,7 +386,6 @@ static void dwc_scan_descriptors(struct + } + + if (list_empty(&dwc->active_list)) { +- dwc->residue = 0; + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } +@@ -411,31 +400,31 @@ static void dwc_scan_descriptors(struct + + list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + /* Initial residue value */ +- dwc->residue = desc->total_len; ++ desc->residue = desc->total_len; + + /* Check first descriptors addr */ +- if (desc->txd.phys == llp) { ++ if (desc->txd.phys == DWC_LLP_LOC(llp)) { + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + /* Check first descriptors llp */ +- if (desc->lli.llp == llp) { ++ if (lli_read(desc, llp) == llp) { + /* This one is currently in progress */ +- dwc->residue -= dwc_get_sent(dwc); ++ desc->residue -= dwc_get_sent(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + +- dwc->residue -= desc->len; ++ desc->residue -= desc->len; + list_for_each_entry(child, &desc->tx_list, desc_node) { +- if (child->lli.llp == llp) { ++ if (lli_read(child, llp) == llp) { + /* Currently in progress */ +- dwc->residue -= dwc_get_sent(dwc); ++ desc->residue -= dwc_get_sent(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } +- dwc->residue -= child->len; ++ desc->residue -= child->len; + } + + /* +@@ -457,10 +446,14 @@ static void dwc_scan_descriptors(struct + spin_unlock_irqrestore(&dwc->lock, flags); + } + +-static inline void 
dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) ++static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc) + { + dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", +- lli->sar, lli->dar, lli->llp, lli->ctlhi, lli->ctllo); ++ lli_read(desc, sar), ++ lli_read(desc, dar), ++ lli_read(desc, llp), ++ lli_read(desc, ctlhi), ++ lli_read(desc, ctllo)); + } + + static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) +@@ -496,9 +489,9 @@ static void dwc_handle_error(struct dw_d + */ + dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n" + " cookie: %d\n", bad_desc->txd.cookie); +- dwc_dump_lli(dwc, &bad_desc->lli); ++ dwc_dump_lli(dwc, bad_desc); + list_for_each_entry(child, &bad_desc->tx_list, desc_node) +- dwc_dump_lli(dwc, &child->lli); ++ dwc_dump_lli(dwc, child); + + spin_unlock_irqrestore(&dwc->lock, flags); + +@@ -549,7 +542,7 @@ static void dwc_handle_cyclic(struct dw_ + */ + if (unlikely(status_err & dwc->mask) || + unlikely(status_xfer & dwc->mask)) { +- int i; ++ unsigned int i; + + dev_err(chan2dev(&dwc->chan), + "cyclic DMA unexpected %s interrupt, stopping DMA transfer\n", +@@ -571,7 +564,7 @@ static void dwc_handle_cyclic(struct dw_ + dma_writel(dw, CLEAR.XFER, dwc->mask); + + for (i = 0; i < dwc->cdesc->periods; i++) +- dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); ++ dwc_dump_lli(dwc, dwc->cdesc->desc[i]); + + spin_unlock_irqrestore(&dwc->lock, flags); + } +@@ -589,7 +582,7 @@ static void dw_dma_tasklet(unsigned long + u32 status_block; + u32 status_xfer; + u32 status_err; +- int i; ++ unsigned int i; + + status_block = dma_readl(dw, RAW.BLOCK); + status_xfer = dma_readl(dw, RAW.XFER); +@@ -616,12 +609,17 @@ static void dw_dma_tasklet(unsigned long + static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) + { + struct dw_dma *dw = dev_id; +- u32 status = dma_readl(dw, STATUS_INT); ++ u32 status; ++ ++ /* Check if we have any interrupt from the DMAC which is not in use */ 
++ if (!dw->in_use) ++ return IRQ_NONE; + ++ status = dma_readl(dw, STATUS_INT); + dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status); + + /* Check if we have any interrupt from the DMAC */ +- if (!status || !dw->in_use) ++ if (!status) + return IRQ_NONE; + + /* +@@ -653,30 +651,6 @@ static irqreturn_t dw_dma_interrupt(int + + /*----------------------------------------------------------------------*/ + +-static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) +-{ +- struct dw_desc *desc = txd_to_dw_desc(tx); +- struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); +- dma_cookie_t cookie; +- unsigned long flags; +- +- spin_lock_irqsave(&dwc->lock, flags); +- cookie = dma_cookie_assign(tx); +- +- /* +- * REVISIT: We should attempt to chain as many descriptors as +- * possible, perhaps even appending to those already submitted +- * for DMA. But this is hard to do in a race-free manner. +- */ +- +- dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie); +- list_add_tail(&desc->desc_node, &dwc->queue); +- +- spin_unlock_irqrestore(&dwc->lock, flags); +- +- return cookie; +-} +- + static struct dma_async_tx_descriptor * + dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +@@ -688,10 +662,12 @@ dwc_prep_dma_memcpy(struct dma_chan *cha + struct dw_desc *prev; + size_t xfer_count; + size_t offset; ++ u8 m_master = dwc->m_master; + unsigned int src_width; + unsigned int dst_width; +- unsigned int data_width; ++ unsigned int data_width = dw->pdata->data_width[m_master]; + u32 ctllo; ++ u8 lms = DWC_LLP_LMS(m_master); + + dev_vdbg(chan2dev(chan), + "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__, +@@ -704,11 +680,7 @@ dwc_prep_dma_memcpy(struct dma_chan *cha + + dwc->direction = DMA_MEM_TO_MEM; + +- data_width = min_t(unsigned int, dw->data_width[dwc->src_master], +- dw->data_width[dwc->dst_master]); +- +- src_width = dst_width = min_t(unsigned int, data_width, +- 
dwc_fast_ffs(src | dest | len)); ++ src_width = dst_width = __ffs(data_width | src | dest | len); + + ctllo = DWC_DEFAULT_CTLLO(chan) + | DWC_CTLL_DST_WIDTH(dst_width) +@@ -726,27 +698,27 @@ dwc_prep_dma_memcpy(struct dma_chan *cha + if (!desc) + goto err_desc_get; + +- desc->lli.sar = src + offset; +- desc->lli.dar = dest + offset; +- desc->lli.ctllo = ctllo; +- desc->lli.ctlhi = xfer_count; ++ lli_write(desc, sar, src + offset); ++ lli_write(desc, dar, dest + offset); ++ lli_write(desc, ctllo, ctllo); ++ lli_write(desc, ctlhi, xfer_count); + desc->len = xfer_count << src_width; + + if (!first) { + first = desc; + } else { +- prev->lli.llp = desc->txd.phys; +- list_add_tail(&desc->desc_node, +- &first->tx_list); ++ lli_write(prev, llp, desc->txd.phys | lms); ++ list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + } + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ +- prev->lli.ctllo |= DWC_CTLL_INT_EN; ++ lli_set(prev, ctllo, DWC_CTLL_INT_EN); + + prev->lli.llp = 0; ++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + first->txd.flags = flags; + first->total_len = len; + +@@ -768,10 +740,12 @@ dwc_prep_slave_sg(struct dma_chan *chan, + struct dw_desc *prev; + struct dw_desc *first; + u32 ctllo; ++ u8 m_master = dwc->m_master; ++ u8 lms = DWC_LLP_LMS(m_master); + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; +- unsigned int data_width; ++ unsigned int data_width = dw->pdata->data_width[m_master]; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; +@@ -797,8 +771,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, + ctllo |= sconfig->device_fc ? 
DWC_CTLL_FC(DW_DMA_FC_P_M2P) : + DWC_CTLL_FC(DW_DMA_FC_D_M2P); + +- data_width = dw->data_width[dwc->src_master]; +- + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, dlen, mem; +@@ -806,17 +778,16 @@ dwc_prep_slave_sg(struct dma_chan *chan, + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + +- mem_width = min_t(unsigned int, +- data_width, dwc_fast_ffs(mem | len)); ++ mem_width = __ffs(data_width | mem | len); + + slave_sg_todev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + +- desc->lli.sar = mem; +- desc->lli.dar = reg; +- desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); ++ lli_write(desc, sar, mem); ++ lli_write(desc, dar, reg); ++ lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width)); + if ((len >> mem_width) > dwc->block_size) { + dlen = dwc->block_size << mem_width; + mem += dlen; +@@ -826,15 +797,14 @@ slave_sg_todev_fill_desc: + len = 0; + } + +- desc->lli.ctlhi = dlen >> mem_width; ++ lli_write(desc, ctlhi, dlen >> mem_width); + desc->len = dlen; + + if (!first) { + first = desc; + } else { +- prev->lli.llp = desc->txd.phys; +- list_add_tail(&desc->desc_node, +- &first->tx_list); ++ lli_write(prev, llp, desc->txd.phys | lms); ++ list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + total_len += dlen; +@@ -854,8 +824,6 @@ slave_sg_todev_fill_desc: + ctllo |= sconfig->device_fc ? 
DWC_CTLL_FC(DW_DMA_FC_P_P2M) : + DWC_CTLL_FC(DW_DMA_FC_D_P2M); + +- data_width = dw->data_width[dwc->dst_master]; +- + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len, dlen, mem; +@@ -863,17 +831,16 @@ slave_sg_todev_fill_desc: + mem = sg_dma_address(sg); + len = sg_dma_len(sg); + +- mem_width = min_t(unsigned int, +- data_width, dwc_fast_ffs(mem | len)); ++ mem_width = __ffs(data_width | mem | len); + + slave_sg_fromdev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + +- desc->lli.sar = reg; +- desc->lli.dar = mem; +- desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); ++ lli_write(desc, sar, reg); ++ lli_write(desc, dar, mem); ++ lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width)); + if ((len >> reg_width) > dwc->block_size) { + dlen = dwc->block_size << reg_width; + mem += dlen; +@@ -882,15 +849,14 @@ slave_sg_fromdev_fill_desc: + dlen = len; + len = 0; + } +- desc->lli.ctlhi = dlen >> reg_width; ++ lli_write(desc, ctlhi, dlen >> reg_width); + desc->len = dlen; + + if (!first) { + first = desc; + } else { +- prev->lli.llp = desc->txd.phys; +- list_add_tail(&desc->desc_node, +- &first->tx_list); ++ lli_write(prev, llp, desc->txd.phys | lms); ++ list_add_tail(&desc->desc_node, &first->tx_list); + } + prev = desc; + total_len += dlen; +@@ -905,9 +871,10 @@ slave_sg_fromdev_fill_desc: + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ +- prev->lli.ctllo |= DWC_CTLL_INT_EN; ++ lli_set(prev, ctllo, DWC_CTLL_INT_EN); + + prev->lli.llp = 0; ++ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + first->total_len = total_len; + + return &first->txd; +@@ -932,8 +899,8 @@ bool dw_dma_filter(struct dma_chan *chan + dwc->src_id = dws->src_id; + dwc->dst_id = dws->dst_id; + +- dwc->src_master = dws->src_master; +- dwc->dst_master = dws->dst_master; ++ dwc->m_master = dws->m_master; ++ dwc->p_master = dws->p_master; + + return true; + } +@@ -986,7 +953,7 @@ static int 
dwc_pause(struct dma_chan *ch + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--) + udelay(2); + +- dwc->paused = true; ++ set_bit(DW_DMA_IS_PAUSED, &dwc->flags); + + spin_unlock_irqrestore(&dwc->lock, flags); + +@@ -999,7 +966,7 @@ static inline void dwc_chan_resume(struc + + channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); + +- dwc->paused = false; ++ clear_bit(DW_DMA_IS_PAUSED, &dwc->flags); + } + + static int dwc_resume(struct dma_chan *chan) +@@ -1007,12 +974,10 @@ static int dwc_resume(struct dma_chan *c + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; + +- if (!dwc->paused) +- return 0; +- + spin_lock_irqsave(&dwc->lock, flags); + +- dwc_chan_resume(dwc); ++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags)) ++ dwc_chan_resume(dwc); + + spin_unlock_irqrestore(&dwc->lock, flags); + +@@ -1048,16 +1013,37 @@ static int dwc_terminate_all(struct dma_ + return 0; + } + +-static inline u32 dwc_get_residue(struct dw_dma_chan *dwc) ++static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c) ++{ ++ struct dw_desc *desc; ++ ++ list_for_each_entry(desc, &dwc->active_list, desc_node) ++ if (desc->txd.cookie == c) ++ return desc; ++ ++ return NULL; ++} ++ ++static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie) + { ++ struct dw_desc *desc; + unsigned long flags; + u32 residue; + + spin_lock_irqsave(&dwc->lock, flags); + +- residue = dwc->residue; +- if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) +- residue -= dwc_get_sent(dwc); ++ desc = dwc_find_desc(dwc, cookie); ++ if (desc) { ++ if (desc == dwc_first_active(dwc)) { ++ residue = desc->residue; ++ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue) ++ residue -= dwc_get_sent(dwc); ++ } else { ++ residue = desc->total_len; ++ } ++ } else { ++ residue = 0; ++ } + + spin_unlock_irqrestore(&dwc->lock, flags); + return residue; +@@ -1078,10 +1064,12 @@ dwc_tx_status(struct dma_chan *chan, + 
dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + + ret = dma_cookie_status(chan, cookie, txstate); +- if (ret != DMA_COMPLETE) +- dma_set_residue(txstate, dwc_get_residue(dwc)); ++ if (ret == DMA_COMPLETE) ++ return ret; ++ ++ dma_set_residue(txstate, dwc_get_residue(dwc, cookie)); + +- if (dwc->paused && ret == DMA_IN_PROGRESS) ++ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS) + return DMA_PAUSED; + + return ret; +@@ -1102,7 +1090,7 @@ static void dwc_issue_pending(struct dma + + static void dw_dma_off(struct dw_dma *dw) + { +- int i; ++ unsigned int i; + + dma_writel(dw, CFG, 0); + +@@ -1116,7 +1104,7 @@ static void dw_dma_off(struct dw_dma *dw + cpu_relax(); + + for (i = 0; i < dw->dma.chancnt; i++) +- dw->chan[i].initialized = false; ++ clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags); + } + + static void dw_dma_on(struct dw_dma *dw) +@@ -1128,9 +1116,6 @@ static int dwc_alloc_chan_resources(stru + { + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); +- struct dw_desc *desc; +- int i; +- unsigned long flags; + + dev_vdbg(chan2dev(chan), "%s\n", __func__); + +@@ -1161,48 +1146,13 @@ static int dwc_alloc_chan_resources(stru + dw_dma_on(dw); + dw->in_use |= dwc->mask; + +- spin_lock_irqsave(&dwc->lock, flags); +- i = dwc->descs_allocated; +- while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { +- dma_addr_t phys; +- +- spin_unlock_irqrestore(&dwc->lock, flags); +- +- desc = dma_pool_alloc(dw->desc_pool, GFP_ATOMIC, &phys); +- if (!desc) +- goto err_desc_alloc; +- +- memset(desc, 0, sizeof(struct dw_desc)); +- +- INIT_LIST_HEAD(&desc->tx_list); +- dma_async_tx_descriptor_init(&desc->txd, chan); +- desc->txd.tx_submit = dwc_tx_submit; +- desc->txd.flags = DMA_CTRL_ACK; +- desc->txd.phys = phys; +- +- dwc_desc_put(dwc, desc); +- +- spin_lock_irqsave(&dwc->lock, flags); +- i = ++dwc->descs_allocated; +- } +- +- spin_unlock_irqrestore(&dwc->lock, flags); +- +- dev_dbg(chan2dev(chan), "%s: 
allocated %d descriptors\n", __func__, i); +- +- return i; +- +-err_desc_alloc: +- dev_info(chan2dev(chan), "only allocated %d descriptors\n", i); +- +- return i; ++ return 0; + } + + static void dwc_free_chan_resources(struct dma_chan *chan) + { + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); +- struct dw_desc *desc, *_desc; + unsigned long flags; + LIST_HEAD(list); + +@@ -1215,17 +1165,15 @@ static void dwc_free_chan_resources(stru + BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); + + spin_lock_irqsave(&dwc->lock, flags); +- list_splice_init(&dwc->free_list, &list); +- dwc->descs_allocated = 0; + + /* Clear custom channel configuration */ + dwc->src_id = 0; + dwc->dst_id = 0; + +- dwc->src_master = 0; +- dwc->dst_master = 0; ++ dwc->m_master = 0; ++ dwc->p_master = 0; + +- dwc->initialized = false; ++ clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags); + + /* Disable interrupts */ + channel_clear_bit(dw, MASK.XFER, dwc->mask); +@@ -1239,11 +1187,6 @@ static void dwc_free_chan_resources(stru + if (!dw->in_use) + dw_dma_off(dw); + +- list_for_each_entry_safe(desc, _desc, &list, desc_node) { +- dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); +- dma_pool_free(dw->desc_pool, desc, desc->txd.phys); +- } +- + dev_vdbg(chan2dev(chan), "%s: done\n", __func__); + } + +@@ -1321,6 +1264,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre + struct dw_cyclic_desc *retval = NULL; + struct dw_desc *desc; + struct dw_desc *last = NULL; ++ u8 lms = DWC_LLP_LMS(dwc->m_master); + unsigned long was_cyclic; + unsigned int reg_width; + unsigned int periods; +@@ -1374,9 +1318,6 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre + + retval = ERR_PTR(-ENOMEM); + +- if (periods > NR_DESCS_PER_CHANNEL) +- goto out_err; +- + cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL); + if (!cdesc) + goto out_err; +@@ -1392,50 +1333,50 @@ struct dw_cyclic_desc *dw_dma_cyclic_pre + + switch (direction) { + case DMA_MEM_TO_DEV: +- 
desc->lli.dar = sconfig->dst_addr; +- desc->lli.sar = buf_addr + (period_len * i); +- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) +- | DWC_CTLL_DST_WIDTH(reg_width) +- | DWC_CTLL_SRC_WIDTH(reg_width) +- | DWC_CTLL_DST_FIX +- | DWC_CTLL_SRC_INC +- | DWC_CTLL_INT_EN); +- +- desc->lli.ctllo |= sconfig->device_fc ? +- DWC_CTLL_FC(DW_DMA_FC_P_M2P) : +- DWC_CTLL_FC(DW_DMA_FC_D_M2P); ++ lli_write(desc, dar, sconfig->dst_addr); ++ lli_write(desc, sar, buf_addr + period_len * i); ++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) ++ | DWC_CTLL_DST_WIDTH(reg_width) ++ | DWC_CTLL_SRC_WIDTH(reg_width) ++ | DWC_CTLL_DST_FIX ++ | DWC_CTLL_SRC_INC ++ | DWC_CTLL_INT_EN)); ++ ++ lli_set(desc, ctllo, sconfig->device_fc ? ++ DWC_CTLL_FC(DW_DMA_FC_P_M2P) : ++ DWC_CTLL_FC(DW_DMA_FC_D_M2P)); + + break; + case DMA_DEV_TO_MEM: +- desc->lli.dar = buf_addr + (period_len * i); +- desc->lli.sar = sconfig->src_addr; +- desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan) +- | DWC_CTLL_SRC_WIDTH(reg_width) +- | DWC_CTLL_DST_WIDTH(reg_width) +- | DWC_CTLL_DST_INC +- | DWC_CTLL_SRC_FIX +- | DWC_CTLL_INT_EN); +- +- desc->lli.ctllo |= sconfig->device_fc ? +- DWC_CTLL_FC(DW_DMA_FC_P_P2M) : +- DWC_CTLL_FC(DW_DMA_FC_D_P2M); ++ lli_write(desc, dar, buf_addr + period_len * i); ++ lli_write(desc, sar, sconfig->src_addr); ++ lli_write(desc, ctllo, (DWC_DEFAULT_CTLLO(chan) ++ | DWC_CTLL_SRC_WIDTH(reg_width) ++ | DWC_CTLL_DST_WIDTH(reg_width) ++ | DWC_CTLL_DST_INC ++ | DWC_CTLL_SRC_FIX ++ | DWC_CTLL_INT_EN)); ++ ++ lli_set(desc, ctllo, sconfig->device_fc ? 
++ DWC_CTLL_FC(DW_DMA_FC_P_P2M) : ++ DWC_CTLL_FC(DW_DMA_FC_D_P2M)); + + break; + default: + break; + } + +- desc->lli.ctlhi = (period_len >> reg_width); ++ lli_write(desc, ctlhi, period_len >> reg_width); + cdesc->desc[i] = desc; + + if (last) +- last->lli.llp = desc->txd.phys; ++ lli_write(last, llp, desc->txd.phys | lms); + + last = desc; + } + + /* Let's make a cyclic list */ +- last->lli.llp = cdesc->desc[0]->txd.phys; ++ lli_write(last, llp, cdesc->desc[0]->txd.phys | lms); + + dev_dbg(chan2dev(&dwc->chan), + "cyclic prepared buf %pad len %zu period %zu periods %d\n", +@@ -1466,7 +1407,7 @@ void dw_dma_cyclic_free(struct dma_chan + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_cyclic_desc *cdesc = dwc->cdesc; +- int i; ++ unsigned int i; + unsigned long flags; + + dev_dbg(chan2dev(&dwc->chan), "%s\n", __func__); +@@ -1490,32 +1431,38 @@ void dw_dma_cyclic_free(struct dma_chan + kfree(cdesc->desc); + kfree(cdesc); + ++ dwc->cdesc = NULL; ++ + clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags); + } + EXPORT_SYMBOL(dw_dma_cyclic_free); + + /*----------------------------------------------------------------------*/ + +-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) ++int dw_dma_probe(struct dw_dma_chip *chip) + { ++ struct dw_dma_platform_data *pdata; + struct dw_dma *dw; + bool autocfg = false; + unsigned int dw_params; +- unsigned int max_blk_size = 0; ++ unsigned int i; + int err; +- int i; + + dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + ++ dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL); ++ if (!dw->pdata) ++ return -ENOMEM; ++ + dw->regs = chip->regs; + chip->dw = dw; + + pm_runtime_get_sync(chip->dev); + +- if (!pdata) { +- dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); ++ if (!chip->pdata) { ++ dw_params = dma_readl(dw, DW_PARAMS); + dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); + + autocfg = 
dw_params >> DW_PARAMS_EN & 1; +@@ -1524,29 +1471,31 @@ int dw_dma_probe(struct dw_dma_chip *chi + goto err_pdata; + } + +- pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); +- if (!pdata) { +- err = -ENOMEM; +- goto err_pdata; +- } ++ /* Reassign the platform data pointer */ ++ pdata = dw->pdata; + + /* Get hardware configuration parameters */ + pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; + pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; + for (i = 0; i < pdata->nr_masters; i++) { + pdata->data_width[i] = +- (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; ++ 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3); + } +- max_blk_size = dma_readl(dw, MAX_BLK_SIZE); ++ pdata->block_size = dma_readl(dw, MAX_BLK_SIZE); + + /* Fill platform data with the default values */ + pdata->is_private = true; + pdata->is_memcpy = true; + pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; + pdata->chan_priority = CHAN_PRIORITY_ASCENDING; +- } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { ++ } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + err = -EINVAL; + goto err_pdata; ++ } else { ++ memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata)); ++ ++ /* Reassign the platform data pointer */ ++ pdata = dw->pdata; + } + + dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), +@@ -1556,11 +1505,6 @@ int dw_dma_probe(struct dw_dma_chip *chi + goto err_pdata; + } + +- /* Get hardware configuration parameters */ +- dw->nr_masters = pdata->nr_masters; +- for (i = 0; i < dw->nr_masters; i++) +- dw->data_width[i] = pdata->data_width[i]; +- + /* Calculate all channel mask before DMA setup */ + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; + +@@ -1607,7 +1551,6 @@ int dw_dma_probe(struct dw_dma_chip *chi + + INIT_LIST_HEAD(&dwc->active_list); + INIT_LIST_HEAD(&dwc->queue); +- INIT_LIST_HEAD(&dwc->free_list); + + channel_clear_bit(dw, CH_EN, dwc->mask); + +@@ -1615,11 +1558,9 @@ int dw_dma_probe(struct 
dw_dma_chip *chi + + /* Hardware configuration */ + if (autocfg) { +- unsigned int dwc_params; + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; +- void __iomem *addr = chip->regs + r * sizeof(u32); +- +- dwc_params = dma_read_byaddr(addr, DWC_PARAMS); ++ void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r]; ++ unsigned int dwc_params = dma_readl_native(addr); + + dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i, + dwc_params); +@@ -1630,16 +1571,15 @@ int dw_dma_probe(struct dw_dma_chip *chi + * up to 0x0a for 4095. + */ + dwc->block_size = +- (4 << ((max_blk_size >> 4 * i) & 0xf)) - 1; ++ (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1; + dwc->nollp = + (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0; + } else { + dwc->block_size = pdata->block_size; + + /* Check if channel supports multi block transfer */ +- channel_writel(dwc, LLP, 0xfffffffc); +- dwc->nollp = +- (channel_readl(dwc, LLP) & 0xfffffffc) == 0; ++ channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff)); ++ dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0; + channel_writel(dwc, LLP, 0); + } + } +--- a/drivers/dma/dw/pci.c 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/pci.c 2016-05-21 22:47:08.665465180 +0200 +@@ -17,8 +17,8 @@ + + static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) + { ++ const struct dw_dma_platform_data *pdata = (void *)pid->driver_data; + struct dw_dma_chip *chip; +- struct dw_dma_platform_data *pdata = (void *)pid->driver_data; + int ret; + + ret = pcim_enable_device(pdev); +@@ -49,8 +49,9 @@ static int dw_pci_probe(struct pci_dev * + chip->dev = &pdev->dev; + chip->regs = pcim_iomap_table(pdev)[0]; + chip->irq = pdev->irq; ++ chip->pdata = pdata; + +- ret = dw_dma_probe(chip, pdata); ++ ret = dw_dma_probe(chip); + if (ret) + return ret; + +@@ -108,6 +109,10 @@ static const struct pci_device_id dw_pci + + /* Haswell */ + { PCI_VDEVICE(INTEL, 0x9c60) }, ++ ++ /* Broadwell */ ++ { PCI_VDEVICE(INTEL, 0x9ce0) }, ++ + { } + }; + 
MODULE_DEVICE_TABLE(pci, dw_pci_id_table); +--- a/drivers/dma/dw/platform.c 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/platform.c 2016-05-21 22:47:08.665465180 +0200 +@@ -42,13 +42,13 @@ static struct dma_chan *dw_dma_of_xlate( + + slave.src_id = dma_spec->args[0]; + slave.dst_id = dma_spec->args[0]; +- slave.src_master = dma_spec->args[1]; +- slave.dst_master = dma_spec->args[2]; ++ slave.m_master = dma_spec->args[1]; ++ slave.p_master = dma_spec->args[2]; + + if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS || + slave.dst_id >= DW_DMA_MAX_NR_REQUESTS || +- slave.src_master >= dw->nr_masters || +- slave.dst_master >= dw->nr_masters)) ++ slave.m_master >= dw->pdata->nr_masters || ++ slave.p_master >= dw->pdata->nr_masters)) + return NULL; + + dma_cap_zero(cap); +@@ -66,8 +66,8 @@ static bool dw_dma_acpi_filter(struct dm + .dma_dev = dma_spec->dev, + .src_id = dma_spec->slave_id, + .dst_id = dma_spec->slave_id, +- .src_master = 1, +- .dst_master = 0, ++ .m_master = 0, ++ .p_master = 1, + }; + + return dw_dma_filter(chan, &slave); +@@ -103,18 +103,28 @@ dw_dma_parse_dt(struct platform_device * + struct device_node *np = pdev->dev.of_node; + struct dw_dma_platform_data *pdata; + u32 tmp, arr[DW_DMA_MAX_NR_MASTERS]; ++ u32 nr_masters; ++ u32 nr_channels; + + if (!np) { + dev_err(&pdev->dev, "Missing DT data\n"); + return NULL; + } + ++ if (of_property_read_u32(np, "dma-masters", &nr_masters)) ++ return NULL; ++ if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS) ++ return NULL; ++ ++ if (of_property_read_u32(np, "dma-channels", &nr_channels)) ++ return NULL; ++ + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return NULL; + +- if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels)) +- return NULL; ++ pdata->nr_masters = nr_masters; ++ pdata->nr_channels = nr_channels; + + if (of_property_read_bool(np, "is_private")) + pdata->is_private = true; +@@ -128,17 +138,13 @@ dw_dma_parse_dt(struct platform_device 
* + if (!of_property_read_u32(np, "block_size", &tmp)) + pdata->block_size = tmp; + +- if (!of_property_read_u32(np, "dma-masters", &tmp)) { +- if (tmp > DW_DMA_MAX_NR_MASTERS) +- return NULL; +- +- pdata->nr_masters = tmp; +- } +- +- if (!of_property_read_u32_array(np, "data_width", arr, +- pdata->nr_masters)) +- for (tmp = 0; tmp < pdata->nr_masters; tmp++) ++ if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) { ++ for (tmp = 0; tmp < nr_masters; tmp++) + pdata->data_width[tmp] = arr[tmp]; ++ } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) { ++ for (tmp = 0; tmp < nr_masters; tmp++) ++ pdata->data_width[tmp] = BIT(arr[tmp] & 0x07); ++ } + + return pdata; + } +@@ -155,8 +161,7 @@ static int dw_probe(struct platform_devi + struct dw_dma_chip *chip; + struct device *dev = &pdev->dev; + struct resource *mem; +- const struct acpi_device_id *id; +- struct dw_dma_platform_data *pdata; ++ const struct dw_dma_platform_data *pdata; + int err; + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); +@@ -179,13 +184,9 @@ static int dw_probe(struct platform_devi + pdata = dev_get_platdata(dev); + if (!pdata) + pdata = dw_dma_parse_dt(pdev); +- if (!pdata && has_acpi_companion(dev)) { +- id = acpi_match_device(dev->driver->acpi_match_table, dev); +- if (id) +- pdata = (struct dw_dma_platform_data *)id->driver_data; +- } + + chip->dev = dev; ++ chip->pdata = pdata; + + chip->clk = devm_clk_get(chip->dev, "hclk"); + if (IS_ERR(chip->clk)) +@@ -196,7 +197,7 @@ static int dw_probe(struct platform_devi + + pm_runtime_enable(&pdev->dev); + +- err = dw_dma_probe(chip, pdata); ++ err = dw_dma_probe(chip); + if (err) + goto err_dw_dma_probe; + +@@ -239,7 +240,19 @@ static void dw_shutdown(struct platform_ + { + struct dw_dma_chip *chip = platform_get_drvdata(pdev); + ++ /* ++ * We have to call dw_dma_disable() to stop any ongoing transfer. On ++ * some platforms we can't do that since DMA device is powered off. 
++ * Moreover we have no possibility to check if the platform is affected ++ * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put() ++ * unconditionally. On the other hand we can't use ++ * pm_runtime_suspended() because runtime PM framework is not fully ++ * used by the driver. ++ */ ++ pm_runtime_get_sync(chip->dev); + dw_dma_disable(chip); ++ pm_runtime_put_sync_suspend(chip->dev); ++ + clk_disable_unprepare(chip->clk); + } + +@@ -252,17 +265,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_tab + #endif + + #ifdef CONFIG_ACPI +-static struct dw_dma_platform_data dw_dma_acpi_pdata = { +- .nr_channels = 8, +- .is_private = true, +- .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, +- .chan_priority = CHAN_PRIORITY_ASCENDING, +- .block_size = 4095, +- .nr_masters = 2, +-}; +- + static const struct acpi_device_id dw_dma_acpi_id_table[] = { +- { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata }, ++ { "INTL9C60", 0 }, + { } + }; + MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table); +--- a/drivers/dma/dw/regs.h 2016-05-21 23:13:19.964478443 +0200 ++++ b/drivers/dma/dw/regs.h 2016-05-21 22:47:08.665465180 +0200 +@@ -114,10 +114,6 @@ struct dw_dma_regs { + #define dma_writel_native writel + #endif + +-/* To access the registers in early stage of probe */ +-#define dma_read_byaddr(addr, name) \ +- dma_readl_native((addr) + offsetof(struct dw_dma_regs, name)) +- + /* Bitfields in DW_PARAMS */ + #define DW_PARAMS_NR_CHAN 8 /* number of channels */ + #define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */ +@@ -143,6 +139,10 @@ enum dw_dma_msize { + DW_DMA_MSIZE_256, + }; + ++/* Bitfields in LLP */ ++#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */ ++#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */ ++ + /* Bitfields in CTL_LO */ + #define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? 
*/ + #define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ +@@ -150,7 +150,7 @@ enum dw_dma_msize { + #define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ + #define DWC_CTLL_DST_DEC (1<<7) + #define DWC_CTLL_DST_FIX (2<<7) +-#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */ ++#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */ + #define DWC_CTLL_SRC_DEC (1<<9) + #define DWC_CTLL_SRC_FIX (2<<9) + #define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ +@@ -216,6 +216,8 @@ enum dw_dma_msize { + enum dw_dmac_flags { + DW_DMA_IS_CYCLIC = 0, + DW_DMA_IS_SOFT_LLP = 1, ++ DW_DMA_IS_PAUSED = 2, ++ DW_DMA_IS_INITIALIZED = 3, + }; + + struct dw_dma_chan { +@@ -224,8 +226,6 @@ struct dw_dma_chan { + u8 mask; + u8 priority; + enum dma_transfer_direction direction; +- bool paused; +- bool initialized; + + /* software emulation of the LLP transfers */ + struct list_head *tx_node_active; +@@ -236,8 +236,6 @@ struct dw_dma_chan { + unsigned long flags; + struct list_head active_list; + struct list_head queue; +- struct list_head free_list; +- u32 residue; + struct dw_cyclic_desc *cdesc; + + unsigned int descs_allocated; +@@ -249,8 +247,8 @@ struct dw_dma_chan { + /* custom slave configuration */ + u8 src_id; + u8 dst_id; +- u8 src_master; +- u8 dst_master; ++ u8 m_master; ++ u8 p_master; + + /* configuration passed via .device_config */ + struct dma_slave_config dma_sconfig; +@@ -283,9 +281,8 @@ struct dw_dma { + u8 all_chan_mask; + u8 in_use; + +- /* hardware configuration */ +- unsigned char nr_masters; +- unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; ++ /* platform data */ ++ struct dw_dma_platform_data *pdata; + }; + + static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) +@@ -308,32 +305,51 @@ static inline struct dw_dma *to_dw_dma(s + return container_of(ddev, struct dw_dma, dma); + } + ++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO ++typedef __be32 __dw32; ++#else ++typedef __le32 __dw32; ++#endif ++ + /* LLI == Linked List Item; a.k.a. 
DMA block descriptor */ + struct dw_lli { + /* values that are not changed by hardware */ +- u32 sar; +- u32 dar; +- u32 llp; /* chain to next lli */ +- u32 ctllo; ++ __dw32 sar; ++ __dw32 dar; ++ __dw32 llp; /* chain to next lli */ ++ __dw32 ctllo; + /* values that may get written back: */ +- u32 ctlhi; ++ __dw32 ctlhi; + /* sstat and dstat can snapshot peripheral register state. + * silicon config may discard either or both... + */ +- u32 sstat; +- u32 dstat; ++ __dw32 sstat; ++ __dw32 dstat; + }; + + struct dw_desc { + /* FIRST values the hardware uses */ + struct dw_lli lli; + ++#ifdef CONFIG_DW_DMAC_BIG_ENDIAN_IO ++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_be32(v)) ++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_be32(v)) ++#define lli_read(d, reg) be32_to_cpu((d)->lli.reg) ++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_be32(v)) ++#else ++#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v)) ++#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v)) ++#define lli_read(d, reg) le32_to_cpu((d)->lli.reg) ++#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v)) ++#endif ++ + /* THEN values for driver housekeeping */ + struct list_head desc_node; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + size_t len; + size_t total_len; ++ u32 residue; + }; + + #define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node) +--- a/include/linux/dma/dw.h ++++ b/include/linux/dma/dw.h +@@ -27,6 +27,7 @@ struct dw_dma; + * @regs: memory mapped I/O space + * @clk: hclk clock + * @dw: struct dw_dma that is filed by dw_dma_probe() ++ * @pdata: pointer to platform data + */ + struct dw_dma_chip { + struct device *dev; +@@ -34,10 +35,12 @@ struct dw_dma_chip { + void __iomem *regs; + struct clk *clk; + struct dw_dma *dw; ++ ++ const struct dw_dma_platform_data *pdata; + }; + + /* Export to the platform drivers */ +-int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata); ++int dw_dma_probe(struct 
dw_dma_chip *chip); + int dw_dma_remove(struct dw_dma_chip *chip); + + /* DMA API extensions */ +diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h +index 03b6095..d15d8ba 100644 +--- a/include/linux/platform_data/dma-dw.h ++++ b/include/linux/platform_data/dma-dw.h +@@ -21,15 +21,15 @@ + * @dma_dev: required DMA master device + * @src_id: src request line + * @dst_id: dst request line +- * @src_master: src master for transfers on allocated channel. +- * @dst_master: dest master for transfers on allocated channel. ++ * @m_master: memory master for transfers on allocated channel ++ * @p_master: peripheral master for transfers on allocated channel + */ + struct dw_dma_slave { + struct device *dma_dev; + u8 src_id; + u8 dst_id; +- u8 src_master; +- u8 dst_master; ++ u8 m_master; ++ u8 p_master; + }; + + /** +@@ -43,7 +43,7 @@ struct dw_dma_slave { + * @block_size: Maximum block size supported by the controller + * @nr_masters: Number of AHB masters supported by the controller + * @data_width: Maximum data width supported by hardware per AHB master +- * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) ++ * (in bytes, power of 2) + */ + struct dw_dma_platform_data { + unsigned int nr_channels; +@@ -55,7 +55,7 @@ struct dw_dma_platform_data { + #define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ + #define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ + unsigned char chan_priority; +- unsigned short block_size; ++ unsigned int block_size; + unsigned char nr_masters; + unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; + }; +-- +2.8.1 + |