diff options
Diffstat (limited to 'target/linux/brcm2708/patches-4.19/950-0698-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch')
-rw-r--r-- | target/linux/brcm2708/patches-4.19/950-0698-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch | 818 |
1 files changed, 818 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.19/950-0698-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch b/target/linux/brcm2708/patches-4.19/950-0698-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
new file mode 100644
index 0000000000..c69a8ca16e
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.19/950-0698-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
@@ -0,0 +1,818 @@
+From ccd23ce562e8223ba7c6acf7dcb7058ff89ff7ec Mon Sep 17 00:00:00 2001
+From: yaroslavros <yaroslavros@gmail.com>
+Date: Wed, 14 Aug 2019 15:22:55 +0100
+Subject: [PATCH] Ported pcie-brcmstb bounce buffer implementation to
+ ARM64. (#3144)
+
+Ported pcie-brcmstb bounce buffer implementation to ARM64.
+This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.
+
+Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
+---
+ arch/arm64/include/asm/dma-mapping.h          |  21 +
+ arch/arm64/mm/dma-mapping.c                   |  50 ++
+ drivers/pci/controller/Makefile               |   3 +
+ drivers/pci/controller/pcie-brcmstb-bounce.h  |   2 +-
+ .../pci/controller/pcie-brcmstb-bounce64.c    | 576 ++++++++++++++++++
+ drivers/pci/controller/pcie-brcmstb.c         |  30 +-
+ 6 files changed, 658 insertions(+), 24 deletions(-)
+ create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c
+
+--- a/arch/arm64/include/asm/dma-mapping.h
++++ b/arch/arm64/include/asm/dma-mapping.h
+@@ -24,6 +24,27 @@
+ #include <xen/xen.h>
+ #include <asm/xen/hypervisor.h>
+ 
++extern void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
++			     gfp_t gfp, unsigned long attrs);
++extern void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
++			   dma_addr_t handle, unsigned long attrs);
++extern int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
++			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
++			  unsigned long attrs);
++extern int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
++				 void *cpu_addr, dma_addr_t dma_addr, size_t size,
++				 unsigned long attrs);
++extern int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
++			    enum dma_data_direction dir, unsigned long attrs);
++extern void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
++			       enum dma_data_direction dir, unsigned long attrs);
++extern void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
++				      enum dma_data_direction dir);
++extern void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
++					 enum dma_data_direction dir);
++
++
++
+ extern const struct dma_map_ops dummy_dma_ops;
+ 
+ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+--- a/arch/arm64/mm/dma-mapping.c
++++ b/arch/arm64/mm/dma-mapping.c
+@@ -138,6 +138,12 @@ no_mem:
+ 	return NULL;
+ }
+ 
++void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
++		      gfp_t gfp, unsigned long attrs)
++{
++	return __dma_alloc(dev, size, handle, gfp, attrs);
++}
++
+ static void __dma_free(struct device *dev, size_t size,
+ 		       void *vaddr, dma_addr_t dma_handle,
+ 		       unsigned long attrs)
+@@ -154,6 +160,12 @@ static void __dma_free(struct device *de
+ 	swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
+ }
+ 
++void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
++		    dma_addr_t handle, unsigned long attrs)
++{
++	__dma_free(dev, size, cpu_addr, handle, attrs);
++}
++
+ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+ 				     unsigned long offset, size_t size,
+ 				     enum dma_data_direction dir,
+@@ -197,6 +209,12 @@ static int __swiotlb_map_sg_attrs(struct
+ 	return ret;
+ }
+ 
++int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
++		     enum dma_data_direction dir, unsigned long attrs)
++{
++	return __swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
++}
++
+ static void __swiotlb_unmap_sg_attrs(struct device *dev,
+ 				     struct scatterlist *sgl, int nelems,
+ 				     enum dma_data_direction dir,
+@@ -213,6 +231,12 @@ static void __swiotlb_unmap_sg_attrs(str
+ 	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+ }
+ 
++void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
++			enum dma_data_direction dir, unsigned long attrs)
++{
++	__swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
++}
++
+ static void __swiotlb_sync_single_for_cpu(struct device *dev,
+ 					  dma_addr_t dev_addr, size_t size,
+ 					  enum dma_data_direction dir)
+@@ -245,6 +269,12 @@ static void __swiotlb_sync_sg_for_cpu(st
+ 	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+ }
+ 
++void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
++			       enum dma_data_direction dir)
++{
++	__swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
++}
++
+ static void __swiotlb_sync_sg_for_device(struct device *dev,
+ 					 struct scatterlist *sgl, int nelems,
+ 					 enum dma_data_direction dir)
+@@ -259,6 +289,12 @@ static void __swiotlb_sync_sg_for_device
+ 				       sg->length, dir);
+ }
+ 
++void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
++				  enum dma_data_direction dir)
++{
++	__swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
++}
++
+ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
+ 			      unsigned long pfn, size_t size)
+ {
+@@ -294,6 +330,13 @@ static int __swiotlb_mmap(struct device
+ 	return __swiotlb_mmap_pfn(vma, pfn, size);
+ }
+ 
++int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
++		   void *cpu_addr, dma_addr_t dma_addr, size_t size,
++		   unsigned long attrs)
++{
++	return __swiotlb_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
++}
++
+ static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+ 				      struct page *page, size_t size)
+ {
+@@ -314,6 +357,13 @@ static int __swiotlb_get_sgtable(struct
+ 	return __swiotlb_get_sgtable_page(sgt, page, size);
+ }
+ 
++int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
++			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
++			  unsigned long attrs)
++{
++	return __swiotlb_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
++}
++
+ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
+ {
+ 	if (swiotlb)
+--- a/drivers/pci/controller/Makefile
++++ b/drivers/pci/controller/Makefile
+@@ -32,6 +32,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcms
+ ifdef CONFIG_ARM
+ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
+ endif
++ifdef CONFIG_ARM64
++obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
++endif
+ 
+ obj-$(CONFIG_VMD) += vmd.o
+ # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
+--- a/drivers/pci/controller/pcie-brcmstb-bounce.h
++++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
+@@ -6,7 +6,7 @@
+ #ifndef _PCIE_BRCMSTB_BOUNCE_H
+ #define _PCIE_BRCMSTB_BOUNCE_H
+ 
+-#ifdef CONFIG_ARM
++#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+ 
+ int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
+ 			  dma_addr_t threshold);
+--- /dev/null
++++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
+@@ -0,0 +1,576 @@
++/*
++ * This code started out as a version of arch/arm/common/dmabounce.c,
++ * modified to cope with highmem pages. Now it has been changed heavily -
++ * it now preallocates a large block (currently 4MB) and carves it up
++ * sequentially in ring fashion, and DMA is used to copy the data - to the
++ * point where very little of the original remains.
++ *
++ * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
++ *
++ * Original version by Brad Parker (brad@heeltoe.com)
++ * Re-written by Christopher Hoover <ch@murgatroid.com>
++ * Made generic by Deepak Saxena <dsaxena@plexity.net>
++ *
++ * Copyright (C) 2002 Hewlett Packard Company.
++ * Copyright (C) 2004 MontaVista Software, Inc.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * version 2 as published by the Free Software Foundation.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/page-flags.h>
++#include <linux/device.h>
++#include <linux/dma-mapping.h>
++#include <linux/dma-direct.h>
++#include <linux/dmapool.h>
++#include <linux/list.h>
++#include <linux/scatterlist.h>
++#include <linux/bitmap.h>
++#include <linux/swiotlb.h>
++
++#include <asm/cacheflush.h>
++
++#define STATS
++
++#ifdef STATS
++#define DO_STATS(X) do { X ; } while (0)
++#else
++#define DO_STATS(X) do { } while (0)
++#endif
++
++/* ************************************************** */
++
++struct safe_buffer {
++	struct list_head node;
++
++	/* original request */
++	size_t		size;
++	int		direction;
++
++	struct dmabounce_pool *pool;
++	void		*safe;
++	dma_addr_t	unsafe_dma_addr;
++	dma_addr_t	safe_dma_addr;
++};
++
++struct dmabounce_pool {
++	unsigned long	pages;
++	void		*virt_addr;
++	dma_addr_t	dma_addr;
++	unsigned long	*alloc_map;
++	unsigned long	alloc_pos;
++	spinlock_t	lock;
++	struct device	*dev;
++	unsigned long	num_pages;
++#ifdef STATS
++	size_t		max_size;
++	unsigned long	num_bufs;
++	unsigned long	max_bufs;
++	unsigned long	max_pages;
++#endif
++};
++
++struct dmabounce_device_info {
++	struct device *dev;
++	dma_addr_t threshold;
++	struct list_head safe_buffers;
++	struct dmabounce_pool pool;
++	rwlock_t lock;
++#ifdef STATS
++	unsigned long map_count;
++	unsigned long unmap_count;
++	unsigned long sync_dev_count;
++	unsigned long sync_cpu_count;
++	unsigned long fail_count;
++	int attr_res;
++#endif
++};
++
++static struct dmabounce_device_info *g_dmabounce_device_info;
++
++extern int bcm2838_dma40_memcpy_init(void);
++extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
++
++#ifdef STATS
++static ssize_t
++bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
++{
++	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
++	return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
++		device_info->map_count,
++		device_info->unmap_count,
++		device_info->sync_dev_count,
++		device_info->sync_cpu_count,
++		device_info->fail_count,
++		device_info->pool.max_size,
++		device_info->pool.num_bufs,
++		device_info->pool.max_bufs,
++		device_info->pool.num_pages * PAGE_SIZE,
++		device_info->pool.max_pages * PAGE_SIZE);
++}
++
++static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
++#endif
++
++static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
++			 unsigned long buffer_size)
++{
++	int ret = -ENOMEM;
++	pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
++	pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
++	if (!pool->alloc_map)
++		goto err_bitmap;
++	pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
++					     &pool->dma_addr, GFP_KERNEL);
++	if (!pool->virt_addr)
++		goto err_dmabuf;
++
++	pool->alloc_pos = 0;
++	spin_lock_init(&pool->lock);
++	pool->dev = dev;
++	pool->num_pages = 0;
++
++	DO_STATS(pool->max_size = 0);
++	DO_STATS(pool->num_bufs = 0);
++	DO_STATS(pool->max_bufs = 0);
++	DO_STATS(pool->max_pages = 0);
++
++	return 0;
++
++err_dmabuf:
++	bitmap_free(pool->alloc_map);
++err_bitmap:
++	return ret;
++}
++
++static void bounce_destroy(struct dmabounce_pool *pool)
++{
++	dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
++			  pool->dma_addr);
++
++	bitmap_free(pool->alloc_map);
++}
++
++static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
++			  dma_addr_t *dmaaddrp)
++{
++	unsigned long pages;
++	unsigned long flags;
++	unsigned long pos;
++
++	pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
++
++	DO_STATS(pool->max_size = max(size, pool->max_size));
++
++	spin_lock_irqsave(&pool->lock, flags);
++	pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
++					 pool->alloc_pos, pages, 0);
++	/* If not found, try from the start */
++	if (pos >= pool->pages && pool->alloc_pos)
++		pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
++						 0, pages, 0);
++
++	if (pos >= pool->pages) {
++		spin_unlock_irqrestore(&pool->lock, flags);
++		return NULL;
++	}
++
++	bitmap_set(pool->alloc_map, pos, pages);
++	pool->alloc_pos = (pos + pages) % pool->pages;
++	pool->num_pages += pages;
++
++	DO_STATS(pool->num_bufs++);
++	DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
++	DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
++
++	spin_unlock_irqrestore(&pool->lock, flags);
++
++	*dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
++
++	return pool->virt_addr + pos * PAGE_SIZE;
++}
++
++static void
++bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
++{
++	unsigned long pages;
++	unsigned long flags;
++	unsigned long pos;
++
++	pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
++	pos = (buf - pool->virt_addr)/PAGE_SIZE;
++
++	BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
++
++	spin_lock_irqsave(&pool->lock, flags);
++	bitmap_clear(pool->alloc_map, pos, pages);
++	pool->num_pages -= pages;
++	if (pool->num_pages == 0)
++		pool->alloc_pos = 0;
++	DO_STATS(pool->num_bufs--);
++	spin_unlock_irqrestore(&pool->lock, flags);
++}
++
++/* allocate a 'safe' buffer and keep track of it */
++static struct safe_buffer *
++alloc_safe_buffer(struct dmabounce_device_info *device_info,
++		  dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
++{
++	struct safe_buffer *buf;
++	struct dmabounce_pool *pool = &device_info->pool;
++	struct device *dev = device_info->dev;
++	unsigned long flags;
++
++	/*
++	 * Although one might expect this to be called in thread context,
++	 * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
++	 * was previously used to select the appropriate allocation mode,
++	 * but this is unsafe.
++	 */
++	buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
++	if (!buf) {
++		dev_warn(dev, "%s: kmalloc failed\n", __func__);
++		return NULL;
++	}
++
++	buf->unsafe_dma_addr = dma_addr;
++	buf->size = size;
++	buf->direction = dir;
++	buf->pool = pool;
++
++	buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
++
++	if (!buf->safe) {
++		dev_warn(dev,
++			 "%s: could not alloc dma memory (size=%zu)\n",
++			 __func__, size);
++		kfree(buf);
++		return NULL;
++	}
++
++	write_lock_irqsave(&device_info->lock, flags);
++	list_add(&buf->node, &device_info->safe_buffers);
++	write_unlock_irqrestore(&device_info->lock, flags);
++
++	return buf;
++}
++
++/* determine if a buffer is from our "safe" pool */
++static struct safe_buffer *
++find_safe_buffer(struct dmabounce_device_info *device_info,
++		 dma_addr_t safe_dma_addr)
++{
++	struct safe_buffer *b, *rb = NULL;
++	unsigned long flags;
++
++	read_lock_irqsave(&device_info->lock, flags);
++
++	list_for_each_entry(b, &device_info->safe_buffers, node)
++		if (b->safe_dma_addr <= safe_dma_addr &&
++		    b->safe_dma_addr + b->size > safe_dma_addr) {
++			rb = b;
++			break;
++		}
++
++	read_unlock_irqrestore(&device_info->lock, flags);
++	return rb;
++}
++
++static void
++free_safe_buffer(struct dmabounce_device_info *device_info,
++		 struct safe_buffer *buf)
++{
++	unsigned long flags;
++
++	write_lock_irqsave(&device_info->lock, flags);
++	list_del(&buf->node);
++	write_unlock_irqrestore(&device_info->lock, flags);
++
++	bounce_free(buf->pool, buf->safe, buf->size);
++
++	kfree(buf);
++}
++
++/* ************************************************** */
++
++static struct safe_buffer *
++find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
++{
++	if (!dev || !g_dmabounce_device_info)
++		return NULL;
++	if (dma_mapping_error(dev, dma_addr)) {
++		dev_err(dev, "Trying to %s invalid mapping\n", where);
++		return NULL;
++	}
++	return find_safe_buffer(g_dmabounce_device_info, dma_addr);
++}
++
++static dma_addr_t
++map_single(struct device *dev, struct safe_buffer *buf, size_t size,
++	   enum dma_data_direction dir, unsigned long attrs)
++{
++	BUG_ON(buf->size != size);
++	BUG_ON(buf->direction != dir);
++
++	dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
++		(u64)buf->safe_dma_addr);
++
++	if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
++	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
++		bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
++				     size);
++
++	return buf->safe_dma_addr;
++}
++
++static dma_addr_t
++unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
++	     enum dma_data_direction dir, unsigned long attrs)
++{
++	BUG_ON(buf->size != size);
++	BUG_ON(buf->direction != dir);
++
++	if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
++	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
++		dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
++			(u64)buf->unsafe_dma_addr);
++
++		bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
++				     size);
++	}
++	return buf->unsafe_dma_addr;
++}
++
++/* ************************************************** */
++
++/*
++ * see if a buffer address is in an 'unsafe' range. if it is
++ * allocate a 'safe' buffer and copy the unsafe buffer into it.
++ * substitute the safe buffer for the unsafe one.
++ * (basically move the buffer from an unsafe area to a safe one)
++ */
++static dma_addr_t
++dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
++		   size_t size, enum dma_data_direction dir,
++		   unsigned long attrs)
++{
++	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
++	dma_addr_t dma_addr;
++
++	dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
++
++	swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
++	if (!is_device_dma_coherent(dev))
++		__dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++
++	if (device_info && (dma_addr + size) > device_info->threshold) {
++		struct safe_buffer *buf;
++
++		buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
++		if (!buf) {
++			DO_STATS(device_info->fail_count++);
++			return (~(dma_addr_t)0x0);
++		}
++
++		DO_STATS(device_info->map_count++);
++
++		dma_addr = map_single(dev, buf, size, dir, attrs);
++	}
++	return dma_addr;
++}
++
++/*
++ * If dma_addr lies in a "safe" (bounce) buffer, copy the data back to
++ * the original "unsafe" buffer (FROM_DEVICE/BIDIRECTIONAL only, unless
++ * DMA_ATTR_SKIP_CPU_SYNC is set) and free the safe buffer. Cache
++ * maintenance and the swiotlb sync then run on the original address.
++ * (basically return things back to the way they should be)
++ */
++static void
++dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
++		     enum dma_data_direction dir, unsigned long attrs)
++{
++	struct safe_buffer *buf;
++
++	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
++	if (buf) {
++		DO_STATS(g_dmabounce_device_info->unmap_count++);
++		dma_addr = unmap_single(dev, buf, size, dir, attrs);
++		free_safe_buffer(g_dmabounce_device_info, buf);
++	}
++
++	if (!is_device_dma_coherent(dev))
++		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++	swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
++}
++
++/*
++ * A version of dmabounce_map_page that assumes the mapping has already
++ * been created - intended for streaming operation.
++ */
++static void
++dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
++			  enum dma_data_direction dir)
++{
++	struct safe_buffer *buf;
++
++	swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
++	if (!is_device_dma_coherent(dev))
++		__dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++
++	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
++	if (buf) {
++		DO_STATS(g_dmabounce_device_info->sync_dev_count++);
++		map_single(dev, buf, size, dir, 0);
++	}
++}
++
++/*
++ * A version of dmabounce_unmap_page that doesn't destroy the mapping -
++ * intended for streaming operation.
++ */
++static void
++dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
++		       size_t size, enum dma_data_direction dir)
++{
++	struct safe_buffer *buf;
++
++	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
++	if (buf) {
++		DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
++		dma_addr = unmap_single(dev, buf, size, dir, 0);
++	}
++
++	if (!is_device_dma_coherent(dev))
++		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++	swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
++}
++
++static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
++{
++	if (g_dmabounce_device_info)
++		return 0;
++
++	return swiotlb_dma_supported(dev, dma_mask);
++}
++
++static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
++{
++	return dma_addr == ~(dma_addr_t)0 || swiotlb_dma_mapping_error(dev, dma_addr);
++}
++
++static const struct dma_map_ops dmabounce_ops = {
++	.alloc			= arm64_dma_alloc,
++	.free			= arm64_dma_free,
++	.mmap			= arm64_dma_mmap,
++	.get_sgtable		= arm64_dma_get_sgtable,
++	.map_page		= dmabounce_map_page,
++	.unmap_page		= dmabounce_unmap_page,
++	.sync_single_for_cpu	= dmabounce_sync_for_cpu,
++	.sync_single_for_device	= dmabounce_sync_for_device,
++	.map_sg			= arm64_dma_map_sg,
++	.unmap_sg		= arm64_dma_unmap_sg,
++	.sync_sg_for_cpu	= arm64_dma_sync_sg_for_cpu,
++	.sync_sg_for_device	= arm64_dma_sync_sg_for_device,
++	.dma_supported		= dmabounce_dma_supported,
++	.mapping_error		= dmabounce_mapping_error,
++};
++
++int brcm_pcie_bounce_init(struct device *dev,
++			  unsigned long buffer_size,
++			  dma_addr_t threshold)
++{
++	struct dmabounce_device_info *device_info;
++	int ret;
++
++	/* Only support a single client */
++	if (g_dmabounce_device_info)
++		return -EBUSY;
++
++	ret = bcm2838_dma40_memcpy_init();
++	if (ret)
++		return ret;
++
++	device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
++	if (!device_info) {
++		dev_err(dev,
++			"Could not allocated dmabounce_device_info\n");
++		return -ENOMEM;
++	}
++
++	ret = bounce_create(&device_info->pool, dev, buffer_size);
++	if (ret) {
++		dev_err(dev,
++			"dmabounce: could not allocate %ld byte DMA pool\n",
++			buffer_size);
++		goto err_bounce;
++	}
++
++	device_info->dev = dev;
++	device_info->threshold = threshold;
++	INIT_LIST_HEAD(&device_info->safe_buffers);
++	rwlock_init(&device_info->lock);
++
++	DO_STATS(device_info->map_count = 0);
++	DO_STATS(device_info->unmap_count = 0);
++	DO_STATS(device_info->sync_dev_count = 0);
++	DO_STATS(device_info->sync_cpu_count = 0);
++	DO_STATS(device_info->fail_count = 0);
++	DO_STATS(device_info->attr_res =
++		 device_create_file(dev, &dev_attr_dmabounce_stats));
++
++	g_dmabounce_device_info = device_info;
++
++	dev_info(dev, "dmabounce: initialised - %ld kB, threshold %pad\n",
++		 buffer_size / 1024, &threshold);
++
++	return 0;
++
++ err_bounce:
++	kfree(device_info);
++	return ret;
++}
++EXPORT_SYMBOL(brcm_pcie_bounce_init);
++
++void brcm_pcie_bounce_uninit(struct device *dev)
++{
++	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
++
++	g_dmabounce_device_info = NULL;
++
++	if (!device_info) {
++		dev_warn(dev,
++			 "Never registered with dmabounce but attempting "
++			 "to unregister!\n");
++		return;
++	}
++
++	if (!list_empty(&device_info->safe_buffers)) {
++		dev_err(dev,
++			"Removing from dmabounce with pending buffers!\n");
++		BUG();
++	}
++
++	bounce_destroy(&device_info->pool);
++
++	DO_STATS(if (device_info->attr_res == 0)
++			 device_remove_file(dev, &dev_attr_dmabounce_stats));
++
++	kfree(device_info);
++}
++EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
++
++int brcm_pcie_bounce_register_dev(struct device *dev)
++{
++	set_dma_ops(dev, &dmabounce_ops);
++
++	return 0;
++}
++EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
++
++MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
++MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
++MODULE_LICENSE("GPL");
+--- a/drivers/pci/controller/pcie-brcmstb.c
++++ b/drivers/pci/controller/pcie-brcmstb.c
+@@ -617,28 +617,6 @@ static const struct dma_map_ops brcm_dma
+ 
+ static void brcm_set_dma_ops(struct device *dev)
+ {
+-	int ret;
+-
+-	if (IS_ENABLED(CONFIG_ARM64)) {
+-		/*
+-		 * We are going to invoke get_dma_ops(). That
+-		 * function, at this point in time, invokes
+-		 * get_arch_dma_ops(), and for ARM64 that function
+-		 * returns a pointer to dummy_dma_ops. So then we'd
+-		 * like to call arch_setup_dma_ops(), but that isn't
+-		 * exported. Instead, we call of_dma_configure(),
+-		 * which is exported, and this calls
+-		 * arch_setup_dma_ops(). Once we do this the call to
+-		 * get_dma_ops() will work properly because
+-		 * dev->dma_ops will be set.
+-		 */
+-		ret = of_dma_configure(dev, dev->of_node, true);
+-		if (ret) {
+-			dev_err(dev, "of_dma_configure() failed: %d\n", ret);
+-			return;
+-		}
+-	}
+-
+ 	arch_dma_ops = get_dma_ops(dev);
+ 	if (!arch_dma_ops) {
+ 		dev_err(dev, "failed to get arch_dma_ops\n");
+@@ -657,12 +635,12 @@ static int brcmstb_platform_notifier(str
+ 	extern unsigned long max_pfn;
+ 	struct device *dev = __dev;
+ 	const char *rc_name = "0000:00:00.0";
++	int ret;
+ 
+ 	switch (event) {
+ 	case BUS_NOTIFY_ADD_DEVICE:
+ 		if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
+ 		    strcmp(dev->kobj.name, rc_name)) {
+-			int ret;
+ 
+ 			ret = brcm_pcie_bounce_register_dev(dev);
+ 			if (ret) {
+@@ -671,6 +649,12 @@ static int brcmstb_platform_notifier(str
+ 				       ret);
+ 				return ret;
+ 			}
++		} else if (IS_ENABLED(CONFIG_ARM64)) {
++			ret = of_dma_configure(dev, dev->of_node, true);
++			if (ret) {
++				dev_err(dev, "of_dma_configure() failed: %d\n", ret);
++				return ret;
++			}
+ 		}
+ 		brcm_set_dma_ops(dev);
+ 		return NOTIFY_OK;
|