aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
diff options
context:
space:
mode:
authorÁlvaro Fernández Rojas <noltari@gmail.com>2019-09-04 19:01:23 +0200
committerÁlvaro Fernández Rojas <noltari@gmail.com>2019-09-04 19:02:48 +0200
commit662394fb30fdbcc89ec387918714aebee6868a9f (patch)
treec308c5f1a650abf9d9ccbb2a1747469a0d8570c0 /target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
parent99a5e285887c4a10aaf06d8e01fae0707995da00 (diff)
downloadupstream-662394fb30fdbcc89ec387918714aebee6868a9f.tar.gz
upstream-662394fb30fdbcc89ec387918714aebee6868a9f.tar.bz2
upstream-662394fb30fdbcc89ec387918714aebee6868a9f.zip
brcm2708: update to latest patches from RPi foundation
Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Diffstat (limited to 'target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch')
-rw-r--r--target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch818
1 files changed, 818 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch b/target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
new file mode 100644
index 0000000000..8e910a8156
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
@@ -0,0 +1,818 @@
+From e96d7ea9022d8b49644c2f150da162728b948c43 Mon Sep 17 00:00:00 2001
+From: yaroslavros <yaroslavros@gmail.com>
+Date: Wed, 14 Aug 2019 15:22:55 +0100
+Subject: [PATCH 767/782] Ported pcie-brcmstb bounce buffer implementation to
+ ARM64. (#3144)
+
+Ported pcie-brcmstb bounce buffer implementation to ARM64.
+This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.
+
+Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
+---
+ arch/arm64/include/asm/dma-mapping.h | 21 +
+ arch/arm64/mm/dma-mapping.c | 50 ++
+ drivers/pci/controller/Makefile | 3 +
+ drivers/pci/controller/pcie-brcmstb-bounce.h | 2 +-
+ .../pci/controller/pcie-brcmstb-bounce64.c | 576 ++++++++++++++++++
+ drivers/pci/controller/pcie-brcmstb.c | 30 +-
+ 6 files changed, 658 insertions(+), 24 deletions(-)
+ create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c
+
+--- a/arch/arm64/include/asm/dma-mapping.h
++++ b/arch/arm64/include/asm/dma-mapping.h
+@@ -24,6 +24,27 @@
+ #include <xen/xen.h>
+ #include <asm/xen/hypervisor.h>
+
++extern void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
++ gfp_t gfp, unsigned long attrs);
++extern void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
++ dma_addr_t handle, unsigned long attrs);
++extern int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
++ void *cpu_addr, dma_addr_t dma_addr, size_t size,
++ unsigned long attrs);
++extern int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
++ void *cpu_addr, dma_addr_t dma_addr, size_t size,
++ unsigned long attrs);
++extern int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
++ enum dma_data_direction dir, unsigned long attrs);
++extern void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int,
++ enum dma_data_direction dir, unsigned long attrs);
++extern void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
++ enum dma_data_direction dir);
++extern void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
++ enum dma_data_direction dir);
++
++
++
+ extern const struct dma_map_ops dummy_dma_ops;
+
+ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+--- a/arch/arm64/mm/dma-mapping.c
++++ b/arch/arm64/mm/dma-mapping.c
+@@ -138,6 +138,12 @@ no_mem:
+ return NULL;
+ }
+
++void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
++ gfp_t gfp, unsigned long attrs)
++{
++ return __dma_alloc(dev, size, handle, gfp, attrs);
++}
++
+ static void __dma_free(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ unsigned long attrs)
+@@ -154,6 +160,12 @@ static void __dma_free(struct device *de
+ swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
+ }
+
++void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
++ dma_addr_t handle, unsigned long attrs)
++{
++ __dma_free(dev, size, cpu_addr, handle, attrs);
++}
++
+ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+@@ -197,6 +209,12 @@ static int __swiotlb_map_sg_attrs(struct
+ return ret;
+ }
+
++int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
++ enum dma_data_direction dir, unsigned long attrs)
++{
++ return __swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
++}
++
+ static void __swiotlb_unmap_sg_attrs(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir,
+@@ -213,6 +231,12 @@ static void __swiotlb_unmap_sg_attrs(str
+ swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+ }
+
++void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
++ enum dma_data_direction dir, unsigned long attrs)
++{
++ __swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
++}
++
+ static void __swiotlb_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
+@@ -245,6 +269,12 @@ static void __swiotlb_sync_sg_for_cpu(st
+ swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+ }
+
++void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
++ enum dma_data_direction dir)
++{
++ __swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
++}
++
+ static void __swiotlb_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+@@ -259,6 +289,12 @@ static void __swiotlb_sync_sg_for_device
+ sg->length, dir);
+ }
+
++void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
++ enum dma_data_direction dir)
++{
++ __swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
++}
++
+ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
+ unsigned long pfn, size_t size)
+ {
+@@ -294,6 +330,13 @@ static int __swiotlb_mmap(struct device
+ return __swiotlb_mmap_pfn(vma, pfn, size);
+ }
+
++int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
++ void *cpu_addr, dma_addr_t dma_addr, size_t size,
++ unsigned long attrs)
++{
++ return __swiotlb_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
++}
++
+ static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+ struct page *page, size_t size)
+ {
+@@ -314,6 +357,13 @@ static int __swiotlb_get_sgtable(struct
+ return __swiotlb_get_sgtable_page(sgt, page, size);
+ }
+
++int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
++ void *cpu_addr, dma_addr_t dma_addr, size_t size,
++ unsigned long attrs)
++{
++ return __swiotlb_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
++}
++
+ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
+ {
+ if (swiotlb)
+--- a/drivers/pci/controller/Makefile
++++ b/drivers/pci/controller/Makefile
+@@ -32,6 +32,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcms
+ ifdef CONFIG_ARM
+ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
+ endif
++ifdef CONFIG_ARM64
++obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
++endif
+
+ obj-$(CONFIG_VMD) += vmd.o
+ # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
+--- a/drivers/pci/controller/pcie-brcmstb-bounce.h
++++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
+@@ -6,7 +6,7 @@
+ #ifndef _PCIE_BRCMSTB_BOUNCE_H
+ #define _PCIE_BRCMSTB_BOUNCE_H
+
+-#ifdef CONFIG_ARM
++#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+
+ int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
+ dma_addr_t threshold);
+--- /dev/null
++++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
+@@ -0,0 +1,576 @@
++/*
++ * This code started out as a version of arch/arm/common/dmabounce.c,
++ * modified to cope with highmem pages. Now it has been changed heavily -
++ * it now preallocates a large block (currently 4MB) and carves it up
++ * sequentially in ring fashion, and DMA is used to copy the data - to the
++ * point where very little of the original remains.
++ *
++ * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
++ *
++ * Original version by Brad Parker (brad@heeltoe.com)
++ * Re-written by Christopher Hoover <ch@murgatroid.com>
++ * Made generic by Deepak Saxena <dsaxena@plexity.net>
++ *
++ * Copyright (C) 2002 Hewlett Packard Company.
++ * Copyright (C) 2004 MontaVista Software, Inc.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * version 2 as published by the Free Software Foundation.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/page-flags.h>
++#include <linux/device.h>
++#include <linux/dma-mapping.h>
++#include <linux/dma-direct.h>
++#include <linux/dmapool.h>
++#include <linux/list.h>
++#include <linux/scatterlist.h>
++#include <linux/bitmap.h>
++#include <linux/swiotlb.h>
++
++#include <asm/cacheflush.h>
++
++#define STATS
++
++#ifdef STATS
++#define DO_STATS(X) do { X ; } while (0)
++#else
++#define DO_STATS(X) do { } while (0)
++#endif
++
++/* ************************************************** */
++
++struct safe_buffer {
++ struct list_head node;
++
++ /* original request */
++ size_t size;
++ int direction;
++
++ struct dmabounce_pool *pool;
++ void *safe;
++ dma_addr_t unsafe_dma_addr;
++ dma_addr_t safe_dma_addr;
++};
++
++struct dmabounce_pool {
++ unsigned long pages;
++ void *virt_addr;
++ dma_addr_t dma_addr;
++ unsigned long *alloc_map;
++ unsigned long alloc_pos;
++ spinlock_t lock;
++ struct device *dev;
++ unsigned long num_pages;
++#ifdef STATS
++ size_t max_size;
++ unsigned long num_bufs;
++ unsigned long max_bufs;
++ unsigned long max_pages;
++#endif
++};
++
++struct dmabounce_device_info {
++ struct device *dev;
++ dma_addr_t threshold;
++ struct list_head safe_buffers;
++ struct dmabounce_pool pool;
++ rwlock_t lock;
++#ifdef STATS
++ unsigned long map_count;
++ unsigned long unmap_count;
++ unsigned long sync_dev_count;
++ unsigned long sync_cpu_count;
++ unsigned long fail_count;
++ int attr_res;
++#endif
++};
++
++static struct dmabounce_device_info *g_dmabounce_device_info;
++
++extern int bcm2838_dma40_memcpy_init(void);
++extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
++
++#ifdef STATS
++static ssize_t
++bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
++{
++ struct dmabounce_device_info *device_info = g_dmabounce_device_info;
++ return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
++ device_info->map_count,
++ device_info->unmap_count,
++ device_info->sync_dev_count,
++ device_info->sync_cpu_count,
++ device_info->fail_count,
++ device_info->pool.max_size,
++ device_info->pool.num_bufs,
++ device_info->pool.max_bufs,
++ device_info->pool.num_pages * PAGE_SIZE,
++ device_info->pool.max_pages * PAGE_SIZE);
++}
++
++static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
++#endif
++
++static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
++ unsigned long buffer_size)
++{
++ int ret = -ENOMEM;
++ pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
++ pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
++ if (!pool->alloc_map)
++ goto err_bitmap;
++ pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
++ &pool->dma_addr, GFP_KERNEL);
++ if (!pool->virt_addr)
++ goto err_dmabuf;
++
++ pool->alloc_pos = 0;
++ spin_lock_init(&pool->lock);
++ pool->dev = dev;
++ pool->num_pages = 0;
++
++ DO_STATS(pool->max_size = 0);
++ DO_STATS(pool->num_bufs = 0);
++ DO_STATS(pool->max_bufs = 0);
++ DO_STATS(pool->max_pages = 0);
++
++ return 0;
++
++err_dmabuf:
++ bitmap_free(pool->alloc_map);
++err_bitmap:
++ return ret;
++}
++
++static void bounce_destroy(struct dmabounce_pool *pool)
++{
++ dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
++ pool->dma_addr);
++
++ bitmap_free(pool->alloc_map);
++}
++
++static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
++ dma_addr_t *dmaaddrp)
++{
++ unsigned long pages;
++ unsigned long flags;
++ unsigned long pos;
++
++ pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
++
++ DO_STATS(pool->max_size = max(size, pool->max_size));
++
++ spin_lock_irqsave(&pool->lock, flags);
++ pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
++ pool->alloc_pos, pages, 0);
++ /* If not found, try from the start */
++ if (pos >= pool->pages && pool->alloc_pos)
++ pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
++ 0, pages, 0);
++
++ if (pos >= pool->pages) {
++ spin_unlock_irqrestore(&pool->lock, flags);
++ return NULL;
++ }
++
++ bitmap_set(pool->alloc_map, pos, pages);
++ pool->alloc_pos = (pos + pages) % pool->pages;
++ pool->num_pages += pages;
++
++ DO_STATS(pool->num_bufs++);
++ DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
++ DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
++
++ spin_unlock_irqrestore(&pool->lock, flags);
++
++ *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
++
++ return pool->virt_addr + pos * PAGE_SIZE;
++}
++
++static void
++bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
++{
++ unsigned long pages;
++ unsigned long flags;
++ unsigned long pos;
++
++ pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
++ pos = (buf - pool->virt_addr)/PAGE_SIZE;
++
++ BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
++
++ spin_lock_irqsave(&pool->lock, flags);
++ bitmap_clear(pool->alloc_map, pos, pages);
++ pool->num_pages -= pages;
++ if (pool->num_pages == 0)
++ pool->alloc_pos = 0;
++ DO_STATS(pool->num_bufs--);
++ spin_unlock_irqrestore(&pool->lock, flags);
++}
++
++/* allocate a 'safe' buffer and keep track of it */
++static struct safe_buffer *
++alloc_safe_buffer(struct dmabounce_device_info *device_info,
++ dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
++{
++ struct safe_buffer *buf;
++ struct dmabounce_pool *pool = &device_info->pool;
++ struct device *dev = device_info->dev;
++ unsigned long flags;
++
++ /*
++ * Although one might expect this to be called in thread context,
++ * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
++ * was previously used to select the appropriate allocation mode,
++ * but this is unsafe.
++ */
++ buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
++ if (!buf) {
++ dev_warn(dev, "%s: kmalloc failed\n", __func__);
++ return NULL;
++ }
++
++ buf->unsafe_dma_addr = dma_addr;
++ buf->size = size;
++ buf->direction = dir;
++ buf->pool = pool;
++
++ buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
++
++ if (!buf->safe) {
++ dev_warn(dev,
++ "%s: could not alloc dma memory (size=%d)\n",
++ __func__, size);
++ kfree(buf);
++ return NULL;
++ }
++
++ write_lock_irqsave(&device_info->lock, flags);
++ list_add(&buf->node, &device_info->safe_buffers);
++ write_unlock_irqrestore(&device_info->lock, flags);
++
++ return buf;
++}
++
++/* determine if a buffer is from our "safe" pool */
++static struct safe_buffer *
++find_safe_buffer(struct dmabounce_device_info *device_info,
++ dma_addr_t safe_dma_addr)
++{
++ struct safe_buffer *b, *rb = NULL;
++ unsigned long flags;
++
++ read_lock_irqsave(&device_info->lock, flags);
++
++ list_for_each_entry(b, &device_info->safe_buffers, node)
++ if (b->safe_dma_addr <= safe_dma_addr &&
++ b->safe_dma_addr + b->size > safe_dma_addr) {
++ rb = b;
++ break;
++ }
++
++ read_unlock_irqrestore(&device_info->lock, flags);
++ return rb;
++}
++
++static void
++free_safe_buffer(struct dmabounce_device_info *device_info,
++ struct safe_buffer *buf)
++{
++ unsigned long flags;
++
++ write_lock_irqsave(&device_info->lock, flags);
++ list_del(&buf->node);
++ write_unlock_irqrestore(&device_info->lock, flags);
++
++ bounce_free(buf->pool, buf->safe, buf->size);
++
++ kfree(buf);
++}
++
++/* ************************************************** */
++
++static struct safe_buffer *
++find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
++{
++ if (!dev || !g_dmabounce_device_info)
++ return NULL;
++ if (dma_mapping_error(dev, dma_addr)) {
++ dev_err(dev, "Trying to %s invalid mapping\n", where);
++ return NULL;
++ }
++ return find_safe_buffer(g_dmabounce_device_info, dma_addr);
++}
++
++static dma_addr_t
++map_single(struct device *dev, struct safe_buffer *buf, size_t size,
++ enum dma_data_direction dir, unsigned long attrs)
++{
++ BUG_ON(buf->size != size);
++ BUG_ON(buf->direction != dir);
++
++ dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
++ (u64)buf->safe_dma_addr);
++
++ if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
++ !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
++ bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
++ size);
++
++ return buf->safe_dma_addr;
++}
++
++static dma_addr_t
++unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
++ enum dma_data_direction dir, unsigned long attrs)
++{
++ BUG_ON(buf->size != size);
++ BUG_ON(buf->direction != dir);
++
++ if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
++ !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
++ dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
++ (u64)buf->unsafe_dma_addr);
++
++ bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
++ size);
++ }
++ return buf->unsafe_dma_addr;
++}
++
++/* ************************************************** */
++
++/*
++ * see if a buffer address is in an 'unsafe' range. if it is
++ * allocate a 'safe' buffer and copy the unsafe buffer into it.
++ * substitute the safe buffer for the unsafe one.
++ * (basically move the buffer from an unsafe area to a safe one)
++ */
++static dma_addr_t
++dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
++ size_t size, enum dma_data_direction dir,
++ unsigned long attrs)
++{
++ struct dmabounce_device_info *device_info = g_dmabounce_device_info;
++ dma_addr_t dma_addr;
++
++ dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
++
++ swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
++ if (!is_device_dma_coherent(dev))
++ __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++
++ if (device_info && (dma_addr + size) > device_info->threshold) {
++ struct safe_buffer *buf;
++
++ buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
++ if (!buf) {
++ DO_STATS(device_info->fail_count++);
++ return (~(dma_addr_t)0x0);
++ }
++
++ DO_STATS(device_info->map_count++);
++
++ dma_addr = map_single(dev, buf, size, dir, attrs);
++ }
++ return dma_addr;
++}
++
++/*
++ * see if a mapped address was really a "safe" buffer and if so, copy
++ * the data from the safe buffer back to the unsafe buffer and free up
++ * the safe buffer. (basically return things back to the way they
++ * should be)
++ */
++static void
++dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
++ enum dma_data_direction dir, unsigned long attrs)
++{
++ struct safe_buffer *buf;
++
++ buf = find_safe_buffer_dev(dev, dma_addr, __func__);
++ if (buf) {
++ DO_STATS(g_dmabounce_device_info->unmap_count++);
++ dma_addr = unmap_single(dev, buf, size, dir, attrs);
++ free_safe_buffer(g_dmabounce_device_info, buf);
++ }
++
++ if (!is_device_dma_coherent(dev))
++ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++ swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
++}
++
++/*
++ * A version of dmabounce_map_page that assumes the mapping has already
++ * been created - intended for streaming operation.
++ */
++static void
++dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
++ enum dma_data_direction dir)
++{
++ struct safe_buffer *buf;
++
++ swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
++ if (!is_device_dma_coherent(dev))
++ __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++
++ buf = find_safe_buffer_dev(dev, dma_addr, __func__);
++ if (buf) {
++ DO_STATS(g_dmabounce_device_info->sync_dev_count++);
++ map_single(dev, buf, size, dir, 0);
++ }
++}
++
++/*
++ * A version of dmabounce_unmap_page that doesn't destroy the mapping -
++ * intended for streaming operation.
++ */
++static void
++dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
++ size_t size, enum dma_data_direction dir)
++{
++ struct safe_buffer *buf;
++
++ buf = find_safe_buffer_dev(dev, dma_addr, __func__);
++ if (buf) {
++ DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
++ dma_addr = unmap_single(dev, buf, size, dir, 0);
++ }
++
++ if (!is_device_dma_coherent(dev))
++ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
++ swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
++}
++
++static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
++{
++ if (g_dmabounce_device_info)
++ return 0;
++
++ return swiotlb_dma_supported(dev, dma_mask);
++}
++
++static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
++{
++ return swiotlb_dma_mapping_error(dev, dma_addr);
++}
++
++static const struct dma_map_ops dmabounce_ops = {
++ .alloc = arm64_dma_alloc,
++ .free = arm64_dma_free,
++ .mmap = arm64_dma_mmap,
++ .get_sgtable = arm64_dma_get_sgtable,
++ .map_page = dmabounce_map_page,
++ .unmap_page = dmabounce_unmap_page,
++ .sync_single_for_cpu = dmabounce_sync_for_cpu,
++ .sync_single_for_device = dmabounce_sync_for_device,
++ .map_sg = arm64_dma_map_sg,
++ .unmap_sg = arm64_dma_unmap_sg,
++ .sync_sg_for_cpu = arm64_dma_sync_sg_for_cpu,
++ .sync_sg_for_device = arm64_dma_sync_sg_for_device,
++ .dma_supported = dmabounce_dma_supported,
++ .mapping_error = dmabounce_mapping_error,
++};
++
++int brcm_pcie_bounce_init(struct device *dev,
++ unsigned long buffer_size,
++ dma_addr_t threshold)
++{
++ struct dmabounce_device_info *device_info;
++ int ret;
++
++ /* Only support a single client */
++ if (g_dmabounce_device_info)
++ return -EBUSY;
++
++ ret = bcm2838_dma40_memcpy_init();
++ if (ret)
++ return ret;
++
++ device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
++ if (!device_info) {
++ dev_err(dev,
++ "Could not allocated dmabounce_device_info\n");
++ return -ENOMEM;
++ }
++
++ ret = bounce_create(&device_info->pool, dev, buffer_size);
++ if (ret) {
++ dev_err(dev,
++ "dmabounce: could not allocate %ld byte DMA pool\n",
++ buffer_size);
++ goto err_bounce;
++ }
++
++ device_info->dev = dev;
++ device_info->threshold = threshold;
++ INIT_LIST_HEAD(&device_info->safe_buffers);
++ rwlock_init(&device_info->lock);
++
++ DO_STATS(device_info->map_count = 0);
++ DO_STATS(device_info->unmap_count = 0);
++ DO_STATS(device_info->sync_dev_count = 0);
++ DO_STATS(device_info->sync_cpu_count = 0);
++ DO_STATS(device_info->fail_count = 0);
++ DO_STATS(device_info->attr_res =
++ device_create_file(dev, &dev_attr_dmabounce_stats));
++
++ g_dmabounce_device_info = device_info;
++
++ dev_err(dev, "dmabounce: initialised - %ld kB, threshold %pad\n",
++ buffer_size / 1024, &threshold);
++
++ return 0;
++
++ err_bounce:
++ kfree(device_info);
++ return ret;
++}
++EXPORT_SYMBOL(brcm_pcie_bounce_init);
++
++void brcm_pcie_bounce_uninit(struct device *dev)
++{
++ struct dmabounce_device_info *device_info = g_dmabounce_device_info;
++
++ g_dmabounce_device_info = NULL;
++
++ if (!device_info) {
++ dev_warn(dev,
++ "Never registered with dmabounce but attempting"
++ "to unregister!\n");
++ return;
++ }
++
++ if (!list_empty(&device_info->safe_buffers)) {
++ dev_err(dev,
++ "Removing from dmabounce with pending buffers!\n");
++ BUG();
++ }
++
++ bounce_destroy(&device_info->pool);
++
++ DO_STATS(if (device_info->attr_res == 0)
++ device_remove_file(dev, &dev_attr_dmabounce_stats));
++
++ kfree(device_info);
++}
++EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
++
++int brcm_pcie_bounce_register_dev(struct device *dev)
++{
++ set_dma_ops(dev, &dmabounce_ops);
++
++ return 0;
++}
++EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
++
++MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
++MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
++MODULE_LICENSE("GPL");
+--- a/drivers/pci/controller/pcie-brcmstb.c
++++ b/drivers/pci/controller/pcie-brcmstb.c
+@@ -617,28 +617,6 @@ static const struct dma_map_ops brcm_dma
+
+ static void brcm_set_dma_ops(struct device *dev)
+ {
+- int ret;
+-
+- if (IS_ENABLED(CONFIG_ARM64)) {
+- /*
+- * We are going to invoke get_dma_ops(). That
+- * function, at this point in time, invokes
+- * get_arch_dma_ops(), and for ARM64 that function
+- * returns a pointer to dummy_dma_ops. So then we'd
+- * like to call arch_setup_dma_ops(), but that isn't
+- * exported. Instead, we call of_dma_configure(),
+- * which is exported, and this calls
+- * arch_setup_dma_ops(). Once we do this the call to
+- * get_dma_ops() will work properly because
+- * dev->dma_ops will be set.
+- */
+- ret = of_dma_configure(dev, dev->of_node, true);
+- if (ret) {
+- dev_err(dev, "of_dma_configure() failed: %d\n", ret);
+- return;
+- }
+- }
+-
+ arch_dma_ops = get_dma_ops(dev);
+ if (!arch_dma_ops) {
+ dev_err(dev, "failed to get arch_dma_ops\n");
+@@ -657,12 +635,12 @@ static int brcmstb_platform_notifier(str
+ extern unsigned long max_pfn;
+ struct device *dev = __dev;
+ const char *rc_name = "0000:00:00.0";
++ int ret;
+
+ switch (event) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
+ strcmp(dev->kobj.name, rc_name)) {
+- int ret;
+
+ ret = brcm_pcie_bounce_register_dev(dev);
+ if (ret) {
+@@ -671,6 +649,12 @@ static int brcmstb_platform_notifier(str
+ ret);
+ return ret;
+ }
++ } else if (IS_ENABLED(CONFIG_ARM64)) {
++ ret = of_dma_configure(dev, dev->of_node, true);
++ if (ret) {
++ dev_err(dev, "of_dma_configure() failed: %d\n", ret);
++ return;
++ }
+ }
+ brcm_set_dma_ops(dev);
+ return NOTIFY_OK;