From 545951be6cabac8b1df85771c44335a0eaaa3c5d Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 19 Feb 2019 22:06:59 +0000 Subject: [PATCH] pcie-brcmstb: Changes for BCM2711 The initial brcmstb PCIe driver - originally taken from the V3(?) patch set - has been modified significantly for the BCM2711. Signed-off-by: Phil Elwell --- drivers/dma/bcm2835-dma.c | 107 ++++ drivers/pci/controller/Makefile | 4 + drivers/pci/controller/pcie-brcmstb-bounce.c | 564 +++++++++++++++++++ drivers/pci/controller/pcie-brcmstb-bounce.h | 32 ++ drivers/pci/controller/pcie-brcmstb.c | 237 ++++---- drivers/soc/bcm/brcmstb/Makefile | 2 +- drivers/soc/bcm/brcmstb/memory.c | 158 ++++++ 7 files changed, 996 insertions(+), 108 deletions(-) create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce.c create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce.h create mode 100644 drivers/soc/bcm/brcmstb/memory.c --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -68,6 +68,17 @@ struct bcm2835_dma_cb { uint32_t pad[2]; }; +struct bcm2838_dma40_scb { + uint32_t ti; + uint32_t src; + uint32_t srci; + uint32_t dst; + uint32_t dsti; + uint32_t len; + uint32_t next_cb; + uint32_t rsvd; +}; + struct bcm2835_cb_entry { struct bcm2835_dma_cb *cb; dma_addr_t paddr; @@ -185,6 +196,45 @@ struct bcm2835_desc { #define MAX_DMA_LEN SZ_1G #define MAX_LITE_DMA_LEN (SZ_64K - 4) +/* 40-bit DMA support */ +#define BCM2838_DMA40_CS 0x00 +#define BCM2838_DMA40_CB 0x04 +#define BCM2838_DMA40_DEBUG 0x0c +#define BCM2858_DMA40_TI 0x10 +#define BCM2838_DMA40_SRC 0x14 +#define BCM2838_DMA40_SRCI 0x18 +#define BCM2838_DMA40_DEST 0x1c +#define BCM2838_DMA40_DESTI 0x20 +#define BCM2838_DMA40_LEN 0x24 +#define BCM2838_DMA40_NEXT_CB 0x28 +#define BCM2838_DMA40_DEBUG2 0x2c + +#define BCM2838_DMA40_CS_ACTIVE BIT(0) +#define BCM2838_DMA40_CS_END BIT(1) + +#define BCM2838_DMA40_CS_QOS(x) (((x) & 0x1f) << 16) +#define BCM2838_DMA40_CS_PANIC_QOS(x) (((x) & 0x1f) << 20) +#define BCM2838_DMA40_CS_WRITE_WAIT BIT(28) + +#define BCM2838_DMA40_BURST_LEN(x) ((((x) - 1) & 0xf) << 8) +#define BCM2838_DMA40_INC BIT(12) +#define BCM2838_DMA40_SIZE_128 (2 << 13) + +#define BCM2838_DMA40_MEMCPY_QOS \ + (BCM2838_DMA40_CS_QOS(0x0) | \ + BCM2838_DMA40_CS_PANIC_QOS(0x0) | \ + BCM2838_DMA40_CS_WRITE_WAIT) + +#define BCM2838_DMA40_MEMCPY_XFER_INFO \ + (BCM2838_DMA40_SIZE_128 | \ + BCM2838_DMA40_INC | \ + BCM2838_DMA40_BURST_LEN(16)) + +static void __iomem *memcpy_chan; +static struct bcm2838_dma40_scb *memcpy_scb; +static dma_addr_t memcpy_scb_dma; +DEFINE_SPINLOCK(memcpy_lock); + static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c) { /* lite and normal channels have different max frame length */ @@ -868,6 +918,56 @@ static void bcm2835_dma_free(struct bcm2 } } +int bcm2838_dma40_memcpy_init(struct device *dev) +{ + if (memcpy_scb) + return 0; + + memcpy_scb = dma_alloc_coherent(dev, sizeof(*memcpy_scb), + &memcpy_scb_dma, GFP_KERNEL); + + if (!memcpy_scb) { + pr_err("bcm2838_dma40_memcpy_init failed!\n"); + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL(bcm2838_dma40_memcpy_init); + +void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size) +{ + struct bcm2838_dma40_scb *scb = memcpy_scb; + unsigned long flags; + + if (!scb) { + pr_err("bcm2838_dma40_memcpy not initialised!\n"); + return; + } + + spin_lock_irqsave(&memcpy_lock, flags); + + scb->ti = 0; + scb->src = lower_32_bits(src); + scb->srci = upper_32_bits(src) | BCM2838_DMA40_MEMCPY_XFER_INFO; + scb->dst = lower_32_bits(dst); + scb->dsti = 
upper_32_bits(dst) | BCM2838_DMA40_MEMCPY_XFER_INFO; + scb->len = size; + scb->next_cb = 0; + + writel((u32)(memcpy_scb_dma >> 5), memcpy_chan + BCM2838_DMA40_CB); + writel(BCM2838_DMA40_MEMCPY_QOS + BCM2838_DMA40_CS_ACTIVE, + memcpy_chan + BCM2838_DMA40_CS); + /* Poll for completion */ + while (!(readl(memcpy_chan + BCM2838_DMA40_CS) & BCM2838_DMA40_CS_END)) + cpu_relax(); + + writel(BCM2838_DMA40_CS_END, memcpy_chan + BCM2838_DMA40_CS); + + spin_unlock_irqrestore(&memcpy_lock, flags); +} +EXPORT_SYMBOL(bcm2838_dma40_memcpy); + static const struct of_device_id bcm2835_dma_of_match[] = { { .compatible = "brcm,bcm2835-dma", }, {}, @@ -966,6 +1066,13 @@ static int bcm2835_dma_probe(struct plat /* Channel 0 is used by the legacy API */ chans_available &= ~BCM2835_DMA_BULK_MASK; + /* We can't use channels 11-13 yet */ + chans_available &= ~(BIT(11) | BIT(12) | BIT(13)); + + /* Grab channel 14 for the 40-bit DMA memcpy */ + chans_available &= ~BIT(14); + memcpy_chan = BCM2835_DMA_CHANIO(base, 14); + /* get irqs for each channel that we support */ for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) { /* skip masked out channels */ --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -29,6 +29,10 @@ obj-$(CONFIG_PCIE_MEDIATEK) += pcie-medi obj-$(CONFIG_PCIE_MOBIVEIL) += pcie-mobiveil.o obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o +ifdef CONFIG_ARM +obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o +endif + obj-$(CONFIG_VMD) += vmd.o # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW obj-y += dwc/ --- /dev/null +++ b/drivers/pci/controller/pcie-brcmstb-bounce.c @@ -0,0 +1,564 @@ +/* + * This code started out as a version of arch/arm/common/dmabounce.c, + * modified to cope with highmem pages. Now it has been changed heavily - + * it now preallocates a large block (currently 4MB) and carves it up + * sequentially in ring fashion, and DMA is used to copy the data - to the + * point where very little of the original remains. + * + * Copyright (C) 2019 Raspberry Pi (Trading) Ltd. + * + * Original version by Brad Parker (brad@heeltoe.com) + * Re-written by Christopher Hoover + * Made generic by Deepak Saxena + * + * Copyright (C) 2002 Hewlett Packard Company. + * Copyright (C) 2004 MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define STATS + +#ifdef STATS +#define DO_STATS(X) do { X ; } while (0) +#else +#define DO_STATS(X) do { } while (0) +#endif + +/* ************************************************** */ + +struct safe_buffer { + struct list_head node; + + /* original request */ + size_t size; + int direction; + + struct dmabounce_pool *pool; + void *safe; + dma_addr_t unsafe_dma_addr; + dma_addr_t safe_dma_addr; +}; + +struct dmabounce_pool { + unsigned long pages; + void *virt_addr; + dma_addr_t dma_addr; + unsigned long *alloc_map; + unsigned long alloc_pos; + spinlock_t lock; + struct device *dev; + unsigned long num_pages; +#ifdef STATS + size_t max_size; + unsigned long num_bufs; + unsigned long max_bufs; + unsigned long max_pages; +#endif +}; + +struct dmabounce_device_info { + struct device *dev; + dma_addr_t threshold; + struct list_head safe_buffers; + struct dmabounce_pool pool; + rwlock_t lock; +#ifdef STATS + unsigned long map_count; + unsigned long unmap_count; + unsigned long sync_dev_count; + unsigned long sync_cpu_count; + unsigned long fail_count; + int attr_res; +#endif +}; + +static struct dmabounce_device_info *g_dmabounce_device_info; + +extern int bcm2838_dma40_memcpy_init(struct device *dev); +extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size); + +#ifdef STATS +static ssize_t +bounce_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct dmabounce_device_info *device_info = g_dmabounce_device_info; + return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n", + device_info->map_count, + device_info->unmap_count, + device_info->sync_dev_count, + device_info->sync_cpu_count, + device_info->fail_count, + device_info->pool.max_size, + device_info->pool.num_bufs, + device_info->pool.max_bufs, + device_info->pool.num_pages * PAGE_SIZE, + device_info->pool.max_pages * PAGE_SIZE); +} + +static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL); +#endif + +static int bounce_create(struct dmabounce_pool *pool, struct device *dev, + unsigned long buffer_size) +{ + int ret = -ENOMEM; + pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE; + pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL); + if (!pool->alloc_map) + goto err_bitmap; + pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE, + &pool->dma_addr, GFP_KERNEL); + if (!pool->virt_addr) + goto err_dmabuf; + + pool->alloc_pos = 0; + spin_lock_init(&pool->lock); + pool->dev = dev; + pool->num_pages = 0; + + DO_STATS(pool->max_size = 0); + DO_STATS(pool->num_bufs = 0); + DO_STATS(pool->max_bufs = 0); + DO_STATS(pool->max_pages = 0); + + return 0; + +err_dmabuf: + bitmap_free(pool->alloc_map); +err_bitmap: + return ret; +} + +static void bounce_destroy(struct dmabounce_pool *pool) +{ + dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr, + pool->dma_addr); + + bitmap_free(pool->alloc_map); +} + +static void *bounce_alloc(struct dmabounce_pool *pool, size_t size, + dma_addr_t *dmaaddrp) +{ + unsigned long pages; + unsigned long flags; + unsigned long pos; + + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE; + + DO_STATS(pool->max_size = max(size, pool->max_size)); + + spin_lock_irqsave(&pool->lock, flags); + pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages, + pool->alloc_pos, pages, 0); + /* If not found, try from the start */ + if (pos >= pool->pages && pool->alloc_pos) + pos = 
bitmap_find_next_zero_area(pool->alloc_map, pool->pages, + 0, pages, 0); + + if (pos >= pool->pages) { + spin_unlock_irqrestore(&pool->lock, flags); + return NULL; + } + + bitmap_set(pool->alloc_map, pos, pages); + pool->alloc_pos = (pos + pages) % pool->pages; + pool->num_pages += pages; + + DO_STATS(pool->num_bufs++); + DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs)); + DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages)); + + spin_unlock_irqrestore(&pool->lock, flags); + + *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE; + + return pool->virt_addr + pos * PAGE_SIZE; +} + +static void +bounce_free(struct dmabounce_pool *pool, void *buf, size_t size) +{ + unsigned long pages; + unsigned long flags; + unsigned long pos; + + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE; + pos = (buf - pool->virt_addr)/PAGE_SIZE; + + BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1)); + + spin_lock_irqsave(&pool->lock, flags); + bitmap_clear(pool->alloc_map, pos, pages); + pool->num_pages -= pages; + if (pool->num_pages == 0) + pool->alloc_pos = 0; + DO_STATS(pool->num_bufs--); + spin_unlock_irqrestore(&pool->lock, flags); +} + +/* allocate a 'safe' buffer and keep track of it */ +static struct safe_buffer * +alloc_safe_buffer(struct dmabounce_device_info *device_info, + dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) +{ + struct safe_buffer *buf; + struct dmabounce_pool *pool = &device_info->pool; + struct device *dev = device_info->dev; + unsigned long flags; + + /* + * Although one might expect this to be called in thread context, + * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic() + * was previously used to select the appropriate allocation mode, + * but this is unsafe. + */ + buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC); + if (!buf) { + dev_warn(dev, "%s: kmalloc failed\n", __func__); + return NULL; + } + + buf->unsafe_dma_addr = dma_addr; + buf->size = size; + buf->direction = dir; + buf->pool = pool; + + buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr); + + if (!buf->safe) { + dev_warn(dev, + "%s: could not alloc dma memory (size=%d)\n", + __func__, size); + kfree(buf); + return NULL; + } + + write_lock_irqsave(&device_info->lock, flags); + list_add(&buf->node, &device_info->safe_buffers); + write_unlock_irqrestore(&device_info->lock, flags); + + return buf; +} + +/* determine if a buffer is from our "safe" pool */ +static struct safe_buffer * +find_safe_buffer(struct dmabounce_device_info *device_info, + dma_addr_t safe_dma_addr) +{ + struct safe_buffer *b, *rb = NULL; + unsigned long flags; + + read_lock_irqsave(&device_info->lock, flags); + + list_for_each_entry(b, &device_info->safe_buffers, node) + if (b->safe_dma_addr <= safe_dma_addr && + b->safe_dma_addr + b->size > safe_dma_addr) { + rb = b; + break; + } + + read_unlock_irqrestore(&device_info->lock, flags); + return rb; +} + +static void +free_safe_buffer(struct dmabounce_device_info *device_info, + struct safe_buffer *buf) +{ + unsigned long flags; + + write_lock_irqsave(&device_info->lock, flags); + list_del(&buf->node); + write_unlock_irqrestore(&device_info->lock, flags); + + bounce_free(buf->pool, buf->safe, buf->size); + + kfree(buf); +} + +/* ************************************************** */ + +static struct safe_buffer * +find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where) +{ + if (!dev || !g_dmabounce_device_info) + return NULL; + if (dma_mapping_error(dev, dma_addr)) { + dev_err(dev, "Trying to %s invalid mapping\n", where); + 
return NULL; + } + return find_safe_buffer(g_dmabounce_device_info, dma_addr); +} + +static dma_addr_t +map_single(struct device *dev, struct safe_buffer *buf, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + BUG_ON(buf->size != size); + BUG_ON(buf->direction != dir); + + dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr, + (u64)buf->safe_dma_addr); + + if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) && + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr, + size); + + return buf->safe_dma_addr; +} + +static dma_addr_t +unmap_single(struct device *dev, struct safe_buffer *buf, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + BUG_ON(buf->size != size); + BUG_ON(buf->direction != dir); + + if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) && + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr, + (u64)buf->unsafe_dma_addr); + + bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr, + size); + } + return buf->unsafe_dma_addr; +} + +/* ************************************************** */ + +/* + * see if a buffer address is in an 'unsafe' range. if it is + * allocate a 'safe' buffer and copy the unsafe buffer into it. + * substitute the safe buffer for the unsafe one. + * (basically move the buffer from an unsafe area to a safe one) + */ +static dma_addr_t +dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct dmabounce_device_info *device_info = g_dmabounce_device_info; + dma_addr_t dma_addr; + + dma_addr = pfn_to_dma(dev, page_to_pfn(page)) + offset; + + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); + + if (device_info && (dma_addr + size) > device_info->threshold) { + struct safe_buffer *buf; + + buf = alloc_safe_buffer(device_info, dma_addr, size, dir); + if (!buf) { + DO_STATS(device_info->fail_count++); + return ARM_MAPPING_ERROR; + } + + DO_STATS(device_info->map_count++); + + dma_addr = map_single(dev, buf, size, dir, attrs); + } + + return dma_addr; +} + +/* + * see if a mapped address was really a "safe" buffer and if so, copy + * the data from the safe buffer back to the unsafe buffer and free up + * the safe buffer. (basically return things back to the way they + * should be) + */ +static void +dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + struct safe_buffer *buf; + + buf = find_safe_buffer_dev(dev, dma_addr, __func__); + if (buf) { + DO_STATS(g_dmabounce_device_info->unmap_count++); + dma_addr = unmap_single(dev, buf, size, dir, attrs); + free_safe_buffer(g_dmabounce_device_info, buf); + } + + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir); +} + +/* + * A version of dmabounce_map_page that assumes the mapping has already + * been created - intended for streaming operation. + */ +static void +dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir) +{ + struct safe_buffer *buf; + + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); + + buf = find_safe_buffer_dev(dev, dma_addr, __func__); + if (buf) { + DO_STATS(g_dmabounce_device_info->sync_dev_count++); + map_single(dev, buf, size, dir, 0); + } +} + +/* + * A version of dmabounce_unmap_page that doesn't destroy the mapping - + * intended for streaming operation. 
+ */
+static void
+dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
+			size_t size, enum dma_data_direction dir)
+{
+	struct safe_buffer *buf;
+
+	buf = find_safe_buffer_dev(dev, dma_addr, __func__);
+	if (buf) {
+		DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
+		dma_addr = unmap_single(dev, buf, size, dir, 0);
+	}
+
+	arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir);
+}
+
+static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
+{
+	if (g_dmabounce_device_info)
+		return 0;
+
+	return arm_dma_ops.dma_supported(dev, dma_mask);
+}
+
+static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return arm_dma_ops.mapping_error(dev, dma_addr);
+}
+
+static const struct dma_map_ops dmabounce_ops = {
+	.alloc			= arm_dma_alloc,
+	.free			= arm_dma_free,
+	.mmap			= arm_dma_mmap,
+	.get_sgtable		= arm_dma_get_sgtable,
+	.map_page		= dmabounce_map_page,
+	.unmap_page		= dmabounce_unmap_page,
+	.sync_single_for_cpu	= dmabounce_sync_for_cpu,
+	.sync_single_for_device	= dmabounce_sync_for_device,
+	.map_sg			= arm_dma_map_sg,
+	.unmap_sg		= arm_dma_unmap_sg,
+	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
+	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
+	.dma_supported		= dmabounce_dma_supported,
+	.mapping_error		= dmabounce_mapping_error,
+};
+
+int brcm_pcie_bounce_register_dev(struct device *dev,
+				  unsigned long buffer_size,
+				  dma_addr_t threshold)
+{
+	struct dmabounce_device_info *device_info;
+	int ret;
+
+	/* Only support a single client */
+	if (g_dmabounce_device_info)
+		return -EBUSY;
+
+	ret = bcm2838_dma40_memcpy_init(dev);
+	if (ret)
+		return ret;
+
+	device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
+	if (!device_info) {
+		dev_err(dev,
+			"Could not allocate dmabounce_device_info\n");
+		return -ENOMEM;
+	}
+
+	ret = bounce_create(&device_info->pool, dev, buffer_size);
+	if (ret) {
+		dev_err(dev,
+			"dmabounce: could not allocate %ld byte DMA pool\n",
+			buffer_size);
+		goto err_bounce;
+	}
+
+	device_info->dev = dev;
+	device_info->threshold = threshold;
+	INIT_LIST_HEAD(&device_info->safe_buffers);
+	rwlock_init(&device_info->lock);
+
+	DO_STATS(device_info->map_count = 0);
+	DO_STATS(device_info->unmap_count = 0);
+	DO_STATS(device_info->sync_dev_count = 0);
+	DO_STATS(device_info->sync_cpu_count = 0);
+	DO_STATS(device_info->fail_count = 0);
+	DO_STATS(device_info->attr_res =
+		 device_create_file(dev, &dev_attr_dmabounce_stats));
+
+	g_dmabounce_device_info = device_info;
+	set_dma_ops(dev, &dmabounce_ops);
+
+	dev_info(dev, "dmabounce: registered device - %ld kB, threshold %pad\n",
+		 buffer_size / 1024, &threshold);
+
+	return 0;
+
+ err_bounce:
+	kfree(device_info);
+	return ret;
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
+
+void brcm_pcie_bounce_unregister_dev(struct device *dev)
+{
+	struct dmabounce_device_info *device_info = g_dmabounce_device_info;
+
+	g_dmabounce_device_info = NULL;
+	set_dma_ops(dev, NULL);
+
+	if (!device_info) {
+		dev_warn(dev,
+			 "Never registered with dmabounce but attempting "
+			 "to unregister!\n");
+		return;
+	}
+
+	if (!list_empty(&device_info->safe_buffers)) {
+		dev_err(dev,
+			"Removing from dmabounce with pending buffers!\n");
+		BUG();
+	}
+
+	bounce_destroy(&device_info->pool);
+
+	DO_STATS(if (device_info->attr_res == 0)
+			device_remove_file(dev, &dev_attr_dmabounce_stats));
+
+	kfree(device_info);
+
+	dev_info(dev, "dmabounce: device unregistered\n");
+}
+EXPORT_SYMBOL(brcm_pcie_bounce_unregister_dev);
+
+MODULE_AUTHOR("Phil Elwell ");
+MODULE_DESCRIPTION("Dedicated DMA 
bounce support for pcie-brcmstb"); +MODULE_LICENSE("GPL"); --- /dev/null +++ b/drivers/pci/controller/pcie-brcmstb-bounce.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Raspberry Pi (Trading) Ltd. + */ + +#ifndef _PCIE_BRCMSTB_BOUNCE_H +#define _PCIE_BRCMSTB_BOUNCE_H + +#ifdef CONFIG_ARM + +int brcm_pcie_bounce_register_dev(struct device *dev, unsigned long buffer_size, + dma_addr_t threshold); + +int brcm_pcie_bounce_unregister_dev(struct device *dev); + +#else + +static inline int brcm_pcie_bounce_register_dev(struct device *dev, + unsigned long buffer_size, + dma_addr_t threshold) +{ + return 0; +} + +static inline int brcm_pcie_bounce_unregister_dev(struct device *dev) +{ + return 0; +} + +#endif + +#endif /* _PCIE_BRCMSTB_BOUNCE_H */ --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -29,6 +29,7 @@ #include #include #include "../pci.h" +#include "pcie-brcmstb-bounce.h" /* BRCM_PCIE_CAP_REGS - Offset for the mandatory capability config regs */ #define BRCM_PCIE_CAP_REGS 0x00ac @@ -53,6 +54,7 @@ #define PCIE_MISC_MSI_BAR_CONFIG_LO 0x4044 #define PCIE_MISC_MSI_BAR_CONFIG_HI 0x4048 #define PCIE_MISC_MSI_DATA_CONFIG 0x404c +#define PCIE_MISC_EOI_CTRL 0x4060 #define PCIE_MISC_PCIE_CTRL 0x4064 #define PCIE_MISC_PCIE_STATUS 0x4068 #define PCIE_MISC_REVISION 0x406c @@ -260,12 +262,14 @@ struct brcm_pcie { unsigned int rev; const int *reg_offsets; const int *reg_field_info; + u32 max_burst_size; enum pcie_type type; }; struct pcie_cfg_data { const int *reg_field_info; const int *offsets; + const u32 max_burst_size; const enum pcie_type type; }; @@ -288,24 +292,27 @@ static const int pcie_offset_bcm7425[] = static const struct pcie_cfg_data bcm7425_cfg = { .reg_field_info = pcie_reg_field_info, .offsets = pcie_offset_bcm7425, + .max_burst_size = BURST_SIZE_256, .type = BCM7425, }; static const int pcie_offsets[] = { [RGR1_SW_INIT_1] = 0x9210, [EXT_CFG_INDEX] = 0x9000, - [EXT_CFG_DATA] = 0x9004, + [EXT_CFG_DATA] = 0x8000, }; static const struct pcie_cfg_data bcm7435_cfg = { .reg_field_info = pcie_reg_field_info, .offsets = pcie_offsets, + .max_burst_size = BURST_SIZE_256, .type = BCM7435, }; static const struct pcie_cfg_data generic_cfg = { .reg_field_info = pcie_reg_field_info, .offsets = pcie_offsets, + .max_burst_size = BURST_SIZE_128, // before BURST_SIZE_512 .type = GENERIC, }; @@ -318,6 +325,7 @@ static const int pcie_offset_bcm7278[] = static const struct pcie_cfg_data bcm7278_cfg = { .reg_field_info = pcie_reg_field_info_bcm7278, .offsets = pcie_offset_bcm7278, + .max_burst_size = BURST_SIZE_512, .type = BCM7278, }; @@ -360,7 +368,6 @@ static struct pci_ops brcm_pcie_ops = { (reg##_##field##_MASK & (field_val << reg##_##field##_SHIFT))) static const struct dma_map_ops *arch_dma_ops; -static const struct dma_map_ops *brcm_dma_ops_ptr; static struct of_pci_range *dma_ranges; static int num_dma_ranges; @@ -369,6 +376,16 @@ static int num_memc; static int num_pcie; static DEFINE_MUTEX(brcm_pcie_lock); +static unsigned int bounce_buffer = 32*1024*1024; +module_param(bounce_buffer, uint, 0644); +MODULE_PARM_DESC(bounce_buffer, "Size of bounce buffer"); + +static unsigned int bounce_threshold = 0xc0000000; +module_param(bounce_threshold, uint, 0644); +MODULE_PARM_DESC(bounce_threshold, "Bounce threshold"); + +static struct brcm_pcie *g_pcie; + static dma_addr_t brcm_to_pci(dma_addr_t addr) { struct of_pci_range *p; @@ -457,12 +474,10 @@ static int brcm_map_sg(struct device *de struct scatterlist *sg; for_each_sg(sgl, 
sg, nents, i) { -#ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->dma_length = sg->length; -#endif + sg_dma_len(sg) = sg->length; sg->dma_address = - brcm_dma_ops_ptr->map_page(dev, sg_page(sg), sg->offset, - sg->length, dir, attrs); + brcm_map_page(dev, sg_page(sg), sg->offset, + sg->length, dir, attrs); if (dma_mapping_error(dev, sg->dma_address)) goto bad_mapping; } @@ -470,8 +485,8 @@ static int brcm_map_sg(struct device *de bad_mapping: for_each_sg(sgl, sg, i, j) - brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg), - sg_dma_len(sg), dir, attrs); + brcm_unmap_page(dev, sg_dma_address(sg), + sg_dma_len(sg), dir, attrs); return 0; } @@ -484,8 +499,8 @@ static void brcm_unmap_sg(struct device struct scatterlist *sg; for_each_sg(sgl, sg, nents, i) - brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg), - sg_dma_len(sg), dir, attrs); + brcm_unmap_page(dev, sg_dma_address(sg), + sg_dma_len(sg), dir, attrs); } static void brcm_sync_single_for_cpu(struct device *dev, @@ -531,8 +546,8 @@ void brcm_sync_sg_for_cpu(struct device int i; for_each_sg(sgl, sg, nents, i) - brcm_dma_ops_ptr->sync_single_for_cpu(dev, sg_dma_address(sg), - sg->length, dir); + brcm_sync_single_for_cpu(dev, sg_dma_address(sg), + sg->length, dir); } void brcm_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, @@ -542,9 +557,9 @@ void brcm_sync_sg_for_device(struct devi int i; for_each_sg(sgl, sg, nents, i) - brcm_dma_ops_ptr->sync_single_for_device(dev, - sg_dma_address(sg), - sg->length, dir); + brcm_sync_single_for_device(dev, + sg_dma_address(sg), + sg->length, dir); } static int brcm_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -633,17 +648,47 @@ static void brcm_set_dma_ops(struct devi set_dma_ops(dev, &brcm_dma_ops); } +static inline void brcm_pcie_perst_set(struct brcm_pcie *pcie, + unsigned int val); static int brcmstb_platform_notifier(struct notifier_block *nb, unsigned long event, void *__dev) { + extern unsigned long max_pfn; struct device *dev = __dev; + const char *rc_name = "0000:00:00.0"; - brcm_dma_ops_ptr = &brcm_dma_ops; - if (event != BUS_NOTIFY_ADD_DEVICE) - return NOTIFY_DONE; + switch (event) { + case BUS_NOTIFY_ADD_DEVICE: + if (max_pfn > (bounce_threshold/PAGE_SIZE) && + strcmp(dev->kobj.name, rc_name)) { + int ret; + + ret = brcm_pcie_bounce_register_dev(dev, bounce_buffer, + (dma_addr_t)bounce_threshold); + if (ret) { + dev_err(dev, + "brcm_pcie_bounce_register_dev() failed: %d\n", + ret); + return ret; + } + } + brcm_set_dma_ops(dev); + return NOTIFY_OK; + + case BUS_NOTIFY_DEL_DEVICE: + if (!strcmp(dev->kobj.name, rc_name) && g_pcie) { + /* Force a bus reset */ + brcm_pcie_perst_set(g_pcie, 1); + msleep(100); + brcm_pcie_perst_set(g_pcie, 0); + } else if (max_pfn > (bounce_threshold/PAGE_SIZE)) { + brcm_pcie_bounce_unregister_dev(dev); + } + return NOTIFY_OK; - brcm_set_dma_ops(dev); - return NOTIFY_OK; + default: + return NOTIFY_DONE; + } } static struct notifier_block brcmstb_platform_nb = { @@ -914,6 +959,7 @@ static void brcm_pcie_msi_isr(struct irq } } chained_irq_exit(chip, desc); + bcm_writel(1, msi->base + PCIE_MISC_EOI_CTRL); } static void brcm_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) @@ -930,7 +976,8 @@ static void brcm_compose_msi_msg(struct static int brcm_msi_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { - return -EINVAL; + struct brcm_msi *msi = irq_data_get_irq_chip_data(irq_data); + return __irq_set_affinity(msi->irq, mask, force); } static struct irq_chip brcm_msi_bottom_irq_chip = { @@ -1168,9 +1215,9 @@ 
static void __iomem *brcm_pcie_map_conf( return PCI_SLOT(devfn) ? NULL : base + where; /* For devices, write to the config space index register */ - idx = cfg_index(bus->number, devfn, where); + idx = cfg_index(bus->number, devfn, 0); bcm_writel(idx, pcie->base + IDX_ADDR(pcie)); - return base + DATA_ADDR(pcie) + (where & 0x3); + return base + DATA_ADDR(pcie) + where; } static inline void brcm_pcie_bridge_sw_init_set(struct brcm_pcie *pcie, @@ -1238,20 +1285,6 @@ static int brcm_pcie_parse_map_dma_range num_dma_ranges++; } - for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) { - u64 size = brcmstb_memory_memc_size(i); - - if (size == (u64)-1) { - dev_err(pcie->dev, "cannot get memc%d size", i); - return -EINVAL; - } else if (size) { - scb_size[i] = roundup_pow_of_two_64(size); - num_memc++; - } else { - break; - } - } - return 0; } @@ -1275,26 +1308,25 @@ static int brcm_pcie_add_controller(stru if (ret) goto done; - /* Determine num_memc and their sizes */ - for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) { - u64 size = brcmstb_memory_memc_size(i); - - if (size == (u64)-1) { - dev_err(dev, "cannot get memc%d size\n", i); - ret = -EINVAL; - goto done; - } else if (size) { - scb_size[i] = roundup_pow_of_two_64(size); - num_memc++; - } else { - break; + if (!num_dma_ranges) { + /* Determine num_memc and their sizes by other means */ + for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) { + u64 size = brcmstb_memory_memc_size(i); + + if (size == (u64)-1) { + dev_err(dev, "cannot get memc%d size\n", i); + ret = -EINVAL; + goto done; + } else if (size) { + scb_size[i] = roundup_pow_of_two_64(size); + } else { + break; + } } - } - if (!ret && num_memc == 0) { - ret = -EINVAL; - goto done; + num_memc = i; } + g_pcie = pcie; num_pcie++; done: mutex_unlock(&brcm_pcie_lock); @@ -1307,6 +1339,7 @@ static void brcm_pcie_remove_controller( if (--num_pcie > 0) goto out; + g_pcie = NULL; if (brcm_unregister_notifier()) dev_err(pcie->dev, "failed to unregister pci bus notifier\n"); kfree(dma_ranges); @@ -1367,7 +1400,7 @@ static int brcm_pcie_setup(struct brcm_p void __iomem *base = pcie->base; unsigned int scb_size_val; u64 rc_bar2_offset, rc_bar2_size, total_mem_size = 0; - u32 tmp, burst; + u32 tmp; int i, j, ret, limit; u16 nlw, cls, lnksta; bool ssc_good = false; @@ -1400,20 +1433,15 @@ static int brcm_pcie_setup(struct brcm_p /* Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN */ tmp = INSERT_FIELD(0, PCIE_MISC_MISC_CTRL, SCB_ACCESS_EN, 1); tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, CFG_READ_UR_MODE, 1); - burst = (pcie->type == GENERIC || pcie->type == BCM7278) - ? BURST_SIZE_512 : BURST_SIZE_256; - tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, MAX_BURST_SIZE, burst); + tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, MAX_BURST_SIZE, + pcie->max_burst_size); bcm_writel(tmp, base + PCIE_MISC_MISC_CTRL); /* * Set up inbound memory view for the EP (called RC_BAR2, * not to be confused with the BARs that are advertised by * the EP). - */ - for (i = 0; i < num_memc; i++) - total_mem_size += scb_size[i]; - - /* + * * The PCIe host controller by design must set the inbound * viewport to be a contiguous arrangement of all of the * system's memory. In addition, its size mut be a power of @@ -1429,55 +1457,49 @@ static int brcm_pcie_setup(struct brcm_p * the controller will know to send outbound memory downstream * and everything else upstream. 
*/ - rc_bar2_size = roundup_pow_of_two_64(total_mem_size); - if (dma_ranges) { + if (num_dma_ranges) { /* - * The best-case scenario is to place the inbound - * region in the first 4GB of pcie-space, as some - * legacy devices can only address 32bits. - * We would also like to put the MSI under 4GB - * as well, since some devices require a 32bit - * MSI target address. + * Use the base address and size(s) provided in the dma-ranges + * property. */ - if (total_mem_size <= 0xc0000000ULL && - rc_bar2_size <= 0x100000000ULL) { - rc_bar2_offset = 0; - /* If the viewport is less then 4GB we can fit - * the MSI target address under 4GB. Otherwise - * put it right below 64GB. - */ - msi_target_addr = - (rc_bar2_size == 0x100000000ULL) - ? BRCM_MSI_TARGET_ADDR_GT_4GB - : BRCM_MSI_TARGET_ADDR_LT_4GB; - } else { - /* - * The system memory is 4GB or larger so we - * cannot start the inbound region at location - * 0 (since we have to allow some space for - * outbound memory @ 3GB). So instead we - * start it at the 1x multiple of its size - */ - rc_bar2_offset = rc_bar2_size; - - /* Since we are starting the viewport at 4GB or - * higher, put the MSI target address below 4GB - */ - msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB; - } - } else { + for (i = 0; i < num_dma_ranges; i++) + scb_size[i] = roundup_pow_of_two_64(dma_ranges[i].size); + + num_memc = num_dma_ranges; + rc_bar2_offset = dma_ranges[0].pci_addr; + } else if (num_memc) { /* * Set simple configuration based on memory sizes - * only. We always start the viewport at address 0, - * and set the MSI target address accordingly. + * only. We always start the viewport at address 0. */ rc_bar2_offset = 0; + } else { + return -EINVAL; + } + + for (i = 0; i < num_memc; i++) + total_mem_size += scb_size[i]; + + rc_bar2_size = roundup_pow_of_two_64(total_mem_size); - msi_target_addr = (rc_bar2_size >= 0x100000000ULL) - ? BRCM_MSI_TARGET_ADDR_GT_4GB - : BRCM_MSI_TARGET_ADDR_LT_4GB; + /* Verify the alignment is correct */ + if (rc_bar2_offset & (rc_bar2_size - 1)) { + dev_err(dev, "inbound window is misaligned\n"); + return -EINVAL; } + + /* + * Position the MSI target low if possible. + * + * TO DO: Consider outbound window when choosing MSI target and + * verifying configuration. 
+ */ + msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB; + if (rc_bar2_offset <= msi_target_addr && + rc_bar2_offset + rc_bar2_size > msi_target_addr) + msi_target_addr = BRCM_MSI_TARGET_ADDR_GT_4GB; + pcie->msi_target_addr = msi_target_addr; tmp = lower_32_bits(rc_bar2_offset); @@ -1713,6 +1735,7 @@ static int brcm_pcie_probe(struct platfo data = of_id->data; pcie->reg_offsets = data->offsets; pcie->reg_field_info = data->reg_field_info; + pcie->max_burst_size = data->max_burst_size; pcie->type = data->type; pcie->dn = dn; pcie->dev = &pdev->dev; @@ -1732,7 +1755,7 @@ static int brcm_pcie_probe(struct platfo pcie->clk = of_clk_get_by_name(dn, "sw_pcie"); if (IS_ERR(pcie->clk)) { - dev_err(&pdev->dev, "could not get clock\n"); + dev_warn(&pdev->dev, "could not get clock\n"); pcie->clk = NULL; } pcie->base = base; @@ -1755,7 +1778,8 @@ static int brcm_pcie_probe(struct platfo ret = clk_prepare_enable(pcie->clk); if (ret) { - dev_err(&pdev->dev, "could not enable clock\n"); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "could not enable clock\n"); return ret; } @@ -1818,7 +1842,6 @@ static struct platform_driver brcm_pcie_ .remove = brcm_pcie_remove, .driver = { .name = "brcm-pcie", - .owner = THIS_MODULE, .of_match_table = brcm_pcie_match, .pm = &brcm_pcie_pm_ops, }, --- a/drivers/soc/bcm/brcmstb/Makefile +++ b/drivers/soc/bcm/brcmstb/Makefile @@ -1,2 +1,2 @@ -obj-y += common.o biuctrl.o +obj-y += common.o biuctrl.o memory.o obj-$(CONFIG_BRCMSTB_PM) += pm/ --- /dev/null +++ b/drivers/soc/bcm/brcmstb/memory.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright © 2015-2017 Broadcom */ + +#include +#include +#include +#include +#include +#include +#include + +/* Macro to help extract property data */ +#define DT_PROP_DATA_TO_U32(b, offs) (fdt32_to_cpu(*(u32 *)(b + offs))) + +/* Constants used when retrieving memc info */ +#define NUM_BUS_RANGES 10 +#define BUS_RANGE_ULIMIT_SHIFT 4 +#define BUS_RANGE_LLIMIT_SHIFT 4 +#define BUS_RANGE_PA_SHIFT 12 + +enum { + BUSNUM_MCP0 = 0x4, + BUSNUM_MCP1 = 0x5, + BUSNUM_MCP2 = 0x6, +}; + +/* + * If the DT nodes are handy, determine which MEMC holds the specified + * physical address. + */ +#ifdef CONFIG_ARCH_BRCMSTB +int __brcmstb_memory_phys_addr_to_memc(phys_addr_t pa, void __iomem *base) +{ + int memc = -1; + int i; + + for (i = 0; i < NUM_BUS_RANGES; i++, base += 8) { + const u64 ulimit_raw = readl(base); + const u64 llimit_raw = readl(base + 4); + const u64 ulimit = + ((ulimit_raw >> BUS_RANGE_ULIMIT_SHIFT) + << BUS_RANGE_PA_SHIFT) | 0xfff; + const u64 llimit = (llimit_raw >> BUS_RANGE_LLIMIT_SHIFT) + << BUS_RANGE_PA_SHIFT; + const u32 busnum = (u32)(ulimit_raw & 0xf); + + if (pa >= llimit && pa <= ulimit) { + if (busnum >= BUSNUM_MCP0 && busnum <= BUSNUM_MCP2) { + memc = busnum - BUSNUM_MCP0; + break; + } + } + } + + return memc; +} + +int brcmstb_memory_phys_addr_to_memc(phys_addr_t pa) +{ + int memc = -1; + struct device_node *np; + void __iomem *cpubiuctrl; + + np = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl"); + if (!np) + return memc; + + cpubiuctrl = of_iomap(np, 0); + if (!cpubiuctrl) + goto cleanup; + + memc = __brcmstb_memory_phys_addr_to_memc(pa, cpubiuctrl); + iounmap(cpubiuctrl); + +cleanup: + of_node_put(np); + + return memc; +} + +#elif defined(CONFIG_MIPS) +int brcmstb_memory_phys_addr_to_memc(phys_addr_t pa) +{ + /* The logic here is fairly simple and hardcoded: if pa <= 0x5000_0000, + * then this is MEMC0, else MEMC1. 
+ * + * For systems with 2GB on MEMC0, MEMC1 starts at 9000_0000, with 1GB + * on MEMC0, MEMC1 starts at 6000_0000. + */ + if (pa >= 0x50000000ULL) + return 1; + else + return 0; +} +#endif + +u64 brcmstb_memory_memc_size(int memc) +{ + const void *fdt = initial_boot_params; + const int mem_offset = fdt_path_offset(fdt, "/memory"); + int addr_cells = 1, size_cells = 1; + const struct fdt_property *prop; + int proplen, cellslen; + u64 memc_size = 0; + int i; + + /* Get root size and address cells if specified */ + prop = fdt_get_property(fdt, 0, "#size-cells", &proplen); + if (prop) + size_cells = DT_PROP_DATA_TO_U32(prop->data, 0); + + prop = fdt_get_property(fdt, 0, "#address-cells", &proplen); + if (prop) + addr_cells = DT_PROP_DATA_TO_U32(prop->data, 0); + + if (mem_offset < 0) + return -1; + + prop = fdt_get_property(fdt, mem_offset, "reg", &proplen); + cellslen = (int)sizeof(u32) * (addr_cells + size_cells); + if ((proplen % cellslen) != 0) + return -1; + + for (i = 0; i < proplen / cellslen; ++i) { + u64 addr = 0; + u64 size = 0; + int memc_idx; + int j; + + for (j = 0; j < addr_cells; ++j) { + int offset = (cellslen * i) + (sizeof(u32) * j); + + addr |= (u64)DT_PROP_DATA_TO_U32(prop->data, offset) << + ((addr_cells - j - 1) * 32); + } + for (j = 0; j < size_cells; ++j) { + int offset = (cellslen * i) + + (sizeof(u32) * (j + addr_cells)); + + size |= (u64)DT_PROP_DATA_TO_U32(prop->data, offset) << + ((size_cells - j - 1) * 32); + } + + if ((phys_addr_t)addr != addr) { + pr_err("phys_addr_t is smaller than provided address 0x%llx!\n", + addr); + return -1; + } + + memc_idx = brcmstb_memory_phys_addr_to_memc((phys_addr_t)addr); + if (memc_idx == memc) + memc_size += size; + } + + return memc_size; +} +EXPORT_SYMBOL_GPL(brcmstb_memory_memc_size); +
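
Reviewer note (illustrative, not part of the patch): the bounce layer added above relies on the two helpers exported from bcm2835-dma.c, and the intended call pattern is easy to misread from the diff alone. Below is a minimal sketch of that pattern under the patch's own semantics; the wrapper function and its name are hypothetical, only the two extern helpers come from the hunks above.

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/types.h>

/* Exported by the bcm2835-dma.c changes above */
extern int bcm2838_dma40_memcpy_init(struct device *dev);
extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);

/*
 * Hypothetical wrapper: copy a buffer into its bounce ("safe") location
 * before handing the safe bus address to the endpoint. Both arguments are
 * bus/DMA addresses, not kernel virtual addresses.
 */
static int bounce_copy_to_safe(struct device *rc_dev, dma_addr_t safe,
			       dma_addr_t unsafe, size_t len)
{
	/* Idempotent: allocates the single control block on first use */
	int ret = bcm2838_dma40_memcpy_init(rc_dev);

	if (ret)
		return ret;

	/* Synchronous: polls BCM2838_DMA40_CS_END before returning */
	bcm2838_dma40_memcpy(safe, unsafe, len);
	return 0;
}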
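
A worked example of the RC_BAR2 (inbound window) sizing rule now implemented in brcm_pcie_setup(): each dma-ranges entry is rounded up to a power of two, the sum is rounded up again, and the resulting window must be naturally aligned to its size. The standalone sketch below assumes a hypothetical pair of dma-ranges entries (2 GiB and 1 GiB, both at PCI address 0) and uses a local stand-in for the kernel's roundup_pow_of_two_64().

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel helper used by the driver */
static uint64_t roundup_pow_of_two_64(uint64_t v)
{
	uint64_t r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	/* Hypothetical dma-ranges sizes: 2 GiB and 1 GiB */
	uint64_t ranges[2] = { 2ULL << 30, 1ULL << 30 };
	uint64_t scb_size[2], total = 0, rc_bar2_size, rc_bar2_offset = 0;
	int i;

	for (i = 0; i < 2; i++) {
		scb_size[i] = roundup_pow_of_two_64(ranges[i]);
		total += scb_size[i];
	}

	rc_bar2_size = roundup_pow_of_two_64(total);	/* 3 GiB -> 4 GiB */

	if (rc_bar2_offset & (rc_bar2_size - 1))
		printf("inbound window is misaligned\n");
	else
		printf("RC_BAR2: offset 0x%llx size 0x%llx\n",
		       (unsigned long long)rc_bar2_offset,
		       (unsigned long long)rc_bar2_size);
	return 0;
}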
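
Similarly, brcmstb_memory_memc_size() walks the /memory node's reg property one (address, size) pair at a time, assembling each value from #address-cells/#size-cells 32-bit cells. The sketch below decodes one hypothetical pair (base 0x0, size 1 GiB, with #address-cells = 2 and #size-cells = 1); the cells are shown already converted from fdt32 byte order, so the fdt_* calls are omitted.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical reg = <0x0 0x00000000 0x40000000>: base 0, size 1 GiB */
	const uint32_t cells[] = { 0x0, 0x00000000, 0x40000000 };
	const int addr_cells = 2, size_cells = 1;
	uint64_t addr = 0, size = 0;
	int j;

	/* Assemble the address from its cells, most significant cell first */
	for (j = 0; j < addr_cells; j++)
		addr |= (uint64_t)cells[j] << ((addr_cells - j - 1) * 32);

	/* Then the size, from the cells that follow the address */
	for (j = 0; j < size_cells; j++)
		size |= (uint64_t)cells[addr_cells + j] <<
			((size_cells - j - 1) * 32);

	printf("memory range: base 0x%llx size 0x%llx\n",
	       (unsigned long long)addr, (unsigned long long)size);
	return 0;
}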