aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/bcm27xx/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/bcm27xx/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch')
-rw-r--r--target/linux/bcm27xx/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch569
1 files changed, 0 insertions, 569 deletions
diff --git a/target/linux/bcm27xx/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch b/target/linux/bcm27xx/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch
deleted file mode 100644
index 75615e372f..0000000000
--- a/target/linux/bcm27xx/patches-4.19/950-0457-PCI-brcmstb-Add-dma-range-mapping-for-inbound-traffi.patch
+++ /dev/null
@@ -1,569 +0,0 @@
-From d3cc1c713b9436a7dc72788caa1d8de63ac3a01b Mon Sep 17 00:00:00 2001
-From: Phil Elwell <phil@raspberrypi.org>
-Date: Tue, 19 Feb 2019 22:06:59 +0000
-Subject: [PATCH] PCI: brcmstb: Add dma-range mapping for inbound
- traffic
-
-The Broadcom STB PCIe host controller is intimately related to the
-memory subsystem. This close relationship adds complexity to how cpu
-system memory is mapped to PCIe memory. Ideally, this mapping is an
-identity mapping, or an identity mapping off by a constant. Not so in
-this case.
-
-Consider the Broadcom reference board BCM97445LCC_4X8 which has 6 GB
-of system memory. Here is how the PCIe controller maps the
-system memory to PCIe memory:
-
- memc0-a@[ 0....3fffffff] <=> pci@[ 0....3fffffff]
- memc0-b@[100000000...13fffffff] <=> pci@[ 40000000....7fffffff]
- memc1-a@[ 40000000....7fffffff] <=> pci@[ 80000000....bfffffff]
- memc1-b@[300000000...33fffffff] <=> pci@[ c0000000....ffffffff]
- memc2-a@[ 80000000....bfffffff] <=> pci@[100000000...13fffffff]
- memc2-b@[c00000000...c3fffffff] <=> pci@[140000000...17fffffff]
-
-Although there are some "gaps" that can be added between the
-individual mappings by software, the permutation of memory regions for
-the most part is fixed by HW. The solution of having something close
-to an identity mapping is not possible.
-
-The idea behind this HW design is that the same PCIe module can
-act as an RC or EP, and if it acts as an EP it concatenates all
-of system memory into a BAR so anything can be accessed. Unfortunately,
-when the PCIe block is in the role of an RC it also presents this
-"BAR" to downstream PCIe devices, rather than offering an identity map
-between its system memory and PCIe space.
-
-Suppose that an endpoint driver allocs some DMA memory. Suppose this
-memory is located at 0x6000_0000, which is in the middle of memc1-a.
-The driver wants a dma_addr_t value that it can pass on to the EP to
-use. Without doing any custom mapping, the EP will use this value for
-DMA: the driver will get a dma_addr_t equal to 0x6000_0000. But this
-won't work; the device needs a dma_addr_t that reflects the PCIe space
-address, namely 0xa000_0000.
-
-So, essentially the solution to this problem must modify the
-dma_addr_t returned by the DMA routines routines. There are two
-ways (I know of) of doing this:
-
-(a) overriding/redefining the dma_to_phys() and phys_to_dma() calls
-that are used by the dma_ops routines. This is the approach of
-
- arch/mips/cavium-octeon/dma-octeon.c
-
-In ARM and ARM64 these two routines are defiend in asm/dma-mapping.h
-as static inline functions.
-
-(b) Subscribe to a notifier that notifies when a device is added to a
-bus. When this happens, set_dma_ops() can be called for the device.
-This method is mentioned in:
-
- http://lxr.free-electrons.com/source/drivers/of/platform.c?v=3.16#L152
-
-where it says as a comment
-
- "In case if platform code need to use own special DMA
- configuration, it can use Platform bus notifier and
- handle BUS_NOTIFY_ADD_DEVICE event to fix up DMA
- configuration."
-
-Solution (b) is what this commit does. It uses its own set of
-dma_ops which are wrappers around the arch_dma_ops. The
-wrappers translate the dma addresses before/after invoking
-the arch_dma_ops, as appropriate.
-
-Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
----
- drivers/pci/controller/pcie-brcmstb.c | 420 +++++++++++++++++++++++++-
- 1 file changed, 411 insertions(+), 9 deletions(-)
-
---- a/drivers/pci/controller/pcie-brcmstb.c
-+++ b/drivers/pci/controller/pcie-brcmstb.c
-@@ -4,6 +4,7 @@
- #include <linux/clk.h>
- #include <linux/compiler.h>
- #include <linux/delay.h>
-+#include <linux/dma-mapping.h>
- #include <linux/init.h>
- #include <linux/interrupt.h>
- #include <linux/io.h>
-@@ -319,11 +320,307 @@ static struct pci_ops brcm_pcie_ops = {
- ((val & ~reg##_##field##_MASK) | \
- (reg##_##field##_MASK & (field_val << reg##_##field##_SHIFT)))
-
-+static const struct dma_map_ops *arch_dma_ops;
-+static const struct dma_map_ops *brcm_dma_ops_ptr;
-+static struct of_pci_range *dma_ranges;
-+static int num_dma_ranges;
-+
- static phys_addr_t scb_size[BRCM_MAX_SCB];
- static int num_memc;
- static int num_pcie;
- static DEFINE_MUTEX(brcm_pcie_lock);
-
-+static dma_addr_t brcm_to_pci(dma_addr_t addr)
-+{
-+ struct of_pci_range *p;
-+
-+ if (!num_dma_ranges)
-+ return addr;
-+
-+ for (p = dma_ranges; p < &dma_ranges[num_dma_ranges]; p++)
-+ if (addr >= p->cpu_addr && addr < (p->cpu_addr + p->size))
-+ return addr - p->cpu_addr + p->pci_addr;
-+
-+ return addr;
-+}
-+
-+static dma_addr_t brcm_to_cpu(dma_addr_t addr)
-+{
-+ struct of_pci_range *p;
-+
-+ if (!num_dma_ranges)
-+ return addr;
-+
-+ for (p = dma_ranges; p < &dma_ranges[num_dma_ranges]; p++)
-+ if (addr >= p->pci_addr && addr < (p->pci_addr + p->size))
-+ return addr - p->pci_addr + p->cpu_addr;
-+
-+ return addr;
-+}
-+
-+static void *brcm_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-+ gfp_t gfp, unsigned long attrs)
-+{
-+ void *ret;
-+
-+ ret = arch_dma_ops->alloc(dev, size, handle, gfp, attrs);
-+ if (ret)
-+ *handle = brcm_to_pci(*handle);
-+ return ret;
-+}
-+
-+static void brcm_free(struct device *dev, size_t size, void *cpu_addr,
-+ dma_addr_t handle, unsigned long attrs)
-+{
-+ handle = brcm_to_cpu(handle);
-+ arch_dma_ops->free(dev, size, cpu_addr, handle, attrs);
-+}
-+
-+static int brcm_mmap(struct device *dev, struct vm_area_struct *vma,
-+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
-+ unsigned long attrs)
-+{
-+ dma_addr = brcm_to_cpu(dma_addr);
-+ return arch_dma_ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-+}
-+
-+static int brcm_get_sgtable(struct device *dev, struct sg_table *sgt,
-+ void *cpu_addr, dma_addr_t handle, size_t size,
-+ unsigned long attrs)
-+{
-+ handle = brcm_to_cpu(handle);
-+ return arch_dma_ops->get_sgtable(dev, sgt, cpu_addr, handle, size,
-+ attrs);
-+}
-+
-+static dma_addr_t brcm_map_page(struct device *dev, struct page *page,
-+ unsigned long offset, size_t size,
-+ enum dma_data_direction dir,
-+ unsigned long attrs)
-+{
-+ return brcm_to_pci(arch_dma_ops->map_page(dev, page, offset, size,
-+ dir, attrs));
-+}
-+
-+static void brcm_unmap_page(struct device *dev, dma_addr_t handle,
-+ size_t size, enum dma_data_direction dir,
-+ unsigned long attrs)
-+{
-+ handle = brcm_to_cpu(handle);
-+ arch_dma_ops->unmap_page(dev, handle, size, dir, attrs);
-+}
-+
-+static int brcm_map_sg(struct device *dev, struct scatterlist *sgl,
-+ int nents, enum dma_data_direction dir,
-+ unsigned long attrs)
-+{
-+ int i, j;
-+ struct scatterlist *sg;
-+
-+ for_each_sg(sgl, sg, nents, i) {
-+#ifdef CONFIG_NEED_SG_DMA_LENGTH
-+ sg->dma_length = sg->length;
-+#endif
-+ sg->dma_address =
-+ brcm_dma_ops_ptr->map_page(dev, sg_page(sg), sg->offset,
-+ sg->length, dir, attrs);
-+ if (dma_mapping_error(dev, sg->dma_address))
-+ goto bad_mapping;
-+ }
-+ return nents;
-+
-+bad_mapping:
-+ for_each_sg(sgl, sg, i, j)
-+ brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
-+ sg_dma_len(sg), dir, attrs);
-+ return 0;
-+}
-+
-+static void brcm_unmap_sg(struct device *dev,
-+ struct scatterlist *sgl, int nents,
-+ enum dma_data_direction dir,
-+ unsigned long attrs)
-+{
-+ int i;
-+ struct scatterlist *sg;
-+
-+ for_each_sg(sgl, sg, nents, i)
-+ brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
-+ sg_dma_len(sg), dir, attrs);
-+}
-+
-+static void brcm_sync_single_for_cpu(struct device *dev,
-+ dma_addr_t handle, size_t size,
-+ enum dma_data_direction dir)
-+{
-+ handle = brcm_to_cpu(handle);
-+ arch_dma_ops->sync_single_for_cpu(dev, handle, size, dir);
-+}
-+
-+static void brcm_sync_single_for_device(struct device *dev,
-+ dma_addr_t handle, size_t size,
-+ enum dma_data_direction dir)
-+{
-+ handle = brcm_to_cpu(handle);
-+ arch_dma_ops->sync_single_for_device(dev, handle, size, dir);
-+}
-+
-+static dma_addr_t brcm_map_resource(struct device *dev, phys_addr_t phys,
-+ size_t size,
-+ enum dma_data_direction dir,
-+ unsigned long attrs)
-+{
-+ if (arch_dma_ops->map_resource)
-+ return brcm_to_pci(arch_dma_ops->map_resource
-+ (dev, phys, size, dir, attrs));
-+ return brcm_to_pci((dma_addr_t)phys);
-+}
-+
-+static void brcm_unmap_resource(struct device *dev, dma_addr_t handle,
-+ size_t size, enum dma_data_direction dir,
-+ unsigned long attrs)
-+{
-+ if (arch_dma_ops->unmap_resource)
-+ arch_dma_ops->unmap_resource(dev, brcm_to_cpu(handle), size,
-+ dir, attrs);
-+}
-+
-+void brcm_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
-+ int nents, enum dma_data_direction dir)
-+{
-+ struct scatterlist *sg;
-+ int i;
-+
-+ for_each_sg(sgl, sg, nents, i)
-+ brcm_dma_ops_ptr->sync_single_for_cpu(dev, sg_dma_address(sg),
-+ sg->length, dir);
-+}
-+
-+void brcm_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
-+ int nents, enum dma_data_direction dir)
-+{
-+ struct scatterlist *sg;
-+ int i;
-+
-+ for_each_sg(sgl, sg, nents, i)
-+ brcm_dma_ops_ptr->sync_single_for_device(dev,
-+ sg_dma_address(sg),
-+ sg->length, dir);
-+}
-+
-+static int brcm_mapping_error(struct device *dev, dma_addr_t dma_addr)
-+{
-+ return arch_dma_ops->mapping_error(dev, dma_addr);
-+}
-+
-+static int brcm_dma_supported(struct device *dev, u64 mask)
-+{
-+ if (num_dma_ranges) {
-+ /*
-+ * It is our translated addresses that the EP will "see", so
-+ * we check all of the ranges for the largest possible value.
-+ */
-+ int i;
-+
-+ for (i = 0; i < num_dma_ranges; i++)
-+ if (dma_ranges[i].pci_addr + dma_ranges[i].size - 1
-+ > mask)
-+ return 0;
-+ return 1;
-+ }
-+
-+ return arch_dma_ops->dma_supported(dev, mask);
-+}
-+
-+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
-+u64 brcm_get_required_mask)(struct device *dev)
-+{
-+ return arch_dma_ops->get_required_mask(dev);
-+}
-+#endif
-+
-+static const struct dma_map_ops brcm_dma_ops = {
-+ .alloc = brcm_alloc,
-+ .free = brcm_free,
-+ .mmap = brcm_mmap,
-+ .get_sgtable = brcm_get_sgtable,
-+ .map_page = brcm_map_page,
-+ .unmap_page = brcm_unmap_page,
-+ .map_sg = brcm_map_sg,
-+ .unmap_sg = brcm_unmap_sg,
-+ .map_resource = brcm_map_resource,
-+ .unmap_resource = brcm_unmap_resource,
-+ .sync_single_for_cpu = brcm_sync_single_for_cpu,
-+ .sync_single_for_device = brcm_sync_single_for_device,
-+ .sync_sg_for_cpu = brcm_sync_sg_for_cpu,
-+ .sync_sg_for_device = brcm_sync_sg_for_device,
-+ .mapping_error = brcm_mapping_error,
-+ .dma_supported = brcm_dma_supported,
-+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
-+ .get_required_mask = brcm_get_required_mask,
-+#endif
-+};
-+
-+static void brcm_set_dma_ops(struct device *dev)
-+{
-+ int ret;
-+
-+ if (IS_ENABLED(CONFIG_ARM64)) {
-+ /*
-+ * We are going to invoke get_dma_ops(). That
-+ * function, at this point in time, invokes
-+ * get_arch_dma_ops(), and for ARM64 that function
-+ * returns a pointer to dummy_dma_ops. So then we'd
-+ * like to call arch_setup_dma_ops(), but that isn't
-+ * exported. Instead, we call of_dma_configure(),
-+ * which is exported, and this calls
-+ * arch_setup_dma_ops(). Once we do this the call to
-+ * get_dma_ops() will work properly because
-+ * dev->dma_ops will be set.
-+ */
-+ ret = of_dma_configure(dev, dev->of_node, true);
-+ if (ret) {
-+ dev_err(dev, "of_dma_configure() failed: %d\n", ret);
-+ return;
-+ }
-+ }
-+
-+ arch_dma_ops = get_dma_ops(dev);
-+ if (!arch_dma_ops) {
-+ dev_err(dev, "failed to get arch_dma_ops\n");
-+ return;
-+ }
-+
-+ set_dma_ops(dev, &brcm_dma_ops);
-+}
-+
-+static int brcmstb_platform_notifier(struct notifier_block *nb,
-+ unsigned long event, void *__dev)
-+{
-+ struct device *dev = __dev;
-+
-+ brcm_dma_ops_ptr = &brcm_dma_ops;
-+ if (event != BUS_NOTIFY_ADD_DEVICE)
-+ return NOTIFY_DONE;
-+
-+ brcm_set_dma_ops(dev);
-+ return NOTIFY_OK;
-+}
-+
-+static struct notifier_block brcmstb_platform_nb = {
-+ .notifier_call = brcmstb_platform_notifier,
-+};
-+
-+static int brcm_register_notifier(void)
-+{
-+ return bus_register_notifier(&pci_bus_type, &brcmstb_platform_nb);
-+}
-+
-+static int brcm_unregister_notifier(void)
-+{
-+ return bus_unregister_notifier(&pci_bus_type, &brcmstb_platform_nb);
-+}
-+
- static u32 rd_fld(void __iomem *p, u32 mask, int shift)
- {
- return (bcm_readl(p) & mask) >> shift;
-@@ -597,9 +894,71 @@ static inline void brcm_pcie_perst_set(s
- WR_FLD_RB(pcie->base, PCIE_MISC_PCIE_CTRL, PCIE_PERSTB, !val);
- }
-
-+static int pci_dma_range_parser_init(struct of_pci_range_parser *parser,
-+ struct device_node *node)
-+{
-+ const int na = 3, ns = 2;
-+ int rlen;
-+
-+ parser->node = node;
-+ parser->pna = of_n_addr_cells(node);
-+ parser->np = parser->pna + na + ns;
-+
-+ parser->range = of_get_property(node, "dma-ranges", &rlen);
-+ if (!parser->range)
-+ return -ENOENT;
-+
-+ parser->end = parser->range + rlen / sizeof(__be32);
-+
-+ return 0;
-+}
-+
-+static int brcm_pcie_parse_map_dma_ranges(struct brcm_pcie *pcie)
-+{
-+ int i;
-+ struct of_pci_range_parser parser;
-+ struct device_node *dn = pcie->dn;
-+
-+ /*
-+ * Parse dma-ranges property if present. If there are multiple
-+ * PCIe controllers, we only have to parse from one of them since
-+ * the others will have an identical mapping.
-+ */
-+ if (!pci_dma_range_parser_init(&parser, dn)) {
-+ unsigned int max_ranges
-+ = (parser.end - parser.range) / parser.np;
-+
-+ dma_ranges = kcalloc(max_ranges, sizeof(struct of_pci_range),
-+ GFP_KERNEL);
-+ if (!dma_ranges)
-+ return -ENOMEM;
-+
-+ for (i = 0; of_pci_range_parser_one(&parser, dma_ranges + i);
-+ i++)
-+ num_dma_ranges++;
-+ }
-+
-+ for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
-+ u64 size = brcmstb_memory_memc_size(i);
-+
-+ if (size == (u64)-1) {
-+ dev_err(pcie->dev, "cannot get memc%d size", i);
-+ return -EINVAL;
-+ } else if (size) {
-+ scb_size[i] = roundup_pow_of_two_64(size);
-+ num_memc++;
-+ } else {
-+ break;
-+ }
-+ }
-+
-+ return 0;
-+}
-+
- static int brcm_pcie_add_controller(struct brcm_pcie *pcie)
- {
- int i, ret = 0;
-+ struct device *dev = pcie->dev;
-
- mutex_lock(&brcm_pcie_lock);
- if (num_pcie > 0) {
-@@ -607,12 +966,21 @@ static int brcm_pcie_add_controller(stru
- goto done;
- }
-
-+ ret = brcm_register_notifier();
-+ if (ret) {
-+ dev_err(dev, "failed to register pci bus notifier\n");
-+ goto done;
-+ }
-+ ret = brcm_pcie_parse_map_dma_ranges(pcie);
-+ if (ret)
-+ goto done;
-+
- /* Determine num_memc and their sizes */
- for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
- u64 size = brcmstb_memory_memc_size(i);
-
- if (size == (u64)-1) {
-- dev_err(pcie->dev, "cannot get memc%d size\n", i);
-+ dev_err(dev, "cannot get memc%d size\n", i);
- ret = -EINVAL;
- goto done;
- } else if (size) {
-@@ -636,8 +1004,16 @@ done:
- static void brcm_pcie_remove_controller(struct brcm_pcie *pcie)
- {
- mutex_lock(&brcm_pcie_lock);
-- if (--num_pcie == 0)
-- num_memc = 0;
-+ if (--num_pcie > 0)
-+ goto out;
-+
-+ if (brcm_unregister_notifier())
-+ dev_err(pcie->dev, "failed to unregister pci bus notifier\n");
-+ kfree(dma_ranges);
-+ dma_ranges = NULL;
-+ num_dma_ranges = 0;
-+ num_memc = 0;
-+out:
- mutex_unlock(&brcm_pcie_lock);
- }
-
-@@ -757,6 +1133,38 @@ static int brcm_pcie_setup(struct brcm_p
- */
- rc_bar2_offset = 0;
-
-+ if (dma_ranges) {
-+ /*
-+ * The best-case scenario is to place the inbound
-+ * region in the first 4GB of pci-space, as some
-+ * legacy devices can only address 32bits.
-+ * We would also like to put the MSI under 4GB
-+ * as well, since some devices require a 32bit
-+ * MSI target address.
-+ */
-+ if (total_mem_size <= 0xc0000000ULL &&
-+ rc_bar2_size <= 0x100000000ULL) {
-+ rc_bar2_offset = 0;
-+ } else {
-+ /*
-+ * The system memory is 4GB or larger so we
-+ * cannot start the inbound region at location
-+ * 0 (since we have to allow some space for
-+ * outbound memory @ 3GB). So instead we
-+ * start it at the 1x multiple of its size
-+ */
-+ rc_bar2_offset = rc_bar2_size;
-+ }
-+
-+ } else {
-+ /*
-+ * Set simple configuration based on memory sizes
-+ * only. We always start the viewport at address 0,
-+ * and set the MSI target address accordingly.
-+ */
-+ rc_bar2_offset = 0;
-+ }
-+
- tmp = lower_32_bits(rc_bar2_offset);
- tmp = INSERT_FIELD(tmp, PCIE_MISC_RC_BAR2_CONFIG_LO, SIZE,
- encode_ibar_size(rc_bar2_size));
-@@ -967,7 +1375,6 @@ static int brcm_pcie_probe(struct platfo
- struct brcm_pcie *pcie;
- struct resource *res;
- void __iomem *base;
-- u32 tmp;
- struct pci_host_bridge *bridge;
- struct pci_bus *child;
-
-@@ -984,11 +1391,6 @@ static int brcm_pcie_probe(struct platfo
- return -EINVAL;
- }
-
-- if (of_property_read_u32(dn, "dma-ranges", &tmp) == 0) {
-- dev_err(&pdev->dev, "cannot yet handle dma-ranges\n");
-- return -EINVAL;
-- }
--
- data = of_id->data;
- pcie->reg_offsets = data->offsets;
- pcie->reg_field_info = data->reg_field_info;