From f07e572f6447465d8938679533d604e402b0f066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 18 Feb 2021 18:04:33 +0100 Subject: bcm27xx: import latest patches from the RPi foundation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bcm2708: boot tested on RPi B+ v1.2 bcm2709: boot tested on RPi 3B v1.2 and RPi 4B v1.1 4G bcm2710: boot tested on RPi 3B v1.2 bcm2711: boot tested on RPi 4B v1.1 4G Signed-off-by: Álvaro Fernández Rojas --- ...ing-treat-dev-bus_dma_mask-as-a-DMA-limit.patch | 366 +++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 target/linux/bcm27xx/patches-5.4/950-0445-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch (limited to 'target/linux/bcm27xx/patches-5.4/950-0445-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch') diff --git a/target/linux/bcm27xx/patches-5.4/950-0445-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch b/target/linux/bcm27xx/patches-5.4/950-0445-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch new file mode 100644 index 0000000000..d968e93153 --- /dev/null +++ b/target/linux/bcm27xx/patches-5.4/950-0445-dma-mapping-treat-dev-bus_dma_mask-as-a-DMA-limit.patch @@ -0,0 +1,366 @@ +From d5430c466b3c3b5f631ee37be333a40924575b72 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Thu, 21 Nov 2019 10:26:44 +0100 +Subject: [PATCH] dma-mapping: treat dev->bus_dma_mask as a DMA limit + +commit a7ba70f1787f977f970cd116076c6fce4b9e01cc upstream. + +Using a mask to represent bus DMA constraints has a set of limitations. +The biggest one being it can only hold a power of two (minus one). The +DMA mapping code is already aware of this and treats dev->bus_dma_mask +as a limit. This quirk is already used by some architectures although +still rare. 
+ +With the introduction of the Raspberry Pi 4 we've found a new contender +for the use of bus DMA limits, as its PCIe bus can only address the +lower 3GB of memory (of a total of 4GB). This is impossible to represent +with a mask. To make things worse the device-tree code rounds non power +of two bus DMA limits to the next power of two, which is unacceptable in +this case. + +In the light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all +over the tree and treat it as such. Note that dev->bus_dma_limit should +contain the highest accessible DMA address. + +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Robin Murphy +Signed-off-by: Christoph Hellwig +--- + arch/mips/pci/fixup-sb1250.c | 16 ++++++++-------- + arch/powerpc/sysdev/fsl_pci.c | 6 +++--- + arch/x86/kernel/pci-dma.c | 2 +- + arch/x86/mm/mem_encrypt.c | 2 +- + arch/x86/pci/sta2x11-fixup.c | 2 +- + drivers/acpi/arm64/iort.c | 20 +++++++------------- + drivers/ata/ahci.c | 2 +- + drivers/iommu/dma-iommu.c | 3 +-- + drivers/of/device.c | 9 +++++---- + include/linux/device.h | 6 +++--- + include/linux/dma-direct.h | 2 +- + include/linux/dma-mapping.h | 2 +- + kernel/dma/direct.c | 27 +++++++++++++-------------- + 13 files changed, 46 insertions(+), 53 deletions(-) + +--- a/arch/mips/pci/fixup-sb1250.c ++++ b/arch/mips/pci/fixup-sb1250.c +@@ -21,22 +21,22 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SI + + /* + * The BCM1250, etc. PCI host bridge does not support DAC on its 32-bit +- * bus, so we set the bus's DMA mask accordingly. However the HT link ++ * bus, so we set the bus's DMA limit accordingly. 
However the HT link + * down the artificial PCI-HT bridge supports 40-bit addressing and the + * SP1011 HT-PCI bridge downstream supports both DAC and a 64-bit bus + * width, so we record the PCI-HT bridge's secondary and subordinate bus +- * numbers and do not set the mask for devices present in the inclusive ++ * numbers and do not set the limit for devices present in the inclusive + * range of those. + */ +-struct sb1250_bus_dma_mask_exclude { ++struct sb1250_bus_dma_limit_exclude { + bool set; + unsigned char start; + unsigned char end; + }; + +-static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data) ++static int sb1250_bus_dma_limit(struct pci_dev *dev, void *data) + { +- struct sb1250_bus_dma_mask_exclude *exclude = data; ++ struct sb1250_bus_dma_limit_exclude *exclude = data; + bool exclude_this; + bool ht_bridge; + +@@ -55,7 +55,7 @@ static int sb1250_bus_dma_mask(struct pc + exclude->start, exclude->end); + } else { + dev_dbg(&dev->dev, "disabling DAC for device"); +- dev->dev.bus_dma_mask = DMA_BIT_MASK(32); ++ dev->dev.bus_dma_limit = DMA_BIT_MASK(32); + } + + return 0; +@@ -63,9 +63,9 @@ static int sb1250_bus_dma_mask(struct pc + + static void quirk_sb1250_pci_dac(struct pci_dev *dev) + { +- struct sb1250_bus_dma_mask_exclude exclude = { .set = false }; ++ struct sb1250_bus_dma_limit_exclude exclude = { .set = false }; + +- pci_walk_bus(dev->bus, sb1250_bus_dma_mask, &exclude); ++ pci_walk_bus(dev->bus, sb1250_bus_dma_limit, &exclude); + } + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI, + quirk_sb1250_pci_dac); +--- a/arch/powerpc/sysdev/fsl_pci.c ++++ b/arch/powerpc/sysdev/fsl_pci.c +@@ -115,8 +115,8 @@ static void pci_dma_dev_setup_swiotlb(st + { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + +- pdev->dev.bus_dma_mask = +- hose->dma_window_base_cur + hose->dma_window_size; ++ pdev->dev.bus_dma_limit = ++ hose->dma_window_base_cur + hose->dma_window_size - 1; + } + + static void setup_swiotlb_ops(struct 
pci_controller *hose) +@@ -135,7 +135,7 @@ static void fsl_pci_dma_set_mask(struct + * mapping that allows addressing any RAM address from across PCI. + */ + if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) { +- dev->bus_dma_mask = 0; ++ dev->bus_dma_limit = 0; + dev->archdata.dma_offset = pci64_dma_offset; + } + } +--- a/arch/x86/kernel/pci-dma.c ++++ b/arch/x86/kernel/pci-dma.c +@@ -146,7 +146,7 @@ rootfs_initcall(pci_iommu_init); + + static int via_no_dac_cb(struct pci_dev *pdev, void *data) + { +- pdev->dev.bus_dma_mask = DMA_BIT_MASK(32); ++ pdev->dev.bus_dma_limit = DMA_BIT_MASK(32); + return 0; + } + +--- a/arch/x86/mm/mem_encrypt.c ++++ b/arch/x86/mm/mem_encrypt.c +@@ -367,7 +367,7 @@ bool force_dma_unencrypted(struct device + if (sme_active()) { + u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); + u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, +- dev->bus_dma_mask); ++ dev->bus_dma_limit); + + if (dma_dev_mask <= dma_enc_mask) + return true; +--- a/arch/x86/pci/sta2x11-fixup.c ++++ b/arch/x86/pci/sta2x11-fixup.c +@@ -143,7 +143,7 @@ static void sta2x11_map_ep(struct pci_de + + dev->dma_pfn_offset = PFN_DOWN(-amba_base); + +- dev->bus_dma_mask = max_amba_addr; ++ dev->bus_dma_limit = max_amba_addr; + pci_set_consistent_dma_mask(pdev, max_amba_addr); + pci_set_dma_mask(pdev, max_amba_addr); + +--- a/drivers/acpi/arm64/iort.c ++++ b/drivers/acpi/arm64/iort.c +@@ -1062,8 +1062,8 @@ static int rc_dma_get_range(struct devic + */ + void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) + { +- u64 mask, dmaaddr = 0, size = 0, offset = 0; +- int ret, msb; ++ u64 end, mask, dmaaddr = 0, size = 0, offset = 0; ++ int ret; + + /* + * If @dev is expected to be DMA-capable then the bus code that created +@@ -1090,19 +1090,13 @@ void iort_dma_setup(struct device *dev, + } + + if (!ret) { +- msb = fls64(dmaaddr + size - 1); + /* +- * Round-up to the power-of-two mask or set +- * the mask to the whole 64-bit address space +- * in 
case the DMA region covers the full +- * memory window. ++ * Limit coherent and dma mask based on size retrieved from ++ * firmware. + */ +- mask = msb == 64 ? U64_MAX : (1ULL << msb) - 1; +- /* +- * Limit coherent and dma mask based on size +- * retrieved from firmware. +- */ +- dev->bus_dma_mask = mask; ++ end = dmaaddr + size - 1; ++ mask = DMA_BIT_MASK(ilog2(end) + 1); ++ dev->bus_dma_limit = end; + dev->coherent_dma_mask = mask; + *dev->dma_mask = mask; + } +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -900,7 +900,7 @@ static int ahci_configure_dma_masks(stru + * value, don't extend it here. This happens on STA2X11, for example. + * + * XXX: manipulating the DMA mask from platform code is completely +- * bogus, platform code should use dev->bus_dma_mask instead.. ++ * bogus, platform code should use dev->bus_dma_limit instead.. + */ + if (pdev->dma_mask && pdev->dma_mask < DMA_BIT_MASK(32)) + return 0; +--- a/drivers/iommu/dma-iommu.c ++++ b/drivers/iommu/dma-iommu.c +@@ -404,8 +404,7 @@ static dma_addr_t iommu_dma_alloc_iova(s + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); + +- if (dev->bus_dma_mask) +- dma_limit &= dev->bus_dma_mask; ++ dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); + + if (domain->geometry.force_aperture) + dma_limit = min(dma_limit, domain->geometry.aperture_end); +--- a/drivers/of/device.c ++++ b/drivers/of/device.c +@@ -93,7 +93,7 @@ int of_dma_configure(struct device *dev, + bool coherent; + unsigned long offset; + const struct iommu_ops *iommu; +- u64 mask; ++ u64 mask, end; + + ret = of_dma_get_range(np, &dma_addr, &paddr, &size); + if (ret < 0) { +@@ -148,12 +148,13 @@ int of_dma_configure(struct device *dev, + * Limit coherent and dma mask based on size and default mask + * set by the driver. 
+ */ +- mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1); ++ end = dma_addr + size - 1; ++ mask = DMA_BIT_MASK(ilog2(end) + 1); + dev->coherent_dma_mask &= mask; + *dev->dma_mask &= mask; +- /* ...but only set bus mask if we found valid dma-ranges earlier */ ++ /* ...but only set bus limit if we found valid dma-ranges earlier */ + if (!ret) +- dev->bus_dma_mask = mask; ++ dev->bus_dma_limit = end; + + coherent = of_dma_is_coherent(np); + dev_dbg(dev, "device is%sdma coherent\n", +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -1186,8 +1186,8 @@ struct dev_links_info { + * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all + * hardware supports 64-bit addresses for consistent allocations + * such descriptors. +- * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA +- * limit than the device itself supports. ++ * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller ++ * DMA limit than the device itself supports. + * @dma_pfn_offset: offset of DMA memory range relatively of RAM + * @dma_parms: A low level driver may set these to teach IOMMU code about + * segment limitations. +@@ -1270,7 +1270,7 @@ struct device { + not all hardware supports + 64 bit addresses for consistent + allocations such descriptors. 
*/ +- u64 bus_dma_mask; /* upstream dma_mask constraint */ ++ u64 bus_dma_limit; /* upstream dma constraint */ + unsigned long dma_pfn_offset; + + struct device_dma_parameters *dma_parms; +--- a/include/linux/dma-direct.h ++++ b/include/linux/dma-direct.h +@@ -63,7 +63,7 @@ static inline bool dma_capable(struct de + min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) + return false; + +- return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask); ++ return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_limit); + } + + u64 dma_direct_get_required_mask(struct device *dev); +--- a/include/linux/dma-mapping.h ++++ b/include/linux/dma-mapping.h +@@ -697,7 +697,7 @@ static inline int dma_coerce_mask_and_co + */ + static inline bool dma_addressing_limited(struct device *dev) + { +- return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) < ++ return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev); + } + +--- a/kernel/dma/direct.c ++++ b/kernel/dma/direct.c +@@ -26,10 +26,10 @@ static void report_addr(struct device *d + { + if (!dev->dma_mask) { + dev_err_once(dev, "DMA map on device without dma_mask\n"); +- } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) { ++ } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_limit) { + dev_err_once(dev, +- "overflow %pad+%zu of DMA mask %llx bus mask %llx\n", +- &dma_addr, size, *dev->dma_mask, dev->bus_dma_mask); ++ "overflow %pad+%zu of DMA mask %llx bus limit %llx\n", ++ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + } + WARN_ON_ONCE(1); + } +@@ -51,15 +51,14 @@ u64 dma_direct_get_required_mask(struct + } + + static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, +- u64 *phys_mask) ++ u64 *phys_limit) + { +- if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask) +- dma_mask = dev->bus_dma_mask; ++ u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); + + if (force_dma_unencrypted(dev)) +- *phys_mask = __dma_to_phys(dev, 
dma_mask); ++ *phys_limit = __dma_to_phys(dev, dma_limit); + else +- *phys_mask = dma_to_phys(dev, dma_mask); ++ *phys_limit = dma_to_phys(dev, dma_limit); + + /* + * Optimistically try the zone that the physical address mask falls +@@ -69,9 +68,9 @@ static gfp_t __dma_direct_optimal_gfp_ma + * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding + * zones. + */ +- if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits)) ++ if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + return GFP_DMA; +- if (*phys_mask <= DMA_BIT_MASK(32)) ++ if (*phys_limit <= DMA_BIT_MASK(32)) + return GFP_DMA32; + return 0; + } +@@ -79,7 +78,7 @@ static gfp_t __dma_direct_optimal_gfp_ma + static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) + { + return phys_to_dma_direct(dev, phys) + size - 1 <= +- min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask); ++ min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); + } + + struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, +@@ -88,7 +87,7 @@ struct page *__dma_direct_alloc_pages(st + size_t alloc_size = PAGE_ALIGN(size); + int node = dev_to_node(dev); + struct page *page = NULL; +- u64 phys_mask; ++ u64 phys_limit; + + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; +@@ -96,7 +95,7 @@ struct page *__dma_direct_alloc_pages(st + /* we always manually zero the memory once we are done: */ + gfp &= ~__GFP_ZERO; + gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, +- &phys_mask); ++ &phys_limit); + page = dma_alloc_contiguous(dev, alloc_size, gfp); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_free_contiguous(dev, page, alloc_size); +@@ -110,7 +109,7 @@ again: + page = NULL; + + if (IS_ENABLED(CONFIG_ZONE_DMA32) && +- phys_mask < DMA_BIT_MASK(64) && ++ phys_limit < DMA_BIT_MASK(64) && + !(gfp & (GFP_DMA32 | GFP_DMA))) { + gfp |= GFP_DMA32; + goto again; -- cgit v1.2.3