aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch')
-rw-r--r-- target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch | 268
1 files changed, 268 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch b/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch
new file mode 100644
index 0000000..6a69019
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch
@@ -0,0 +1,268 @@
+From e657f5479bc871209287e26432f013fc395336ab Mon Sep 17 00:00:00 2001
+From: Phil Elwell <phil@raspberrypi.org>
+Date: Wed, 8 Jul 2015 14:48:57 +0100
+Subject: [PATCH 106/203] vchiq_arm: Two cacheing fixes
+
+1) Make fragment size vary with cache line size
+Without this patch, non-cache-line-aligned transfers may corrupt
+(or be corrupted by) adjacent data structures.
+
+Both ARM and VC need to be updated to enable this feature. This is
+ensured by having the loader apply a new DT parameter -
+cache-line-size. The existence of this parameter guarantees that the
+kernel is capable, and the parameter will only be modified from the
+safe default if the loader is capable.
+
+2) Flush/invalidate vmalloc'd memory, and invalidate after reads
+---
+ arch/arm/boot/dts/bcm2708_common.dtsi | 5 +
+ .../interface/vchiq_arm/vchiq_2835_arm.c | 112 +++++++++++++--------
+ 2 files changed, 77 insertions(+), 40 deletions(-)
+
+--- a/arch/arm/boot/dts/bcm2708_common.dtsi
++++ b/arch/arm/boot/dts/bcm2708_common.dtsi
+@@ -218,6 +218,7 @@
+ compatible = "brcm,bcm2835-vchiq";
+ reg = <0x7e00b840 0xf>;
+ interrupts = <0 2>;
++ cache-line-size = <32>;
+ };
+
+ thermal: thermal {
+@@ -270,4 +271,8 @@
+ clock-frequency = <126000000>;
+ };
+ };
++
++ __overrides__ {
++ cache_line_size = <&vchiq>, "cache-line-size:0";
++ };
+ };
+--- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
++++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
+@@ -42,6 +42,7 @@
+ #include <linux/platform_data/mailbox-bcm2708.h>
+ #include <linux/platform_device.h>
+ #include <linux/uaccess.h>
++#include <linux/of.h>
+ #include <asm/pgtable.h>
+
+ #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
+@@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct {
+ } VCHIQ_2835_ARM_STATE_T;
+
+ static void __iomem *g_regs;
+-static FRAGMENTS_T *g_fragments_base;
+-static FRAGMENTS_T *g_free_fragments;
++static unsigned int g_cache_line_size = CACHE_LINE_SIZE; /* DT may override */
++static unsigned int g_fragments_size;
++static char *g_fragments_base;
++static char *g_free_fragments;
+ static struct semaphore g_free_fragments_sema;
+ static unsigned long g_virt_to_bus_offset;
+
+@@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_
+
+ g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
+
++ (void)of_property_read_u32(dev->of_node, "cache-line-size",
++ &g_cache_line_size);
++ g_fragments_size = 2 * g_cache_line_size;
++
+ /* Allocate space for the channels in coherent memory */
+ slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
+- frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
++ frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
+
+ slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size,
+ &slot_phys, GFP_KERNEL);
+@@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_
+ vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
+ MAX_FRAGMENTS;
+
+- g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size);
++ g_fragments_base = (char *)slot_mem + slot_mem_size;
+ slot_mem_size += frag_mem_size;
+
+ g_free_fragments = g_fragments_base;
+ for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
+- *(FRAGMENTS_T **)&g_fragments_base[i] =
+- &g_fragments_base[i + 1];
++ *(char **)&g_fragments_base[i*g_fragments_size] =
++ &g_fragments_base[(i + 1)*g_fragments_size];
+ }
+- *(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
++ *(char **)&g_fragments_base[i * g_fragments_size] = NULL;
+ sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
+
+ if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS)
+@@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id
+ ** cached area.
+
+ ** N.B. This implementation plays slightly fast and loose with the Linux
+-** driver programming rules, e.g. its use of __virt_to_bus instead of
++** driver programming rules, e.g. its use of dmac_map_area instead of
+ ** dma_map_single, but it isn't a multi-platform driver and it benefits
+ ** from increased speed as a result.
+ */
+@@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t
+ {
+ PAGELIST_T *pagelist;
+ struct page **pages;
+- struct page *page;
+ unsigned long *addrs;
+ unsigned int num_pages, offset, i;
+ char *addr, *base_addr, *next_addr;
+@@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t
+ pages = (struct page **)(addrs + num_pages + 1);
+
+ if (is_vmalloc_addr(buf)) {
+- for (actual_pages = 0; actual_pages < num_pages; actual_pages++) {
+- pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE));
++ int dir = (type == PAGELIST_WRITE) ?
++ DMA_TO_DEVICE : DMA_FROM_DEVICE;
++ unsigned long length = count; /* pagelist->length not set yet */
++ unsigned int off = offset;
++
++ for (actual_pages = 0; actual_pages < num_pages;
++ actual_pages++) {
++ struct page *pg = vmalloc_to_page(buf + (actual_pages *
++ PAGE_SIZE));
++ size_t bytes = PAGE_SIZE - off;
++
++ if (bytes > length)
++ bytes = length;
++ pages[actual_pages] = pg;
++ dmac_map_area(page_address(pg) + off, bytes, dir);
++ length -= bytes;
++ off = 0;
++ }
+- *need_release = 0; /* do not try and release vmalloc pages */
++ *need_release = 0; /* do not try and release vmalloc pages */
+ } else {
+ down_read(&task->mm->mmap_sem);
+ actual_pages = get_user_pages(task, task->mm,
+@@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t
+ actual_pages = -ENOMEM;
+ return actual_pages;
+ }
+- *need_release = 1; /* release user pages */
++ *need_release = 1; /* release user pages */
+ }
+
+ pagelist->length = count;
+@@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t
+
+ /* Partial cache lines (fragments) require special measures */
+ if ((type == PAGELIST_READ) &&
+- ((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
++ ((pagelist->offset & (g_cache_line_size - 1)) ||
+ ((pagelist->offset + pagelist->length) &
+- (CACHE_LINE_SIZE - 1)))) {
+- FRAGMENTS_T *fragments;
++ (g_cache_line_size - 1)))) {
++ char *fragments;
+
+ if (down_interruptible(&g_free_fragments_sema) != 0) {
+ kfree(pagelist);
+@@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t
+ WARN_ON(g_free_fragments == NULL);
+
+ down(&g_free_fragments_mutex);
+- fragments = (FRAGMENTS_T *) g_free_fragments;
++ fragments = g_free_fragments;
+ WARN_ON(fragments == NULL);
+- g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
++ g_free_fragments = *(char **) g_free_fragments;
+ up(&g_free_fragments_mutex);
+- pagelist->type =
+- PAGELIST_READ_WITH_FRAGMENTS + (fragments -
+- g_fragments_base);
++ pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +
++ (fragments - g_fragments_base) / g_fragments_size;
+ }
+
+- for (page = virt_to_page(pagelist);
+- page <= virt_to_page(addrs + num_pages - 1); page++) {
+- flush_dcache_page(page);
+- }
++ dmac_flush_range(pagelist, addrs + num_pages);
+
+ *ppagelist = pagelist;
+
+@@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int
+
+ /* Deal with any partial cache lines (fragments) */
+ if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
+- FRAGMENTS_T *fragments = g_fragments_base +
+- (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
++ char *fragments = g_fragments_base +
++ (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) *
++ g_fragments_size;
+ int head_bytes, tail_bytes;
+- head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
+- (CACHE_LINE_SIZE - 1);
++ head_bytes = (g_cache_line_size - pagelist->offset) &
++ (g_cache_line_size - 1);
+ tail_bytes = (pagelist->offset + actual) &
+- (CACHE_LINE_SIZE - 1);
++ (g_cache_line_size - 1);
+
+ if ((actual >= 0) && (head_bytes != 0)) {
+ if (head_bytes > actual)
+@@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int
+
+ memcpy((char *)page_address(pages[0]) +
+ pagelist->offset,
+- fragments->headbuf,
++ fragments,
+ head_bytes);
+ }
+ if ((actual >= 0) && (head_bytes < actual) &&
+ (tail_bytes != 0)) {
+ memcpy((char *)page_address(pages[num_pages - 1]) +
+ ((pagelist->offset + actual) &
+- (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
+- fragments->tailbuf, tail_bytes);
++ (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)),
++ fragments + g_cache_line_size,
++ tail_bytes);
+ }
+
+ down(&g_free_fragments_mutex);
+- *(FRAGMENTS_T **) fragments = g_free_fragments;
++ *(char **)fragments = g_free_fragments;
+ g_free_fragments = fragments;
+ up(&g_free_fragments_mutex);
+ up(&g_free_fragments_sema);
+ }
+
+- if (*need_release) {
++ if (*need_release) {
++ unsigned int length = pagelist->length;
++ unsigned int offset = pagelist->offset;
++
+ for (i = 0; i < num_pages; i++) {
+- if (pagelist->type != PAGELIST_WRITE)
+- set_page_dirty(pages[i]);
++ struct page *pg = pages[i];
+
+- page_cache_release(pages[i]);
++ if (pagelist->type != PAGELIST_WRITE) {
++ unsigned int bytes = PAGE_SIZE - offset;
++
++ if (bytes > length)
++ bytes = length;
++ dmac_unmap_area(page_address(pg) + offset,
++ bytes, DMA_FROM_DEVICE);
++ length -= bytes;
++ offset = 0;
++ set_page_dirty(pg);
++ }
++ page_cache_release(pg);
+ }
+- }
++ }
+
+ kfree(pagelist);
+ }