From e657f5479bc871209287e26432f013fc395336ab Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Wed, 8 Jul 2015 14:48:57 +0100 Subject: [PATCH 106/203] vchiq_arm: Two cacheing fixes 1) Make fragment size vary with cache line size Without this patch, non-cache-line-aligned transfers may corrupt (or be corrupted by) adjacent data structures. Both ARM and VC need to be updated to enable this feature. This is ensured by having the loader apply a new DT parameter - cache-line-size. The existence of this parameter guarantees that the kernel is capable, and the parameter will only be modified from the safe default if the loader is capable. 2) Flush/invalidate vmalloc'd memory, and invalidate after reads --- arch/arm/boot/dts/bcm2708_common.dtsi | 5 + .../interface/vchiq_arm/vchiq_2835_arm.c | 112 +++++++++++++-------- 2 files changed, 77 insertions(+), 40 deletions(-) --- a/arch/arm/boot/dts/bcm2708_common.dtsi +++ b/arch/arm/boot/dts/bcm2708_common.dtsi @@ -218,6 +218,7 @@ compatible = "brcm,bcm2835-vchiq"; reg = <0x7e00b840 0xf>; interrupts = <0 2>; + cache-line-size = <32>; }; thermal: thermal { @@ -270,4 +271,8 @@ clock-frequency = <126000000>; }; }; + + __overrides__ { + cache_line_size = <&vchiq>, "cache-line-size:0"; + }; }; --- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) @@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct { } VCHIQ_2835_ARM_STATE_T; static void __iomem *g_regs; -static FRAGMENTS_T *g_fragments_base; -static FRAGMENTS_T *g_free_fragments; +static unsigned int g_cache_line_size = sizeof(CACHE_LINE_SIZE); +static unsigned int g_fragments_size; +static char *g_fragments_base; +static char *g_free_fragments; static struct semaphore g_free_fragments_sema; static unsigned long g_virt_to_bus_offset; @@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_ g_virt_to_bus_offset = virt_to_dma(dev, (void *)0); + (void)of_property_read_u32(dev->of_node, "cache-line-size", + &g_cache_line_size); + g_fragments_size = 2 * g_cache_line_size; + /* Allocate space for the channels in coherent memory */ slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE); - frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS); + frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS); slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size, &slot_phys, GFP_KERNEL); @@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_ vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] = MAX_FRAGMENTS; - g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size); + g_fragments_base = (char *)slot_mem + slot_mem_size; slot_mem_size += frag_mem_size; g_free_fragments = g_fragments_base; for (i = 0; i < (MAX_FRAGMENTS - 1); i++) { - *(FRAGMENTS_T **)&g_fragments_base[i] = - &g_fragments_base[i + 1]; + *(char **)&g_fragments_base[i*g_fragments_size] = + &g_fragments_base[(i + 1)*g_fragments_size]; } - *(FRAGMENTS_T **)&g_fragments_base[i] = NULL; + *(char **)&g_fragments_base[i * g_fragments_size] = NULL; sema_init(&g_free_fragments_sema, MAX_FRAGMENTS); if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS) @@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id ** cached area. ** N.B. This implementation plays slightly fast and loose with the Linux -** driver programming rules, e.g. its use of __virt_to_bus instead of +** driver programming rules, e.g. its use of dmac_map_area instead of ** dma_map_single, but it isn't a multi-platform driver and it benefits ** from increased speed as a result. */ @@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t { PAGELIST_T *pagelist; struct page **pages; - struct page *page; unsigned long *addrs; unsigned int num_pages, offset, i; char *addr, *base_addr, *next_addr; @@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t pages = (struct page **)(addrs + num_pages + 1); if (is_vmalloc_addr(buf)) { - for (actual_pages = 0; actual_pages < num_pages; actual_pages++) { - pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE)); + int dir = (type == PAGELIST_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE; + unsigned long length = pagelist->length; + unsigned int offset = pagelist->offset; + + for (actual_pages = 0; actual_pages < num_pages; + actual_pages++) { + struct page *pg = vmalloc_to_page(buf + (actual_pages * + PAGE_SIZE)); + size_t bytes = PAGE_SIZE - offset; + + if (bytes > length) + bytes = length; + pages[actual_pages] = pg; + dmac_map_area(page_address(pg) + offset, bytes, dir); + length -= bytes; + offset = 0; } - *need_release = 0; /* do not try and release vmalloc pages */ + *need_release = 0; /* do not try and release vmalloc pages */ } else { down_read(&task->mm->mmap_sem); actual_pages = get_user_pages(task, task->mm, @@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t actual_pages = -ENOMEM; return actual_pages; } - *need_release = 1; /* release user pages */ + *need_release = 1; /* release user pages */ } pagelist->length = count; @@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t /* Partial cache lines (fragments) require special measures */ if ((type == PAGELIST_READ) && - ((pagelist->offset & (CACHE_LINE_SIZE - 1)) || + ((pagelist->offset & (g_cache_line_size - 1)) || ((pagelist->offset + pagelist->length) & - (CACHE_LINE_SIZE - 1)))) { - FRAGMENTS_T *fragments; + (g_cache_line_size - 1)))) { + char *fragments; if (down_interruptible(&g_free_fragments_sema) != 0) { kfree(pagelist); @@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t WARN_ON(g_free_fragments == NULL); down(&g_free_fragments_mutex); - fragments = (FRAGMENTS_T *) g_free_fragments; + fragments = g_free_fragments; WARN_ON(fragments == NULL); - g_free_fragments = *(FRAGMENTS_T **) g_free_fragments; + g_free_fragments = *(char **) g_free_fragments; up(&g_free_fragments_mutex); - pagelist->type = - PAGELIST_READ_WITH_FRAGMENTS + (fragments - - g_fragments_base); + pagelist->type = PAGELIST_READ_WITH_FRAGMENTS + + (fragments - g_fragments_base) / g_fragments_size; } - for (page = virt_to_page(pagelist); - page <= virt_to_page(addrs + num_pages - 1); page++) { - flush_dcache_page(page); - } + dmac_flush_range(pagelist, addrs + num_pages); *ppagelist = pagelist; @@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int /* Deal with any partial cache lines (fragments) */ if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { - FRAGMENTS_T *fragments = g_fragments_base + - (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS); + char *fragments = g_fragments_base + + (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) * + g_fragments_size; int head_bytes, tail_bytes; - head_bytes = (CACHE_LINE_SIZE - pagelist->offset) & - (CACHE_LINE_SIZE - 1); + head_bytes = (g_cache_line_size - pagelist->offset) & + (g_cache_line_size - 1); tail_bytes = (pagelist->offset + actual) & - (CACHE_LINE_SIZE - 1); + (g_cache_line_size - 1); if ((actual >= 0) && (head_bytes != 0)) { if (head_bytes > actual) @@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int memcpy((char *)page_address(pages[0]) + pagelist->offset, - fragments->headbuf, + fragments, head_bytes); } if ((actual >= 0) && (head_bytes < actual) && (tail_bytes != 0)) { memcpy((char *)page_address(pages[num_pages - 1]) + ((pagelist->offset + actual) & - (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)), - fragments->tailbuf, tail_bytes); + (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)), + fragments + g_cache_line_size, + tail_bytes); } down(&g_free_fragments_mutex); - *(FRAGMENTS_T **) fragments = g_free_fragments; + *(char **)fragments = g_free_fragments; g_free_fragments = fragments; up(&g_free_fragments_mutex); up(&g_free_fragments_sema); } - if (*need_release) { + if (*need_release) { + unsigned int length = pagelist->length; + unsigned int offset = pagelist->offset; + for (i = 0; i < num_pages; i++) { - if (pagelist->type != PAGELIST_WRITE) - set_page_dirty(pages[i]); + struct page *pg = pages[i]; - page_cache_release(pages[i]); + if (pagelist->type != PAGELIST_WRITE) { + unsigned int bytes = PAGE_SIZE - offset; + + if (bytes > length) + bytes = length; + dmac_unmap_area(page_address(pg) + offset, + bytes, DMA_FROM_DEVICE); + length -= bytes; + offset = 0; + set_page_dirty(pg); + } + page_cache_release(pg); } - } + } kfree(pagelist); }