diff options
Diffstat (limited to 'target/linux/brcm2708/patches-3.10/0070-bcm2708_fb-DMA-acceleration-for-fb_copyarea.patch')
-rw-r--r-- | target/linux/brcm2708/patches-3.10/0070-bcm2708_fb-DMA-acceleration-for-fb_copyarea.patch | 246 |
1 files changed, 0 insertions, 246 deletions
diff --git a/target/linux/brcm2708/patches-3.10/0070-bcm2708_fb-DMA-acceleration-for-fb_copyarea.patch b/target/linux/brcm2708/patches-3.10/0070-bcm2708_fb-DMA-acceleration-for-fb_copyarea.patch deleted file mode 100644 index 4f6a857562..0000000000 --- a/target/linux/brcm2708/patches-3.10/0070-bcm2708_fb-DMA-acceleration-for-fb_copyarea.patch +++ /dev/null @@ -1,246 +0,0 @@ -From 370c8243ec8e7f3abd8171b7d2dde170f4c5e63a Mon Sep 17 00:00:00 2001 -From: Siarhei Siamashka <siarhei.siamashka@gmail.com> -Date: Mon, 17 Jun 2013 16:00:25 +0300 -Subject: [PATCH 070/196] bcm2708_fb: DMA acceleration for fb_copyarea - -Based on http://www.raspberrypi.org/phpBB3/viewtopic.php?p=62425#p62425 -Also used Simon's dmaer_master module as a reference for tweaking DMA -settings for better performance. - -For now busylooping only. IRQ support might be added later. -With non-overclocked Raspberry Pi, the performance is ~360 MB/s -for simple copy or ~260 MB/s for two-pass copy (used when dragging -windows to the right). - -In the case of using DMA channel 0, the performance improves -to ~440 MB/s. - -For comparison, VFP optimized CPU copy can only do ~114 MB/s in -the same conditions (hindered by reading uncached source buffer). - -Signed-off-by: Siarhei Siamashka <siarhei.siamashka@gmail.com> ---- - drivers/video/bcm2708_fb.c | 162 ++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 159 insertions(+), 3 deletions(-) - -diff --git a/drivers/video/bcm2708_fb.c b/drivers/video/bcm2708_fb.c -index 08d9238..c10c5ee 100644 ---- a/drivers/video/bcm2708_fb.c -+++ b/drivers/video/bcm2708_fb.c -@@ -28,6 +28,7 @@ - #include <linux/printk.h> - #include <linux/console.h> - -+#include <mach/dma.h> - #include <mach/platform.h> - #include <mach/vcio.h> - -@@ -63,6 +64,11 @@ struct bcm2708_fb { - struct fbinfo_s *info; - dma_addr_t dma; - u32 cmap[16]; -+ int dma_chan; -+ int dma_irq; -+ void __iomem *dma_chan_base; -+ void *cb_base; /* DMA control blocks */ -+ dma_addr_t cb_handle; - }; - - #define to_bcm2708(info) container_of(info, struct bcm2708_fb, fb) -@@ -312,11 +318,133 @@ static void bcm2708_fb_fillrect(struct fb_info *info, - cfb_fillrect(info, rect); - } - -+/* A helper function for configuring dma control block */ -+static void set_dma_cb(struct bcm2708_dma_cb *cb, -+ int burst_size, -+ dma_addr_t dst, -+ int dst_stride, -+ dma_addr_t src, -+ int src_stride, -+ int w, -+ int h) -+{ -+ cb->info = BCM2708_DMA_BURST(burst_size) | BCM2708_DMA_S_WIDTH | -+ BCM2708_DMA_S_INC | BCM2708_DMA_D_WIDTH | -+ BCM2708_DMA_D_INC | BCM2708_DMA_TDMODE; -+ cb->dst = dst; -+ cb->src = src; -+ /* -+ * This is not really obvious from the DMA documentation, -+ * but the top 16 bits must be programmmed to "height -1" -+ * and not "height" in 2D mode. -+ */ -+ cb->length = ((h - 1) << 16) | w; -+ cb->stride = ((dst_stride - w) << 16) | (u16)(src_stride - w); -+ cb->pad[0] = 0; -+ cb->pad[1] = 0; -+} -+ - static void bcm2708_fb_copyarea(struct fb_info *info, - const struct fb_copyarea *region) - { -- /*print_debug("bcm2708_fb_copyarea\n"); */ -- cfb_copyarea(info, region); -+ struct bcm2708_fb *fb = to_bcm2708(info); -+ struct bcm2708_dma_cb *cb = fb->cb_base; -+ int bytes_per_pixel = (info->var.bits_per_pixel + 7) >> 3; -+ /* Channel 0 supports larger bursts and is a bit faster */ -+ int burst_size = (fb->dma_chan == 0) ? 8 : 2; -+ -+ /* Fallback to cfb_copyarea() if we don't like something */ -+ if (bytes_per_pixel > 4 || -+ info->var.xres > 1920 || info->var.yres > 1200 || -+ region->width <= 0 || region->width > info->var.xres || -+ region->height <= 0 || region->height > info->var.yres || -+ region->sx < 0 || region->sx >= info->var.xres || -+ region->sy < 0 || region->sy >= info->var.yres || -+ region->dx < 0 || region->dx >= info->var.xres || -+ region->dy < 0 || region->dy >= info->var.yres || -+ region->sx + region->width > info->var.xres || -+ region->dx + region->width > info->var.xres || -+ region->sy + region->height > info->var.yres || -+ region->dy + region->height > info->var.yres) { -+ cfb_copyarea(info, region); -+ return; -+ } -+ -+ if (region->dy == region->sy && region->dx > region->sx) { -+ /* -+ * A difficult case of overlapped copy. Because DMA can't -+ * copy individual scanlines in backwards direction, we need -+ * two-pass processing. We do it by programming a chain of dma -+ * control blocks in the first 16K part of the buffer and use -+ * the remaining 48K as the intermediate temporary scratch -+ * buffer. The buffer size is sufficient to handle up to -+ * 1920x1200 resolution at 32bpp pixel depth. -+ */ -+ int y; -+ dma_addr_t control_block_pa = fb->cb_handle; -+ dma_addr_t scratchbuf = fb->cb_handle + 16 * 1024; -+ int scanline_size = bytes_per_pixel * region->width; -+ int scanlines_per_cb = (64 * 1024 - 16 * 1024) / scanline_size; -+ -+ for (y = 0; y < region->height; y += scanlines_per_cb) { -+ dma_addr_t src = -+ fb->fb.fix.smem_start + -+ bytes_per_pixel * region->sx + -+ (region->sy + y) * fb->fb.fix.line_length; -+ dma_addr_t dst = -+ fb->fb.fix.smem_start + -+ bytes_per_pixel * region->dx + -+ (region->dy + y) * fb->fb.fix.line_length; -+ -+ if (region->height - y < scanlines_per_cb) -+ scanlines_per_cb = region->height - y; -+ -+ set_dma_cb(cb, burst_size, scratchbuf, scanline_size, -+ src, fb->fb.fix.line_length, -+ scanline_size, scanlines_per_cb); -+ control_block_pa += sizeof(struct bcm2708_dma_cb); -+ cb->next = control_block_pa; -+ cb++; -+ -+ set_dma_cb(cb, burst_size, dst, fb->fb.fix.line_length, -+ scratchbuf, scanline_size, -+ scanline_size, scanlines_per_cb); -+ control_block_pa += sizeof(struct bcm2708_dma_cb); -+ cb->next = control_block_pa; -+ cb++; -+ } -+ /* move the pointer back to the last dma control block */ -+ cb--; -+ } else { -+ /* A single dma control block is enough. */ -+ int sy, dy, stride; -+ if (region->dy <= region->sy) { -+ /* processing from top to bottom */ -+ dy = region->dy; -+ sy = region->sy; -+ stride = fb->fb.fix.line_length; -+ } else { -+ /* processing from bottom to top */ -+ dy = region->dy + region->height - 1; -+ sy = region->sy + region->height - 1; -+ stride = -fb->fb.fix.line_length; -+ } -+ set_dma_cb(cb, burst_size, -+ fb->fb.fix.smem_start + dy * fb->fb.fix.line_length + -+ bytes_per_pixel * region->dx, -+ stride, -+ fb->fb.fix.smem_start + sy * fb->fb.fix.line_length + -+ bytes_per_pixel * region->sx, -+ stride, -+ region->width * bytes_per_pixel, -+ region->height); -+ } -+ -+ /* end of dma control blocks chain */ -+ cb->next = 0; -+ -+ bcm_dma_start(fb->dma_chan_base, fb->cb_handle); -+ bcm_dma_wait_idle(fb->dma_chan_base); - } - - static void bcm2708_fb_imageblit(struct fb_info *info, -@@ -359,7 +487,7 @@ static int bcm2708_fb_register(struct bcm2708_fb *fb) - fb->dma = dma; - } - fb->fb.fbops = &bcm2708_fb_ops; -- fb->fb.flags = FBINFO_FLAG_DEFAULT; -+ fb->fb.flags = FBINFO_FLAG_DEFAULT | FBINFO_HWACCEL_COPYAREA; - fb->fb.pseudo_palette = fb->cmap; - - strncpy(fb->fb.fix.id, bcm2708_name, sizeof(fb->fb.fix.id)); -@@ -424,6 +552,28 @@ static int bcm2708_fb_probe(struct platform_device *dev) - } - memset(fb, 0, sizeof(struct bcm2708_fb)); - -+ fb->cb_base = dma_alloc_writecombine(&dev->dev, SZ_64K, -+ &fb->cb_handle, GFP_KERNEL); -+ if (!fb->cb_base) { -+ dev_err(&dev->dev, "cannot allocate DMA CBs\n"); -+ ret = -ENOMEM; -+ goto free_fb; -+ } -+ -+ pr_info("BCM2708FB: allocated DMA memory %08x\n", -+ fb->cb_handle); -+ -+ ret = bcm_dma_chan_alloc(BCM_DMA_FEATURE_BULK, -+ &fb->dma_chan_base, &fb->dma_irq); -+ if (ret < 0) { -+ dev_err(&dev->dev, "couldn't allocate a DMA channel\n"); -+ goto free_cb; -+ } -+ fb->dma_chan = ret; -+ -+ pr_info("BCM2708FB: allocated DMA channel %d @ %p\n", -+ fb->dma_chan, fb->dma_chan_base); -+ - fb->dev = dev; - - ret = bcm2708_fb_register(fb); -@@ -432,6 +582,9 @@ static int bcm2708_fb_probe(struct platform_device *dev) - goto out; - } - -+free_cb: -+ dma_free_writecombine(&dev->dev, SZ_64K, fb->cb_base, fb->cb_handle); -+free_fb: - kfree(fb); - free_region: - dev_err(&dev->dev, "probe failed, err %d\n", ret); -@@ -449,6 +602,9 @@ static int bcm2708_fb_remove(struct platform_device *dev) - iounmap(fb->fb.screen_base); - unregister_framebuffer(&fb->fb); - -+ dma_free_writecombine(&dev->dev, SZ_64K, fb->cb_base, fb->cb_handle); -+ bcm_dma_chan_free(fb->dma_chan); -+ - dma_free_coherent(NULL, PAGE_ALIGN(sizeof(*fb->info)), (void *)fb->info, - fb->dma); - kfree(fb); --- -1.9.1 - |