aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/brcm2708/patches-4.14/950-0057-Speed-up-console-framebuffer-imageblit-function.patch
diff options
context:
space:
mode:
authorStijn Tintel <stijn@linux-ipv6.be>2018-11-10 13:03:18 +0200
committerStijn Tintel <stijn@linux-ipv6.be>2018-12-18 23:19:21 +0200
commitf5919b65d4c671fd5083838c7a445f319f9a13c8 (patch)
tree2c791d2a5dea5bbbb0b85f86f74afce2647c0726 /target/linux/brcm2708/patches-4.14/950-0057-Speed-up-console-framebuffer-imageblit-function.patch
parent822b4c3b250a254e74407056ccfd5c6aa38da162 (diff)
downloadupstream-f5919b65d4c671fd5083838c7a445f319f9a13c8.tar.gz
upstream-f5919b65d4c671fd5083838c7a445f319f9a13c8.tar.bz2
upstream-f5919b65d4c671fd5083838c7a445f319f9a13c8.zip
brcm2708: add kernel 4.14 support
Patch generation process: - rebase rpi/rpi-4.14.y on v4.14.89 from linux-stable - git format-patch v4.14.89 Patches skipped during rebase: - lan78xx: Read MAC address from DT if present - lan78xx: Enable LEDs and auto-negotiation - Revert "softirq: Let ksoftirqd do its job" - sc16is7xx: Fix for multi-channel stall - lan78xx: Ignore DT MAC address if already valid - lan78xx: Simple patch to prevent some crashes - tcp_write_queue_purge clears all the SKBs in the write queue - Revert "lan78xx: Simple patch to prevent some crashes" - lan78xx: Connect phy early - Arm: mm: ftrace: Only set text back to ro after kernel has been marked ro - Revert "Revert "softirq: Let ksoftirqd do its job"" - ASoC: cs4265: SOC_SINGLE register value error fix - Revert "ASoC: cs4265: SOC_SINGLE register value error fix" - Revert "net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends" - Revert "Revert "net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"" Patches dropped after rebase: - net: Add non-mainline source for rtl8192cu wlan - net: Fix rtl8192cu build errors on other platforms - brcm: adds support for BCM43341 wifi - brcmfmac: Mute expected startup 'errors' - ARM64: Fix build break for RTL8187/RTL8192CU wifi - ARM64: Enable RTL8187/RTL8192CU wifi in build config - This is the driver for Sony CXD2880 DVB-T2/T tuner + demodulator - brcmfmac: add CLM download support - brcmfmac: request_firmware_direct is quieter - Sets the BCDC priority to constant 0 - brcmfmac: Disable ARP offloading when promiscuous - brcmfmac: Avoid possible out-of-bounds read - brcmfmac: Delete redundant length check - net: rtl8192cu: Normalize indentation - net: rtl8192cu: Fix implicit fallthrough warnings - Revert "Sets the BCDC priority to constant 0" - media: cxd2880: Bump to match 4.18.y version - media: cxd2880-spi: Bump to match 4.18.y version - Revert "mm: alloc_contig: re-allow CMA to compact FS pages" - Revert "Revert "mm: alloc_contig: re-allow CMA to compact FS pages"" - cxd2880: CXD2880_SPI_DRV should select DVB_CXD2880 with MEDIA_SUBDRV_AUTOSELECT - 950-0421-HID-hid-bigbenff-driver-for-BigBen-Interactive-PS3OF.patch - 950-0453-Add-hid-bigbenff-to-list-of-have_special_driver-for-.patch Make I2C built-in instead of modular as in upstream defconfig; also the easiest way to get MFD_ARIZONA enabled, which is required by kmod-sound-soc-rpi-cirrus. Add missing compatible strings from 4.9/960-add-rasbperrypi-compatible.patch, using upstream names for compute modules. Add extra patch to enable the LEDs on lan78xx. Compile-tested: bcm2708, bcm2709, bcm2710 (with CONFIG_ALL_KMODS=y) Runtime-tested: bcm2708, bcm2710 Signed-off-by: Stijn Tintel <stijn@linux-ipv6.be>
Diffstat (limited to 'target/linux/brcm2708/patches-4.14/950-0057-Speed-up-console-framebuffer-imageblit-function.patch')
-rw-r--r--target/linux/brcm2708/patches-4.14/950-0057-Speed-up-console-framebuffer-imageblit-function.patch209
1 files changed, 209 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.14/950-0057-Speed-up-console-framebuffer-imageblit-function.patch b/target/linux/brcm2708/patches-4.14/950-0057-Speed-up-console-framebuffer-imageblit-function.patch
new file mode 100644
index 0000000000..a6d3234f26
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.14/950-0057-Speed-up-console-framebuffer-imageblit-function.patch
@@ -0,0 +1,209 @@
+From 0b3eb7b8f3ac701b27253ce407041ed69784306e Mon Sep 17 00:00:00 2001
+From: Harm Hanemaaijer <fgenfb@yahoo.com>
+Date: Thu, 20 Jun 2013 20:21:39 +0200
+Subject: [PATCH 057/454] Speed up console framebuffer imageblit function
+
+Especially on platforms with a slower CPU but a relatively high
+framebuffer fill bandwidth, like current ARM devices, the existing
+console monochrome imageblit function used to draw console text is
+suboptimal for common pixel depths such as 16bpp and 32bpp. The existing
+code is quite general and can deal with several pixel depths. By creating
+special case functions for 16bpp and 32bpp, by far the most common pixel
+formats used on modern systems, a significant speed-up is attained
+which can be readily felt on ARM-based devices like the Raspberry Pi
+and the Allwinner platform, but should help any platform using the
+fb layer.
+
+The special case functions allow constant folding, eliminating a number
+of instructions including divide operations, and allow the use of an
+unrolled loop, eliminating instructions with a variable shift size,
+reducing source memory access instructions, and eliminating excessive
+branching. These unrolled loops also allow much better code optimization
+by the C compiler. The code that selects which optimized variant is used
+is also simplified, eliminating integer divide instructions.
+
+The speed-up, measured by timing 'cat file.txt' in the console, varies
+between 40% and 70%, when testing on the Raspberry Pi and Allwinner
+ARM-based platforms, depending on font size and the pixel depth, with
+the greater benefit for 32bpp.
+
+Signed-off-by: Harm Hanemaaijer <fgenfb@yahoo.com>
+---
+ drivers/video/fbdev/core/cfbimgblt.c | 152 ++++++++++++++++++++++++++-
+ 1 file changed, 147 insertions(+), 5 deletions(-)
+
+--- a/drivers/video/fbdev/core/cfbimgblt.c
++++ b/drivers/video/fbdev/core/cfbimgblt.c
+@@ -28,6 +28,11 @@
+ *
+ * Also need to add code to deal with cards endians that are different than
+ * the native cpu endians. I also need to deal with MSB position in the word.
++ * Modified by Harm Hanemaaijer (fgenfb@yahoo.com) 2013:
++ * - Provide optimized versions of fast_imageblit for 16 and 32bpp that are
++ * significantly faster than the previous implementation.
++ * - Simplify the fast/slow_imageblit selection code, avoiding integer
++ * divides.
+ */
+ #include <linux/module.h>
+ #include <linux/string.h>
+@@ -262,6 +267,133 @@ static inline void fast_imageblit(const
+ }
+ }
+
++/*
++ * Optimized fast_imageblit for bpp == 16. ppw = 2, bit_mask = 3 folded
++ * into the code, main loop unrolled.
++ */
++
++static inline void fast_imageblit16(const struct fb_image *image,
++ struct fb_info *p, u8 __iomem * dst1,
++ u32 fgcolor, u32 bgcolor)
++{
++ u32 fgx = fgcolor, bgx = bgcolor;
++ u32 spitch = (image->width + 7) / 8;
++ u32 end_mask, eorx;
++ const char *s = image->data, *src;
++ u32 __iomem *dst;
++ const u32 *tab = NULL;
++ int i, j, k;
++
++ tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
++
++ fgx <<= 16;
++ bgx <<= 16;
++ fgx |= fgcolor;
++ bgx |= bgcolor;
++
++ eorx = fgx ^ bgx;
++ k = image->width / 2;
++
++ for (i = image->height; i--;) {
++ dst = (u32 __iomem *) dst1;
++ src = s;
++
++ j = k;
++ while (j >= 4) {
++ u8 bits = *src;
++ end_mask = tab[(bits >> 6) & 3];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 4) & 3];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 2) & 3];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[bits & 3];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ src++;
++ j -= 4;
++ }
++ if (j != 0) {
++ u8 bits = *src;
++ end_mask = tab[(bits >> 6) & 3];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ if (j >= 2) {
++ end_mask = tab[(bits >> 4) & 3];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ if (j == 3) {
++ end_mask = tab[(bits >> 2) & 3];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst);
++ }
++ }
++ }
++ dst1 += p->fix.line_length;
++ s += spitch;
++ }
++}
++
++/*
++ * Optimized fast_imageblit for bpp == 32. ppw = 1, bit_mask = 1 folded
++ * into the code, main loop unrolled.
++ */
++
++static inline void fast_imageblit32(const struct fb_image *image,
++ struct fb_info *p, u8 __iomem * dst1,
++ u32 fgcolor, u32 bgcolor)
++{
++ u32 fgx = fgcolor, bgx = bgcolor;
++ u32 spitch = (image->width + 7) / 8;
++ u32 end_mask, eorx;
++ const char *s = image->data, *src;
++ u32 __iomem *dst;
++ const u32 *tab = NULL;
++ int i, j, k;
++
++ tab = cfb_tab32;
++
++ eorx = fgx ^ bgx;
++ k = image->width;
++
++ for (i = image->height; i--;) {
++ dst = (u32 __iomem *) dst1;
++ src = s;
++
++ j = k;
++ while (j >= 8) {
++ u8 bits = *src;
++ end_mask = tab[(bits >> 7) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 6) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 5) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 4) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 3) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 2) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[(bits >> 1) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ end_mask = tab[bits & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ src++;
++ j -= 8;
++ }
++ if (j != 0) {
++ u32 bits = (u32) * src;
++ while (j > 1) {
++ end_mask = tab[(bits >> 7) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst++);
++ bits <<= 1;
++ j--;
++ }
++ end_mask = tab[(bits >> 7) & 1];
++ FB_WRITEL((end_mask & eorx) ^ bgx, dst);
++ }
++ dst1 += p->fix.line_length;
++ s += spitch;
++ }
++}
++
+ void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
+ {
+ u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
+@@ -294,11 +426,21 @@ void cfb_imageblit(struct fb_info *p, co
+ bgcolor = image->bg_color;
+ }
+
+- if (32 % bpp == 0 && !start_index && !pitch_index &&
+- ((width & (32/bpp-1)) == 0) &&
+- bpp >= 8 && bpp <= 32)
+- fast_imageblit(image, p, dst1, fgcolor, bgcolor);
+- else
++ if (!start_index && !pitch_index) {
++ if (bpp == 32)
++ fast_imageblit32(image, p, dst1, fgcolor,
++ bgcolor);
++ else if (bpp == 16 && (width & 1) == 0)
++ fast_imageblit16(image, p, dst1, fgcolor,
++ bgcolor);
++ else if (bpp == 8 && (width & 3) == 0)
++ fast_imageblit(image, p, dst1, fgcolor,
++ bgcolor);
++ else
++ slow_imageblit(image, p, dst1, fgcolor,
++ bgcolor,
++ start_index, pitch_index);
++ } else
+ slow_imageblit(image, p, dst1, fgcolor, bgcolor,
+ start_index, pitch_index);
+ } else