summary refs log tree commit diff stats
path: root/target/linux/brcm2708/patches-4.1/0070-Improve-__copy_to_user-and-__copy_from_user-performa.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/brcm2708/patches-4.1/0070-Improve-__copy_to_user-and-__copy_from_user-performa.patch')
-rw-r--r-- target/linux/brcm2708/patches-4.1/0070-Improve-__copy_to_user-and-__copy_from_user-performa.patch 1496
1 files changed, 0 insertions, 1496 deletions
diff --git a/target/linux/brcm2708/patches-4.1/0070-Improve-__copy_to_user-and-__copy_from_user-performa.patch b/target/linux/brcm2708/patches-4.1/0070-Improve-__copy_to_user-and-__copy_from_user-performa.patch
deleted file mode 100644
index 1400b89e86..0000000000
--- a/target/linux/brcm2708/patches-4.1/0070-Improve-__copy_to_user-and-__copy_from_user-performa.patch
+++ /dev/null
@@ -1,1496 +0,0 @@
-From 738acf415f0e55f7ec8a2ff25eaefcff08e5d6aa Mon Sep 17 00:00:00 2001
-From: Phil Elwell <phil@raspberrypi.org>
-Date: Mon, 13 Oct 2014 11:47:53 +0100
-Subject: [PATCH 070/222] Improve __copy_to_user and __copy_from_user
- performance
-
-Provide a __copy_from_user that uses memcpy. On BCM2708, use
-optimised memcpy/memmove/memcmp/memset implementations.
----
- arch/arm/include/asm/string.h | 5 +
- arch/arm/include/asm/uaccess.h | 1 +
- arch/arm/lib/Makefile | 15 +-
- arch/arm/lib/arm-mem.h | 159 ++++++++++++
- arch/arm/lib/copy_from_user.S | 4 +-
- arch/arm/lib/exports_rpi.c | 37 +++
- arch/arm/lib/memcmp_rpi.S | 285 +++++++++++++++++++++
- arch/arm/lib/memcpy_rpi.S | 59 +++++
- arch/arm/lib/memcpymove.h | 506 +++++++++++++++++++++++++++++++++++++
- arch/arm/lib/memmove_rpi.S | 61 +++++
- arch/arm/lib/memset_rpi.S | 121 +++++++++
- arch/arm/lib/uaccess_with_memcpy.c | 112 +++++++-
- 12 files changed, 1359 insertions(+), 6 deletions(-)
- create mode 100644 arch/arm/lib/arm-mem.h
- create mode 100644 arch/arm/lib/exports_rpi.c
- create mode 100644 arch/arm/lib/memcmp_rpi.S
- create mode 100644 arch/arm/lib/memcpy_rpi.S
- create mode 100644 arch/arm/lib/memcpymove.h
- create mode 100644 arch/arm/lib/memmove_rpi.S
- create mode 100644 arch/arm/lib/memset_rpi.S
-
---- a/arch/arm/include/asm/string.h
-+++ b/arch/arm/include/asm/string.h
-@@ -24,6 +24,11 @@ extern void * memchr(const void *, int,
- #define __HAVE_ARCH_MEMSET
- extern void * memset(void *, int, __kernel_size_t);
-
-+#ifdef CONFIG_MACH_BCM2708
-+#define __HAVE_ARCH_MEMCMP
-+extern int memcmp(const void *, const void *, size_t);
-+#endif
-+
- extern void __memzero(void *ptr, __kernel_size_t n);
-
- #define memset(p,v,n) \
---- a/arch/arm/include/asm/uaccess.h
-+++ b/arch/arm/include/asm/uaccess.h
-@@ -475,6 +475,7 @@ do { \
-
- #ifdef CONFIG_MMU
- extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n);
-+extern unsigned long __must_check __copy_from_user_std(void *to, const void __user *from, unsigned long n);
- extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n);
- extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n);
- extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
---- a/arch/arm/lib/Makefile
-+++ b/arch/arm/lib/Makefile
-@@ -6,9 +6,8 @@
-
- lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
- csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
-- delay.o delay-loop.o findbit.o memchr.o memcpy.o \
-- memmove.o memset.o memzero.o setbit.o \
-- strchr.o strrchr.o \
-+ delay.o delay-loop.o findbit.o memchr.o memzero.o \
-+ setbit.o strchr.o strrchr.o \
- testchangebit.o testclearbit.o testsetbit.o \
- ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
- ucmpdi2.o lib1funcs.o div64.o \
-@@ -18,6 +17,16 @@ lib-y := backtrace.o changebit.o csumip
- mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
- copy_from_user.o copy_to_user.o
-
-+# Choose optimised implementations for Raspberry Pi
-+ifeq ($(CONFIG_MACH_BCM2708),y)
-+ CFLAGS_uaccess_with_memcpy.o += -DCOPY_FROM_USER_THRESHOLD=1600
-+ CFLAGS_uaccess_with_memcpy.o += -DCOPY_TO_USER_THRESHOLD=672
-+ obj-$(CONFIG_MODULES) += exports_rpi.o
-+ lib-y += memcpy_rpi.o memmove_rpi.o memset_rpi.o memcmp_rpi.o
-+else
-+ lib-y += memcpy.o memmove.o memset.o
-+endif
-+
- # using lib_ here won't override already available weak symbols
- obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
-
---- /dev/null
-+++ b/arch/arm/lib/arm-mem.h
-@@ -0,0 +1,159 @@
-+/*
-+Copyright (c) 2013, Raspberry Pi Foundation
-+Copyright (c) 2013, RISC OS Open Ltd
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+ * Neither the name of the copyright holder nor the
-+ names of its contributors may be used to endorse or promote products
-+ derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+*/
-+
-+.macro myfunc fname
-+ .func fname
-+ .global fname
-+fname:
-+.endm
-+
-+.macro preload_leading_step1 backwards, ptr, base
-+/* If the destination is already 16-byte aligned, then we need to preload
-+ * between 0 and prefetch_distance (inclusive) cache lines ahead so there
-+ * are no gaps when the inner loop starts.
-+ */
-+ .if backwards
-+ sub ptr, base, #1
-+ bic ptr, ptr, #31
-+ .else
-+ bic ptr, base, #31
-+ .endif
-+ .set OFFSET, 0
-+ .rept prefetch_distance+1
-+ pld [ptr, #OFFSET]
-+ .if backwards
-+ .set OFFSET, OFFSET-32
-+ .else
-+ .set OFFSET, OFFSET+32
-+ .endif
-+ .endr
-+.endm
-+
-+.macro preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
-+/* However, if the destination is not 16-byte aligned, we may need to
-+ * preload one more cache line than that. The question we need to ask is:
-+ * are the leading bytes more than the amount by which the source
-+ * pointer will be rounded down for preloading, and if so, by how many
-+ * cache lines?
-+ */
-+ .if backwards
-+/* Here we compare against how many bytes we are into the
-+ * cache line, counting down from the highest such address.
-+ * Effectively, we want to calculate
-+ * leading_bytes = dst&15
-+ * cacheline_offset = 31-((src-leading_bytes-1)&31)
-+ * extra_needed = leading_bytes - cacheline_offset
-+ * and test if extra_needed is <= 0, or rearranging:
-+ * leading_bytes + ((src-leading_bytes-1)&31) <= 31
-+ */
-+ mov tmp, base, lsl #32-5
-+ sbc tmp, tmp, leading_bytes, lsl #32-5
-+ adds tmp, tmp, leading_bytes, lsl #32-5
-+ bcc 61f
-+ pld [ptr, #-32*(prefetch_distance+1)]
-+ .else
-+/* Effectively, we want to calculate
-+ * leading_bytes = (-dst)&15
-+ * cacheline_offset = (src+leading_bytes)&31
-+ * extra_needed = leading_bytes - cacheline_offset
-+ * and test if extra_needed is <= 0.
-+ */
-+ mov tmp, base, lsl #32-5
-+ add tmp, tmp, leading_bytes, lsl #32-5
-+ rsbs tmp, tmp, leading_bytes, lsl #32-5
-+ bls 61f
-+ pld [ptr, #32*(prefetch_distance+1)]
-+ .endif
-+61:
-+.endm
-+
-+.macro preload_trailing backwards, base, remain, tmp
-+ /* We need either 0, 1 or 2 extra preloads */
-+ .if backwards
-+ rsb tmp, base, #0
-+ mov tmp, tmp, lsl #32-5
-+ .else
-+ mov tmp, base, lsl #32-5
-+ .endif
-+ adds tmp, tmp, remain, lsl #32-5
-+ adceqs tmp, tmp, #0
-+ /* The instruction above has two effects: it ensures Z is only
-+ * set if C was clear (so Z indicates that both shifted quantities
-+ * were 0), and it clears C if Z was set (so C indicates that the sum
-+ * of the shifted quantities was strictly greater than 32) */
-+ beq 82f
-+ .if backwards
-+ sub tmp, base, #1
-+ bic tmp, tmp, #31
-+ .else
-+ bic tmp, base, #31
-+ .endif
-+ bcc 81f
-+ .if backwards
-+ pld [tmp, #-32*(prefetch_distance+1)]
-+81:
-+ pld [tmp, #-32*prefetch_distance]
-+ .else
-+ pld [tmp, #32*(prefetch_distance+2)]
-+81:
-+ pld [tmp, #32*(prefetch_distance+1)]
-+ .endif
-+82:
-+.endm
-+
-+.macro preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
-+ .if backwards
-+ sub tmp0, base, #1
-+ bic tmp0, tmp0, #31
-+ pld [tmp0]
-+ sub tmp1, base, remain, lsl #shift
-+ .else
-+ bic tmp0, base, #31
-+ pld [tmp0]
-+ add tmp1, base, remain, lsl #shift
-+ sub tmp1, tmp1, #1
-+ .endif
-+ bic tmp1, tmp1, #31
-+ cmp tmp1, tmp0
-+ beq 92f
-+ .if narrow_case
-+ /* In this case, all the data fits in either 1 or 2 cache lines */
-+ pld [tmp1]
-+ .else
-+91:
-+ .if backwards
-+ sub tmp0, tmp0, #32
-+ .else
-+ add tmp0, tmp0, #32
-+ .endif
-+ cmp tmp0, tmp1
-+ pld [tmp0]
-+ bne 91b
-+ .endif
-+92:
-+.endm
---- a/arch/arm/lib/copy_from_user.S
-+++ b/arch/arm/lib/copy_from_user.S
-@@ -89,11 +89,13 @@
-
- .text
-
--ENTRY(__copy_from_user)
-+ENTRY(__copy_from_user_std)
-+WEAK(__copy_from_user)
-
- #include "copy_template.S"
-
- ENDPROC(__copy_from_user)
-+ENDPROC(__copy_from_user_std)
-
- .pushsection .fixup,"ax"
- .align 0
---- /dev/null
-+++ b/arch/arm/lib/exports_rpi.c
-@@ -0,0 +1,37 @@
-+/**
-+ * Copyright (c) 2014, Raspberry Pi (Trading) Ltd.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions, and the following disclaimer,
-+ * without modification.
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the distribution.
-+ * 3. The names of the above-listed copyright holders may not be used
-+ * to endorse or promote products derived from this software without
-+ * specific prior written permission.
-+ *
-+ * ALTERNATIVELY, this software may be distributed under the terms of the
-+ * GNU General Public License ("GPL") version 2, as published by the Free
-+ * Software Foundation.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+
-+EXPORT_SYMBOL(memcmp);
---- /dev/null
-+++ b/arch/arm/lib/memcmp_rpi.S
-@@ -0,0 +1,285 @@
-+/*
-+Copyright (c) 2013, Raspberry Pi Foundation
-+Copyright (c) 2013, RISC OS Open Ltd
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+ * Neither the name of the copyright holder nor the
-+ names of its contributors may be used to endorse or promote products
-+ derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+*/
-+
-+#include <linux/linkage.h>
-+#include "arm-mem.h"
-+
-+/* Prevent the stack from becoming executable */
-+#if defined(__linux__) && defined(__ELF__)
-+.section .note.GNU-stack,"",%progbits
-+#endif
-+
-+ .text
-+ .arch armv6
-+ .object_arch armv4
-+ .arm
-+ .altmacro
-+ .p2align 2
-+
-+.macro memcmp_process_head unaligned
-+ .if unaligned
-+ ldr DAT0, [S_1], #4
-+ ldr DAT1, [S_1], #4
-+ ldr DAT2, [S_1], #4
-+ ldr DAT3, [S_1], #4
-+ .else
-+ ldmia S_1!, {DAT0, DAT1, DAT2, DAT3}
-+ .endif
-+ ldmia S_2!, {DAT4, DAT5, DAT6, DAT7}
-+.endm
-+
-+.macro memcmp_process_tail
-+ cmp DAT0, DAT4
-+ cmpeq DAT1, DAT5
-+ cmpeq DAT2, DAT6
-+ cmpeq DAT3, DAT7
-+ bne 200f
-+.endm
-+
-+.macro memcmp_leading_31bytes
-+ movs DAT0, OFF, lsl #31
-+ ldrmib DAT0, [S_1], #1
-+ ldrcsh DAT1, [S_1], #2
-+ ldrmib DAT4, [S_2], #1
-+ ldrcsh DAT5, [S_2], #2
-+ movpl DAT0, #0
-+ movcc DAT1, #0
-+ movpl DAT4, #0
-+ movcc DAT5, #0
-+ submi N, N, #1
-+ subcs N, N, #2
-+ cmp DAT0, DAT4
-+ cmpeq DAT1, DAT5
-+ bne 200f
-+ movs DAT0, OFF, lsl #29
-+ ldrmi DAT0, [S_1], #4
-+ ldrcs DAT1, [S_1], #4
-+ ldrcs DAT2, [S_1], #4
-+ ldrmi DAT4, [S_2], #4
-+ ldmcsia S_2!, {DAT5, DAT6}
-+ movpl DAT0, #0
-+ movcc DAT1, #0
-+ movcc DAT2, #0
-+ movpl DAT4, #0
-+ movcc DAT5, #0
-+ movcc DAT6, #0
-+ submi N, N, #4
-+ subcs N, N, #8
-+ cmp DAT0, DAT4
-+ cmpeq DAT1, DAT5
-+ cmpeq DAT2, DAT6
-+ bne 200f
-+ tst OFF, #16
-+ beq 105f
-+ memcmp_process_head 1
-+ sub N, N, #16
-+ memcmp_process_tail
-+105:
-+.endm
-+
-+.macro memcmp_trailing_15bytes unaligned
-+ movs N, N, lsl #29
-+ .if unaligned
-+ ldrcs DAT0, [S_1], #4
-+ ldrcs DAT1, [S_1], #4
-+ .else
-+ ldmcsia S_1!, {DAT0, DAT1}
-+ .endif
-+ ldrmi DAT2, [S_1], #4
-+ ldmcsia S_2!, {DAT4, DAT5}
-+ ldrmi DAT6, [S_2], #4
-+ movcc DAT0, #0
-+ movcc DAT1, #0
-+ movpl DAT2, #0
-+ movcc DAT4, #0
-+ movcc DAT5, #0
-+ movpl DAT6, #0
-+ cmp DAT0, DAT4
-+ cmpeq DAT1, DAT5
-+ cmpeq DAT2, DAT6
-+ bne 200f
-+ movs N, N, lsl #2
-+ ldrcsh DAT0, [S_1], #2
-+ ldrmib DAT1, [S_1]
-+ ldrcsh DAT4, [S_2], #2
-+ ldrmib DAT5, [S_2]
-+ movcc DAT0, #0
-+ movpl DAT1, #0
-+ movcc DAT4, #0
-+ movpl DAT5, #0
-+ cmp DAT0, DAT4
-+ cmpeq DAT1, DAT5
-+ bne 200f
-+.endm
-+
-+.macro memcmp_long_inner_loop unaligned
-+110:
-+ memcmp_process_head unaligned
-+ pld [S_2, #prefetch_distance*32 + 16]
-+ memcmp_process_tail
-+ memcmp_process_head unaligned
-+ pld [S_1, OFF]
-+ memcmp_process_tail
-+ subs N, N, #32
-+ bhs 110b
-+ /* Just before the final (prefetch_distance+1) 32-byte blocks,
-+ * deal with final preloads */
-+ preload_trailing 0, S_1, N, DAT0
-+ preload_trailing 0, S_2, N, DAT0
-+ add N, N, #(prefetch_distance+2)*32 - 16
-+120:
-+ memcmp_process_head unaligned
-+ memcmp_process_tail
-+ subs N, N, #16
-+ bhs 120b
-+ /* Trailing words and bytes */
-+ tst N, #15
-+ beq 199f
-+ memcmp_trailing_15bytes unaligned
-+199: /* Reached end without detecting a difference */
-+ mov a1, #0
-+ setend le
-+ pop {DAT1-DAT6, pc}
-+.endm
-+
-+.macro memcmp_short_inner_loop unaligned
-+ subs N, N, #16 /* simplifies inner loop termination */
-+ blo 122f
-+120:
-+ memcmp_process_head unaligned
-+ memcmp_process_tail
-+ subs N, N, #16
-+ bhs 120b
-+122: /* Trailing words and bytes */
-+ tst N, #15
-+ beq 199f
-+ memcmp_trailing_15bytes unaligned
-+199: /* Reached end without detecting a difference */
-+ mov a1, #0
-+ setend le
-+ pop {DAT1-DAT6, pc}
-+.endm
-+
-+/*
-+ * int memcmp(const void *s1, const void *s2, size_t n);
-+ * On entry:
-+ * a1 = pointer to buffer 1
-+ * a2 = pointer to buffer 2
-+ * a3 = number of bytes to compare (as unsigned chars)
-+ * On exit:
-+ * a1 = >0/=0/<0 if s1 >/=/< s2
-+ */
-+
-+.set prefetch_distance, 2
-+
-+ENTRY(memcmp)
-+ S_1 .req a1
-+ S_2 .req a2
-+ N .req a3
-+ DAT0 .req a4
-+ DAT1 .req v1
-+ DAT2 .req v2
-+ DAT3 .req v3
-+ DAT4 .req v4
-+ DAT5 .req v5
-+ DAT6 .req v6
-+ DAT7 .req ip
-+ OFF .req lr
-+
-+ push {DAT1-DAT6, lr}
-+ setend be /* lowest-addressed bytes are most significant */
-+
-+ /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
-+ cmp N, #(prefetch_distance+3)*32 - 1
-+ blo 170f
-+
-+ /* Long case */
-+ /* Adjust N so that the decrement instruction can also test for
-+ * inner loop termination. We want it to stop when there are
-+ * (prefetch_distance+1) complete blocks to go. */
-+ sub N, N, #(prefetch_distance+2)*32
-+ preload_leading_step1 0, DAT0, S_1
-+ preload_leading_step1 0, DAT1, S_2
-+ tst S_2, #31
-+ beq 154f
-+ rsb OFF, S_2, #0 /* no need to AND with 31 here */
-+ preload_leading_step2 0, DAT0, S_1, OFF, DAT2
-+ preload_leading_step2 0, DAT1, S_2, OFF, DAT2
-+ memcmp_leading_31bytes
-+154: /* Second source now cacheline (32-byte) aligned; we have at
-+ * least one prefetch to go. */
-+ /* Prefetch offset is best selected such that it lies in the
-+ * first 8 of each 32 bytes - but it's just as easy to aim for
-+ * the first one */
-+ and OFF, S_1, #31
-+ rsb OFF, OFF, #32*prefetch_distance
-+ tst S_1, #3
-+ bne 140f
-+ memcmp_long_inner_loop 0
-+140: memcmp_long_inner_loop 1
-+
-+170: /* Short case */
-+ teq N, #0
-+ beq 199f
-+ preload_all 0, 0, 0, S_1, N, DAT0, DAT1
-+ preload_all 0, 0, 0, S_2, N, DAT0, DAT1
-+ tst S_2, #3
-+ beq 174f
-+172: subs N, N, #1
-+ blo 199f
-+ ldrb DAT0, [S_1], #1
-+ ldrb DAT4, [S_2], #1
-+ cmp DAT0, DAT4
-+ bne 200f
-+ tst S_2, #3
-+ bne 172b
-+174: /* Second source now 4-byte aligned; we have 0 or more bytes to go */
-+ tst S_1, #3
-+ bne 140f
-+ memcmp_short_inner_loop 0
-+140: memcmp_short_inner_loop 1
-+
-+200: /* Difference found: determine sign. */
-+ movhi a1, #1
-+ movlo a1, #-1
-+ setend le
-+ pop {DAT1-DAT6, pc}
-+
-+ .unreq S_1
-+ .unreq S_2
-+ .unreq N
-+ .unreq DAT0
-+ .unreq DAT1
-+ .unreq DAT2
-+ .unreq DAT3
-+ .unreq DAT4
-+ .unreq DAT5
-+ .unreq DAT6
-+ .unreq DAT7
-+ .unreq OFF
-+ENDPROC(memcmp)
---- /dev/null
-+++ b/arch/arm/lib/memcpy_rpi.S
-@@ -0,0 +1,59 @@
-+/*
-+Copyright (c) 2013, Raspberry Pi Foundation
-+Copyright (c) 2013, RISC OS Open Ltd
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+ * Neither the name of the copyright holder nor the
-+ names of its contributors may be used to endorse or promote products
-+ derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+*/
-+
-+#include <linux/linkage.h>
-+#include "arm-mem.h"
-+#include "memcpymove.h"
-+
-+/* Prevent the stack from becoming executable */
-+#if defined(__linux__) && defined(__ELF__)
-+.section .note.GNU-stack,"",%progbits
-+#endif
-+
-+ .text
-+ .arch armv6
-+ .object_arch armv4
-+ .arm
-+ .altmacro
-+ .p2align 2
-+
-+/*
-+ * void *memcpy(void * restrict s1, const void * restrict s2, size_t n);
-+ * On entry:
-+ * a1 = pointer to destination
-+ * a2 = pointer to source
-+ * a3 = number of bytes to copy
-+ * On exit:
-+ * a1 preserved
-+ */
-+
-+.set prefetch_distance, 3
-+
-+ENTRY(memcpy)
-+ memcpy 0
-+ENDPROC(memcpy)
---- /dev/null
-+++ b/arch/arm/lib/memcpymove.h
-@@ -0,0 +1,506 @@
-+/*
-+Copyright (c) 2013, Raspberry Pi Foundation
-+Copyright (c) 2013, RISC OS Open Ltd
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+ * Neither the name of the copyright holder nor the
-+ names of its contributors may be used to endorse or promote products
-+ derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+*/
-+
-+.macro unaligned_words backwards, align, use_pld, words, r0, r1, r2, r3, r4, r5, r6, r7, r8
-+ .if words == 1
-+ .if backwards
-+ mov r1, r0, lsl #32-align*8
-+ ldr r0, [S, #-4]!
-+ orr r1, r1, r0, lsr #align*8
-+ str r1, [D, #-4]!
-+ .else
-+ mov r0, r1, lsr #align*8
-+ ldr r1, [S, #4]!
-+ orr r0, r0, r1, lsl #32-align*8
-+ str r0, [D], #4
-+ .endif
-+ .elseif words == 2
-+ .if backwards
-+ ldr r1, [S, #-4]!
-+ mov r2, r0, lsl #32-align*8
-+ ldr r0, [S, #-4]!
-+ orr r2, r2, r1, lsr #align*8
-+ mov r1, r1, lsl #32-align*8
-+ orr r1, r1, r0, lsr #align*8
-+ stmdb D!, {r1, r2}
-+ .else
-+ ldr r1, [S, #4]!
-+ mov r0, r2, lsr #align*8
-+ ldr r2, [S, #4]!
-+ orr r0, r0, r1, lsl #32-align*8
-+ mov r1, r1, lsr #align*8
-+ orr r1, r1, r2, lsl #32-align*8
-+ stmia D!, {r0, r1}
-+ .endif
-+ .elseif words == 4
-+ .if backwards
-+ ldmdb S!, {r2, r3}
-+ mov r4, r0, lsl #32-align*8
-+ ldmdb S!, {r0, r1}
-+ orr r4, r4, r3, lsr #align*8
-+ mov r3, r3, lsl #32-align*8
-+ orr r3, r3, r2, lsr #align*8
-+ mov r2, r2, lsl #32-align*8
-+ orr r2, r2, r1, lsr #align*8
-+ mov r1, r1, lsl #32-align*8
-+ orr r1, r1, r0, lsr #align*8
-+ stmdb D!, {r1, r2, r3, r4}
-+ .else
-+ ldmib S!, {r1, r2}
-+ mov r0, r4, lsr #align*8
-+ ldmib S!, {r3, r4}
-+ orr r0, r0, r1, lsl #32-align*8
-+ mov r1, r1, lsr #align*8
-+ orr r1, r1, r2, lsl #32-align*8
-+ mov r2, r2, lsr #align*8
-+ orr r2, r2, r3, lsl #32-align*8
-+ mov r3, r3, lsr #align*8
-+ orr r3, r3, r4, lsl #32-align*8
-+ stmia D!, {r0, r1, r2, r3}
-+ .endif
-+ .elseif words == 8
-+ .if backwards
-+ ldmdb S!, {r4, r5, r6, r7}
-+ mov r8, r0, lsl #32-align*8
-+ ldmdb S!, {r0, r1, r2, r3}
-+ .if use_pld
-+ pld [S, OFF]
-+ .endif
-+ orr r8, r8, r7, lsr #align*8
-+ mov r7, r7, lsl #32-align*8
-+ orr r7, r7, r6, lsr #align*8
-+ mov r6, r6, lsl #32-align*8
-+ orr r6, r6, r5, lsr #align*8
-+ mov r5, r5, lsl #32-align*8
-+ orr r5, r5, r4, lsr #align*8
-+ mov r4, r4, lsl #32-align*8
-+ orr r4, r4, r3, lsr #align*8
-+ mov r3, r3, lsl #32-align*8
-+ orr r3, r3, r2, lsr #align*8
-+ mov r2, r2, lsl #32-align*8
-+ orr r2, r2, r1, lsr #align*8
-+ mov r1, r1, lsl #32-align*8
-+ orr r1, r1, r0, lsr #align*8
-+ stmdb D!, {r5, r6, r7, r8}
-+ stmdb D!, {r1, r2, r3, r4}
-+ .else
-+ ldmib S!, {r1, r2, r3, r4}
-+ mov r0, r8, lsr #align*8
-+ ldmib S!, {r5, r6, r7, r8}
-+ .if use_pld
-+ pld [S, OFF]
-+ .endif
-+ orr r0, r0, r1, lsl #32-align*8
-+ mov r1, r1, lsr #align*8
-+ orr r1, r1, r2, lsl #32-align*8
-+ mov r2, r2, lsr #align*8
-+ orr r2, r2, r3, lsl #32-align*8
-+ mov r3, r3, lsr #align*8
-+ orr r3, r3, r4, lsl #32-align*8
-+ mov r4, r4, lsr #align*8
-+ orr r4, r4, r5, lsl #32-align*8
-+ mov r5, r5, lsr #align*8
-+ orr r5, r5, r6, lsl #32-align*8
-+ mov r6, r6, lsr #align*8
-+ orr r6, r6, r7, lsl #32-align*8
-+ mov r7, r7, lsr #align*8
-+ orr r7, r7, r8, lsl #32-align*8
-+ stmia D!, {r0, r1, r2, r3}
-+ stmia D!, {r4, r5, r6, r7}
-+ .endif
-+ .endif
-+.endm
-+
-+.macro memcpy_leading_15bytes backwards, align
-+ movs DAT1, DAT2, lsl #31
-+ sub N, N, DAT2
-+ .if backwards
-+ ldrmib DAT0, [S, #-1]!
-+ ldrcsh DAT1, [S, #-2]!
-+ strmib DAT0, [D, #-1]!
-+ strcsh DAT1, [D, #-2]!
-+ .else
-+ ldrmib DAT0, [S], #1
-+ ldrcsh DAT1, [S], #2
-+ strmib DAT0, [D], #1
-+ strcsh DAT1, [D], #2
-+ .endif
-+ movs DAT1, DAT2, lsl #29
-+ .if backwards
-+ ldrmi DAT0, [S, #-4]!
-+ .if align == 0
-+ ldmcsdb S!, {DAT1, DAT2}
-+ .else
-+ ldrcs DAT2, [S, #-4]!
-+ ldrcs DAT1, [S, #-4]!
-+ .endif
-+ strmi DAT0, [D, #-4]!
-+ stmcsdb D!, {DAT1, DAT2}
-+ .else
-+ ldrmi DAT0, [S], #4
-+ .if align == 0
-+ ldmcsia S!, {DAT1, DAT2}
-+ .else
-+ ldrcs DAT1, [S], #4
-+ ldrcs DAT2, [S], #4
-+ .endif
-+ strmi DAT0, [D], #4
-+ stmcsia D!, {DAT1, DAT2}
-+ .endif
-+.endm
-+
-+.macro memcpy_trailing_15bytes backwards, align
-+ movs N, N, lsl #29
-+ .if backwards
-+ .if align == 0
-+ ldmcsdb S!, {DAT0, DAT1}
-+ .else
-+ ldrcs DAT1, [S, #-4]!
-+ ldrcs DAT0, [S, #-4]!
-+ .endif
-+ ldrmi DAT2, [S, #-4]!
-+ stmcsdb D!, {DAT0, DAT1}
-+ strmi DAT2, [D, #-4]!
-+ .else
-+ .if align == 0
-+ ldmcsia S!, {DAT0, DAT1}
-+ .else
-+ ldrcs DAT0, [S], #4
-+ ldrcs DAT1, [S], #4
-+ .endif
-+ ldrmi DAT2, [S], #4
-+ stmcsia D!, {DAT0, DAT1}
-+ strmi DAT2, [D], #4
-+ .endif
-+ movs N, N, lsl #2
-+ .if backwards
-+ ldrcsh DAT0, [S, #-2]!
-+ ldrmib DAT1, [S, #-1]
-+ strcsh DAT0, [D, #-2]!
-+ strmib DAT1, [D, #-1]
-+ .else
-+ ldrcsh DAT0, [S], #2
-+ ldrmib DAT1, [S]
-+ strcsh DAT0, [D], #2
-+ strmib DAT1, [D]
-+ .endif
-+.endm
-+
-+.macro memcpy_long_inner_loop backwards, align
-+ .if align != 0
-+ .if backwards
-+ ldr DAT0, [S, #-align]!
-+ .else
-+ ldr LAST, [S, #-align]!
-+ .endif
-+ .endif
-+110:
-+ .if align == 0
-+ .if backwards
-+ ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
-+ pld [S, OFF]
-+ stmdb D!, {DAT4, DAT5, DAT6, LAST}
-+ stmdb D!, {DAT0, DAT1, DAT2, DAT3}
-+ .else
-+ ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
-+ pld [S, OFF]
-+ stmia D!, {DAT0, DAT1, DAT2, DAT3}
-+ stmia D!, {DAT4, DAT5, DAT6, LAST}
-+ .endif
-+ .else
-+ unaligned_words backwards, align, 1, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
-+ .endif
-+ subs N, N, #32
-+ bhs 110b
-+ /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
-+ preload_trailing backwards, S, N, OFF
-+ add N, N, #(prefetch_distance+2)*32 - 32
-+120:
-+ .if align == 0
-+ .if backwards
-+ ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
-+ stmdb D!, {DAT4, DAT5, DAT6, LAST}
-+ stmdb D!, {DAT0, DAT1, DAT2, DAT3}
-+ .else
-+ ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
-+ stmia D!, {DAT0, DAT1, DAT2, DAT3}
-+ stmia D!, {DAT4, DAT5, DAT6, LAST}
-+ .endif
-+ .else
-+ unaligned_words backwards, align, 0, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
-+ .endif
-+ subs N, N, #32
-+ bhs 120b
-+ tst N, #16
-+ .if align == 0
-+ .if backwards
-+ ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
-+ stmnedb D!, {DAT0, DAT1, DAT2, LAST}
-+ .else
-+ ldmneia S!, {DAT0, DAT1, DAT2, LAST}
-+ stmneia D!, {DAT0, DAT1, DAT2, LAST}
-+ .endif
-+ .else
-+ beq 130f
-+ unaligned_words backwards, align, 0, 4, DAT0, DAT1, DAT2, DAT3, LAST
-+130:
-+ .endif
-+ /* Trailing words and bytes */
-+ tst N, #15
-+ beq 199f
-+ .if align != 0
-+ add S, S, #align
-+ .endif
-+ memcpy_trailing_15bytes backwards, align
-+199:
-+ pop {DAT3, DAT4, DAT5, DAT6, DAT7}
-+ pop {D, DAT1, DAT2, pc}
-+.endm
-+
-+.macro memcpy_medium_inner_loop backwards, align
-+120:
-+ .if backwards
-+ .if align == 0
-+ ldmdb S!, {DAT0, DAT1, DAT2, LAST}
-+ .else
-+ ldr LAST, [S, #-4]!
-+ ldr DAT2, [S, #-4]!
-+ ldr DAT1, [S, #-4]!
-+ ldr DAT0, [S, #-4]!
-+ .endif
-+ stmdb D!, {DAT0, DAT1, DAT2, LAST}
-+ .else
-+ .if align == 0
-+ ldmia S!, {DAT0, DAT1, DAT2, LAST}
-+ .else
-+ ldr DAT0, [S], #4
-+ ldr DAT1, [S], #4
-+ ldr DAT2, [S], #4
-+ ldr LAST, [S], #4
-+ .endif
-+ stmia D!, {DAT0, DAT1, DAT2, LAST}
-+ .endif
-+ subs N, N, #16
-+ bhs 120b
-+ /* Trailing words and bytes */
-+ tst N, #15
-+ beq 199f
-+ memcpy_trailing_15bytes backwards, align
-+199:
-+ pop {D, DAT1, DAT2, pc}
-+.endm
-+
-+.macro memcpy_short_inner_loop backwards, align
-+ tst N, #16
-+ .if backwards
-+ .if align == 0
-+ ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
-+ .else
-+ ldrne LAST, [S, #-4]!
-+ ldrne DAT2, [S, #-4]!
-+ ldrne DAT1, [S, #-4]!
-+ ldrne DAT0, [S, #-4]!
-+ .endif
-+ stmnedb D!, {DAT0, DAT1, DAT2, LAST}
-+ .else
-+ .if align == 0
-+ ldmneia S!, {DAT0, DAT1, DAT2, LAST}
-+ .else
-+ ldrne DAT0, [S], #4
-+ ldrne DAT1, [S], #4
-+ ldrne DAT2, [S], #4
-+ ldrne LAST, [S], #4
-+ .endif
-+ stmneia D!, {DAT0, DAT1, DAT2, LAST}
-+ .endif
-+ memcpy_trailing_15bytes backwards, align
-+199:
-+ pop {D, DAT1, DAT2, pc}
-+.endm
-+
-+.macro memcpy backwards
-+ D .req a1 /* destination pointer */
-+ S .req a2 /* source pointer */
-+ N .req a3 /* number of bytes still to copy */
-+ DAT0 .req a4
-+ DAT1 .req v1
-+ DAT2 .req v2
-+ DAT3 .req v3
-+ DAT4 .req v4
-+ DAT5 .req v5
-+ DAT6 .req v6
-+ DAT7 .req sl
-+ LAST .req ip
-+ OFF .req lr
-+
-+ .cfi_startproc
-+
-+ push {D, DAT1, DAT2, lr} /* D is saved so the exit paths can pop the original destination back into a1 */
-+
-+ .cfi_def_cfa_offset 16
-+ .cfi_rel_offset D, 0
-+ .cfi_undefined S
-+ .cfi_undefined N
-+ .cfi_undefined DAT0
-+ .cfi_rel_offset DAT1, 4
-+ .cfi_rel_offset DAT2, 8
-+ .cfi_undefined LAST
-+ .cfi_rel_offset lr, 12
-+
-+ .if backwards
-+ add D, D, N /* backwards copy: point both pointers one past the last byte */
-+ add S, S, N
-+ .endif
-+
-+ /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
-+ cmp N, #31
-+ blo 170f /* fewer than 31 bytes: short case */
-+ /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
-+ cmp N, #(prefetch_distance+3)*32 - 1
-+ blo 160f /* not enough data for the prefetching loop: medium case */
-+
-+ /* Long case */
-+ push {DAT3, DAT4, DAT5, DAT6, DAT7}
-+
-+ .cfi_def_cfa_offset 36
-+ .cfi_rel_offset D, 20
-+ .cfi_rel_offset DAT1, 24
-+ .cfi_rel_offset DAT2, 28
-+ .cfi_rel_offset DAT3, 0
-+ .cfi_rel_offset DAT4, 4
-+ .cfi_rel_offset DAT5, 8
-+ .cfi_rel_offset DAT6, 12
-+ .cfi_rel_offset DAT7, 16
-+ .cfi_rel_offset lr, 32
-+
-+ /* Adjust N so that the decrement instruction can also test for
-+ * inner loop termination. We want it to stop when there are
-+ * (prefetch_distance+1) complete blocks to go. */
-+ sub N, N, #(prefetch_distance+2)*32
-+ preload_leading_step1 backwards, DAT0, S
-+ .if backwards
-+ /* Bug in GAS: it accepts, but mis-assembles the instruction
-+ * ands DAT2, D, #60, 2
-+ * which sets DAT2 to the number of leading bytes until destination is aligned and also clears C (sets borrow)
-+ */
-+ .word 0xE210513C
-+ beq 154f
-+ .else
-+ ands DAT2, D, #15
-+ beq 154f
-+ rsb DAT2, DAT2, #16 /* number of leading bytes until destination aligned */
-+ .endif
-+ preload_leading_step2 backwards, DAT0, S, DAT2, OFF
-+ memcpy_leading_15bytes backwards, 1
-+154: /* Destination now 16-byte aligned; we have at least one prefetch as well as at least one 16-byte output block */
-+ /* Prefetch offset is best selected such that it lies in the first 8 of each 32 bytes - but it's just as easy to aim for the first one */
-+ .if backwards
-+ rsb OFF, S, #3
-+ and OFF, OFF, #28
-+ sub OFF, OFF, #32*(prefetch_distance+1)
-+ .else
-+ and OFF, S, #28
-+ rsb OFF, OFF, #32*prefetch_distance
-+ .endif
-+ movs DAT0, S, lsl #31 /* N flag = bit 0 of S, C flag = bit 1 of S: select the inner loop by S & 3 */
-+ bhi 157f /* S & 3 == 3 */
-+ bcs 156f /* S & 3 == 2 */
-+ bmi 155f /* S & 3 == 1; falling through means S & 3 == 0 */
-+ memcpy_long_inner_loop backwards, 0
-+155: memcpy_long_inner_loop backwards, 1
-+156: memcpy_long_inner_loop backwards, 2
-+157: memcpy_long_inner_loop backwards, 3
-+
-+ .cfi_def_cfa_offset 16
-+ .cfi_rel_offset D, 0
-+ .cfi_rel_offset DAT1, 4
-+ .cfi_rel_offset DAT2, 8
-+ .cfi_same_value DAT3
-+ .cfi_same_value DAT4
-+ .cfi_same_value DAT5
-+ .cfi_same_value DAT6
-+ .cfi_same_value DAT7
-+ .cfi_rel_offset lr, 12
-+
-+160: /* Medium case */
-+ preload_all backwards, 0, 0, S, N, DAT2, OFF
-+ sub N, N, #16 /* simplifies inner loop termination */
-+ .if backwards
-+ ands DAT2, D, #15 /* backwards: D & 15 is already the number of leading bytes */
-+ beq 164f
-+ .else
-+ ands DAT2, D, #15
-+ beq 164f
-+ rsb DAT2, DAT2, #16 /* forwards: 16 - (D & 15) leading bytes until D is 16-byte aligned */
-+ .endif
-+ memcpy_leading_15bytes backwards, align
-+164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */
-+ tst S, #3
-+ bne 140f /* source not word-aligned: use the align == 1 variant */
-+ memcpy_medium_inner_loop backwards, 0
-+140: memcpy_medium_inner_loop backwards, 1
-+
-+170: /* Short case, less than 31 bytes, so no guarantee of at least one 16-byte block */
-+ teq N, #0
-+ beq 199f /* N == 0: nothing to copy */
-+ preload_all backwards, 1, 0, S, N, DAT2, LAST
-+ tst D, #3
-+ beq 174f
-+172: subs N, N, #1 /* copy single bytes until D is word-aligned */
-+ blo 199f /* byte count ran out before D became word-aligned */
-+ .if backwards
-+ ldrb DAT0, [S, #-1]!
-+ strb DAT0, [D, #-1]!
-+ .else
-+ ldrb DAT0, [S], #1
-+ strb DAT0, [D], #1
-+ .endif
-+ tst D, #3
-+ bne 172b
-+174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
-+ tst S, #3
-+ bne 140f /* source not word-aligned: use the align == 1 variant */
-+ memcpy_short_inner_loop backwards, 0
-+140: memcpy_short_inner_loop backwards, 1
-+
-+ .cfi_endproc
-+
-+ .unreq D
-+ .unreq S
-+ .unreq N
-+ .unreq DAT0
-+ .unreq DAT1
-+ .unreq DAT2
-+ .unreq DAT3
-+ .unreq DAT4
-+ .unreq DAT5
-+ .unreq DAT6
-+ .unreq DAT7
-+ .unreq LAST
-+ .unreq OFF
-+.endm
---- /dev/null
-+++ b/arch/arm/lib/memmove_rpi.S
-@@ -0,0 +1,61 @@
-+/*
-+Copyright (c) 2013, Raspberry Pi Foundation
-+Copyright (c) 2013, RISC OS Open Ltd
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+ * Neither the name of the copyright holder nor the
-+ names of its contributors may be used to endorse or promote products
-+ derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+*/
-+
-+#include <linux/linkage.h>
-+#include "arm-mem.h"
-+#include "memcpymove.h"
-+
-+/* Prevent the stack from becoming executable */
-+#if defined(__linux__) && defined(__ELF__)
-+.section .note.GNU-stack,"",%progbits
-+#endif
-+
-+ .text
-+ .arch armv6
-+ .object_arch armv4
-+ .arm
-+ .altmacro
-+ .p2align 2
-+
-+/*
-+ * void *memmove(void *s1, const void *s2, size_t n);
-+ * On entry:
-+ * a1 = pointer to destination
-+ * a2 = pointer to source
-+ * a3 = number of bytes to copy
-+ * On exit:
-+ * a1 preserved
-+ */
-+
-+.set prefetch_distance, 3
-+
-+ENTRY(memmove)
-+ cmp a2, a1 /* set flags from source - destination */
-+ bpl memcpy /* difference has bit 31 clear: branch to the forward memcpy, else fall through to the backwards expansion; pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
-+ memcpy 1
-+ENDPROC(memmove)
---- /dev/null
-+++ b/arch/arm/lib/memset_rpi.S
-@@ -0,0 +1,121 @@
-+/*
-+Copyright (c) 2013, Raspberry Pi Foundation
-+Copyright (c) 2013, RISC OS Open Ltd
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+ * Redistributions of source code must retain the above copyright
-+ notice, this list of conditions and the following disclaimer.
-+ * Redistributions in binary form must reproduce the above copyright
-+ notice, this list of conditions and the following disclaimer in the
-+ documentation and/or other materials provided with the distribution.
-+ * Neither the name of the copyright holder nor the
-+ names of its contributors may be used to endorse or promote products
-+ derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+*/
-+
-+#include <linux/linkage.h>
-+#include "arm-mem.h"
-+
-+/* Prevent the stack from becoming executable */
-+#if defined(__linux__) && defined(__ELF__)
-+.section .note.GNU-stack,"",%progbits
-+#endif
-+
-+ .text
-+ .arch armv6
-+ .object_arch armv4
-+ .arm
-+ .altmacro
-+ .p2align 2
-+
-+/*
-+ * void *memset(void *s, int c, size_t n);
-+ * On entry:
-+ * a1 = pointer to buffer to fill
-+ * a2 = byte pattern to fill with (caller-narrowed)
-+ * a3 = number of bytes to fill
-+ * On exit:
-+ * a1 preserved
-+ */
-+ENTRY(memset)
-+ S .req a1
-+ DAT0 .req a2
-+ N .req a3
-+ DAT1 .req a4
-+ DAT2 .req ip
-+ DAT3 .req lr
-+
-+ orr DAT0, DAT0, lsl #8 /* replicate the caller-narrowed fill byte into the low halfword */
-+ push {S, lr} /* save the original buffer pointer so it can be popped back into a1 on exit */
-+ orr DAT0, DAT0, lsl #16 /* ...and into all four bytes of the word */
-+ mov DAT1, DAT0
-+
-+ /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
-+ cmp N, #31
-+ blo 170f
-+
-+161: sub N, N, #16 /* simplifies inner loop termination */
-+ /* Leading words and bytes */
-+ tst S, #15
-+ beq 164f
-+ rsb DAT3, S, #0 /* bits 0-3 = number of leading bytes until aligned */
-+ movs DAT2, DAT3, lsl #31 /* N flag = bit 0, C flag = bit 1 of the leading byte count */
-+ submi N, N, #1
-+ strmib DAT0, [S], #1
-+ subcs N, N, #2
-+ strcsh DAT0, [S], #2
-+ movs DAT2, DAT3, lsl #29 /* N flag = bit 2, C flag = bit 3 of the leading byte count */
-+ submi N, N, #4
-+ strmi DAT0, [S], #4
-+ subcs N, N, #8
-+ stmcsia S!, {DAT0, DAT1}
-+164: /* Delayed set up of DAT2 and DAT3 so we could use them as scratch registers above */
-+ mov DAT2, DAT0
-+ mov DAT3, DAT0
-+ /* Now the inner loop of 16-byte stores */
-+165: stmia S!, {DAT0, DAT1, DAT2, DAT3}
-+ subs N, N, #16
-+ bhs 165b
-+166: /* Trailing words and bytes */
-+ movs N, N, lsl #29 /* C flag = bit 3 of N (8 bytes), N flag = bit 2 of N (4 bytes) */
-+ stmcsia S!, {DAT0, DAT1}
-+ strmi DAT0, [S], #4
-+ movs N, N, lsl #2 /* total shift is now 31: C flag = bit 1 (2 bytes), N flag = bit 0 (1 byte) */
-+ strcsh DAT0, [S], #2
-+ strmib DAT0, [S]
-+199: pop {S, pc} /* reload a1 with the pointer saved on entry and return */
-+
-+170: /* Short case */
-+ mov DAT2, DAT0
-+ mov DAT3, DAT0
-+ tst S, #3
-+ beq 174f
-+172: subs N, N, #1 /* store single bytes until S is word-aligned */
-+ blo 199b /* byte count ran out before S became word-aligned */
-+ strb DAT0, [S], #1
-+ tst S, #3
-+ bne 172b
-+174: tst N, #16 /* bit 4 of N set: one 16-byte store before the trailing bytes */
-+ stmneia S!, {DAT0, DAT1, DAT2, DAT3}
-+ b 166b
-+
-+ .unreq S
-+ .unreq DAT0
-+ .unreq N
-+ .unreq DAT1
-+ .unreq DAT2
-+ .unreq DAT3
-+ENDPROC(memset)
---- a/arch/arm/lib/uaccess_with_memcpy.c
-+++ b/arch/arm/lib/uaccess_with_memcpy.c
-@@ -22,6 +22,14 @@
- #include <asm/current.h>
- #include <asm/page.h>
-
-+#ifndef COPY_FROM_USER_THRESHOLD
-+#define COPY_FROM_USER_THRESHOLD 64 /* __copy_from_user() calls __copy_from_user_std() for n below this */
-+#endif
-+
-+#ifndef COPY_TO_USER_THRESHOLD
-+#define COPY_TO_USER_THRESHOLD 64 /* __copy_to_user() calls __copy_to_user_std() for n below this */
-+#endif
-+
- static int
- pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
- {
-@@ -85,7 +93,44 @@ pin_page_for_write(const void __user *_a
- return 1;
- }
-
--static unsigned long noinline
-+static int
-+pin_page_for_read(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
-+{ /* Walk current->mm's page tables for _addr. Returns 1 with the PTE mapped and locked (handed back via *ptep and *ptlp), or 0 if any level is missing/bad or the page is unusable. */
-+ unsigned long addr = (unsigned long)_addr;
-+ pgd_t *pgd;
-+ pmd_t *pmd;
-+ pte_t *pte;
-+ pud_t *pud;
-+ spinlock_t *ptl;
-+
-+ pgd = pgd_offset(current->mm, addr);
-+ if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
-+ {
-+ return 0;
-+ }
-+ pud = pud_offset(pgd, addr);
-+ if (unlikely(pud_none(*pud) || pud_bad(*pud)))
-+ {
-+ return 0;
-+ }
-+
-+ pmd = pmd_offset(pud, addr);
-+ if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
-+ return 0;
-+
-+ pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
-+ if (unlikely(!pte_present(*pte) || !pte_young(*pte))) { /* reject pages that are not present or not marked young */
-+ pte_unmap_unlock(pte, ptl); /* failure path: do not leave the PTE mapped or locked */
-+ return 0;
-+ }
-+
-+ *ptep = pte;
-+ *ptlp = ptl;
-+
-+ return 1; /* PTE is still mapped and locked here; the caller releases it with pte_unmap_unlock() */
-+}
-+
-+unsigned long noinline
- __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
- {
- int atomic;
-@@ -135,6 +180,54 @@ out:
- return n;
- }
-
-+unsigned long noinline
-+__copy_from_user_memcpy(void *to, const void __user *from, unsigned long n)
-+{ /* Copy n bytes from user address 'from' to 'to' with memcpy(), one page at a time. Returns the number of bytes left uncopied (0 when everything was copied). */
-+ int atomic;
-+
-+ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { /* get_fs() equals KERNEL_DS: plain memcpy(), no page pinning */
-+ memcpy(to, (const void *)from, n);
-+ return 0;
-+ }
-+
-+ /* the mmap semaphore is taken only if not in an atomic context */
-+ atomic = in_atomic();
-+
-+ if (!atomic)
-+ down_read(&current->mm->mmap_sem);
-+ while (n) {
-+ pte_t *pte;
-+ spinlock_t *ptl;
-+ int tocopy;
-+
-+ while (!pin_page_for_read(from, &pte, &ptl)) { /* NOTE(review): retried until the pin succeeds; confirm this cannot spin forever if the pin keeps failing after a successful __get_user() */
-+ char temp;
-+ if (!atomic)
-+ up_read(&current->mm->mmap_sem); /* drop the semaphore before touching user memory */
-+ if (__get_user(temp, (char __user *)from)) /* read one byte through the regular accessor; a non-zero result abandons the copy */
-+ goto out;
-+ if (!atomic)
-+ down_read(&current->mm->mmap_sem);
-+ }
-+
-+ tocopy = (~(unsigned long)from & ~PAGE_MASK) + 1; /* distance from 'from' to the end of its page, assuming ~PAGE_MASK is the in-page offset mask */
-+ if (tocopy > n)
-+ tocopy = n;
-+
-+ memcpy(to, (const void *)from, tocopy);
-+ to += tocopy;
-+ from += tocopy;
-+ n -= tocopy;
-+
-+ pte_unmap_unlock(pte, ptl); /* release the PTE mapping and lock taken by pin_page_for_read() */
-+ }
-+ if (!atomic)
-+ up_read(&current->mm->mmap_sem);
-+
-+out: /* the goto above arrives here with mmap_sem already released */
-+ return n;
-+}
-+
- unsigned long
- __copy_to_user(void __user *to, const void *from, unsigned long n)
- {
-@@ -145,10 +238,25 @@ __copy_to_user(void __user *to, const vo
- * With frame pointer disabled, tail call optimization kicks in
- * as well making this test almost invisible.
- */
-- if (n < 64)
-+ if (n < COPY_TO_USER_THRESHOLD)
- return __copy_to_user_std(to, from, n);
- return __copy_to_user_memcpy(to, from, n);
- }
-+
-+unsigned long
-+__copy_from_user(void *to, const void __user *from, unsigned long n)
-+{
-+ /*
-+ * This test is kept out of __copy_from_user_memcpy() above to keep
-+ * the overhead for small copies low by avoiding a large
-+ * register dump on the stack just to reload them right away.
-+ * With frame pointer disabled, tail call optimization kicks in
-+ * as well making this test almost invisible.
-+ */
-+ if (n < COPY_FROM_USER_THRESHOLD) /* small copy: hand off to the _std routine */
-+ return __copy_from_user_std(to, from, n);
-+ return __copy_from_user_memcpy(to, from, n); /* otherwise take the page-pinning memcpy() path */
-+}
-
- static unsigned long noinline
- __clear_user_memset(void __user *addr, unsigned long n)