author     root <root@artemis.panaceas.org>    2015-12-25 04:40:36 +0000
committer  root <root@artemis.panaceas.org>    2015-12-25 04:40:36 +0000
commit     849369d6c66d3054688672f97d31fceb8e8230fb
tree       6135abc790ca67dedbe07c39806591e70eda81ce /arch/arm/lib
initial_commit
Diffstat (limited to 'arch/arm/lib')
54 files changed, 5493 insertions, 0 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile new file mode 100644 index 00000000..59ff42dd --- /dev/null +++ b/arch/arm/lib/Makefile @@ -0,0 +1,47 @@ +# +# linux/arch/arm/lib/Makefile +# +# Copyright (C) 1995-2000 Russell King +# + +lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ + csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ + delay.o findbit.o memchr.o memcpy.o \ + memmove.o memset.o memzero.o setbit.o \ + strncpy_from_user.o strnlen_user.o \ + strchr.o strrchr.o \ + testchangebit.o testclearbit.o testsetbit.o \ + ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ + ucmpdi2.o lib1funcs.o div64.o sha1.o \ + io-readsb.o io-writesb.o io-readsl.o io-writesl.o + +mmu-y := clear_user.o copy_page.o getuser.o putuser.o + +# the code in uaccess.S is not preemption safe and +# probably faster on ARMv3 only +ifeq ($(CONFIG_PREEMPT),y) + mmu-y += copy_from_user.o copy_to_user.o +else +ifneq ($(CONFIG_CPU_32v3),y) + mmu-y += copy_from_user.o copy_to_user.o +else + mmu-y += uaccess.o +endif +endif + +# using lib_ here won't override already available weak symbols +obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o + +lib-$(CONFIG_MMU) += $(mmu-y) + +ifeq ($(CONFIG_CPU_32v3),y) + lib-y += io-readsw-armv3.o io-writesw-armv3.o +else + lib-y += io-readsw-armv4.o io-writesw-armv4.o +endif + +lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o +lib-$(CONFIG_ARCH_SHARK) += io-shark.o + +$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S +$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S new file mode 100644 index 00000000..638deb13 --- /dev/null +++ b/arch/arm/lib/ashldi3.S @@ -0,0 +1,53 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. 
*/ + + +#include <linux/linkage.h> + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__ashldi3) +ENTRY(__aeabi_llsl) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + ARM( orrmi ah, ah, al, lsr ip ) + THUMB( lsrmi r3, al, ip ) + THUMB( orrmi ah, ah, r3 ) + mov al, al, lsl r2 + mov pc, lr + +ENDPROC(__ashldi3) +ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S new file mode 100644 index 00000000..015e8aa5 --- /dev/null +++ b/arch/arm/lib/ashrdi3.S @@ -0,0 +1,53 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include <linux/linkage.h> + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__ashrdi3) +ENTRY(__aeabi_lasr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) + mov ah, ah, asr r2 + mov pc, lr + +ENDPROC(__ashrdi3) +ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S new file mode 100644 index 00000000..a673297b --- /dev/null +++ b/arch/arm/lib/backtrace.S @@ -0,0 +1,158 @@ +/* + * linux/arch/arm/lib/backtrace.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +@ fp is 0 or stack frame + +#define frame r4 +#define sv_fp r5 +#define sv_pc r6 +#define mask r7 +#define offset r8 + +ENTRY(__backtrace) + mov r1, #0x10 + mov r0, fp + +ENTRY(c_backtrace) + +#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) + mov pc, lr +ENDPROC(__backtrace) +ENDPROC(c_backtrace) +#else + stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... + movs frame, r0 @ if frame pointer is zero + beq no_frame @ we have no stack frames + + tst r1, #0x10 @ 26 or 32-bit mode? 
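Editor's note: as a rough, non-kernel C sketch of what __ashldi3/__aeabi_llsl above computes, the 64-bit left shift is split into two 32-bit halves, mirroring the movmi/movpl split on "shift - 32" in the assembly (function name and layout here are illustrative only, and shift amounts of 64 or more are assumed not to occur):

#include <stdint.h>

uint64_t ashldi3_sketch(uint64_t x, unsigned int shift)
{
	uint32_t lo = (uint32_t)x;          /* "al" on little-endian */
	uint32_t hi = (uint32_t)(x >> 32);  /* "ah" on little-endian */

	if (shift >= 32) {                  /* subs r3, r2, #32 is >= 0 */
		hi = lo << (shift - 32);
		lo = 0;
	} else if (shift > 0) {
		hi = (hi << shift) | (lo >> (32 - shift));
		lo = lo << shift;
	}
	return ((uint64_t)hi << 32) | lo;
}

__ashrdi3/__aeabi_lasr is the mirror image of this, shifting right with sign extension in the high word.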
+ ARM( moveq mask, #0xfc000003 ) + THUMB( moveq mask, #0xfc000000 ) + THUMB( orreq mask, #0x03 ) + movne mask, #0 @ mask for 32-bit + +1: stmfd sp!, {pc} @ calculate offset of PC stored + ldr r0, [sp], #4 @ by stmfd for this CPU + adr r1, 1b + sub offset, r0, r1 + +/* + * Stack frame layout: + * optionally saved caller registers (r4 - r10) + * saved fp + * saved sp + * saved lr + * frame => saved pc + * optionally saved arguments (r0 - r3) + * saved sp => <next word> + * + * Functions start with the following code sequence: + * mov ip, sp + * stmfd sp!, {r0 - r3} (optional) + * corrected pc => stmfd sp!, {..., fp, ip, lr, pc} + */ +for_each_frame: tst frame, mask @ Check for address exceptions + bne no_frame + +1001: ldr sv_pc, [frame, #0] @ get saved pc +1002: ldr sv_fp, [frame, #-12] @ get saved fp + + sub sv_pc, sv_pc, offset @ Correct PC for prefetching + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode + +1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 @ adjust saved 'pc' back one + teq r3, r2, lsr #10 @ instruction + subne r0, sv_pc, #4 @ allow for mov + subeq r0, sv_pc, #8 @ allow for mov + stmia + + ldr r1, [frame, #-4] @ get saved lr + mov r2, frame + bic r1, r1, mask @ mask PC/LR for the mode + bl dump_backtrace_entry + + ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, + ldr r3, .Ldsi+4 + teq r3, r1, lsr #10 + ldreq r0, [frame, #-8] @ get sp + subeq r0, r0, #4 @ point at the last arg + bleq .Ldumpstm @ dump saved registers + +1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} + ldr r3, .Ldsi @ instruction exists, + teq r3, r1, lsr #10 + subeq r0, frame, #16 + bleq .Ldumpstm @ dump saved registers + + teq sv_fp, #0 @ zero saved fp means + beq no_frame @ no further frames + + cmp sv_fp, frame @ next frame must be + mov frame, sv_fp @ above the current frame + bhi for_each_frame + +1006: adr r0, .Lbad + mov r1, frame + bl printk +no_frame: ldmfd sp!, {r4 - r8, pc} +ENDPROC(__backtrace) +ENDPROC(c_backtrace) + + .pushsection __ex_table,"a" + .align 3 + .long 1001b, 1006b + .long 1002b, 1006b + .long 1003b, 1006b + .long 1004b, 1006b + .popsection + +#define instr r4 +#define reg r5 +#define stack r6 + +.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} + mov stack, r0 + mov instr, r1 + mov reg, #10 + mov r7, #0 +1: mov r3, #1 + ARM( tst instr, r3, lsl reg ) + THUMB( lsl r3, reg ) + THUMB( tst instr, r3 ) + beq 2f + add r7, r7, #1 + teq r7, #6 + moveq r7, #1 + moveq r1, #'\n' + movne r1, #' ' + ldr r3, [stack], #-4 + mov r2, reg + adr r0, .Lfp + bl printk +2: subs reg, reg, #1 + bpl 1b + teq r7, #0 + adrne r0, .Lcr + blne printk + ldmfd sp!, {instr, reg, stack, r7, pc} + +.Lfp: .asciz "%cr%d:%08x" +.Lcr: .asciz "\n" +.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" + .align +.Ldsi: .word 0xe92dd800 >> 10 @ stmfd sp!, {... 
fp, ip, lr, pc} + .word 0xe92d0000 >> 10 @ stmfd sp!, {} + +#endif diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h new file mode 100644 index 00000000..10d868a5 --- /dev/null +++ b/arch/arm/lib/bitops.h @@ -0,0 +1,77 @@ +#if __LINUX_ARM_ARCH__ >= 6 + .macro bitop, instr + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 +1: ldrex r2, [r1] + \instr r2, r2, r3 + strex r0, r2, [r1] + cmp r0, #0 + bne 1b + bx lr + .endm + + .macro testop, instr, store + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 @ create mask + smp_dmb +1: ldrex r2, [r1] + ands r0, r2, r3 @ save old value of bit + \instr r2, r2, r3 @ toggle bit + strex ip, r2, [r1] + cmp ip, #0 + bne 1b + smp_dmb + cmp r0, #0 + movne r0, #1 +2: bx lr + .endm +#else + .macro bitop, instr + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r2, r0, #31 + mov r0, r0, lsr #5 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2] + \instr r2, r2, r3 + str r2, [r1, r0, lsl #2] + restore_irqs ip + mov pc, lr + .endm + +/** + * testop - implement a test_and_xxx_bit operation. + * @instr: operational instruction + * @store: store instruction + * + * Note: we can trivially conditionalise the store instruction + * to avoid dirtying the data cache. + */ + .macro testop, instr, store + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2]! + mov r0, #1 + tst r2, r0, lsl r3 + \instr r2, r2, r0, lsl r3 + \store r2, [r1] + moveq r0, #0 + restore_irqs ip + mov pc, lr + .endm +#endif diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S new file mode 100644 index 00000000..68ed5b62 --- /dev/null +++ b/arch/arm/lib/changebit.S @@ -0,0 +1,17 @@ +/* + * linux/arch/arm/lib/changebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +ENTRY(_change_bit) + bitop eor +ENDPROC(_change_bit) diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S new file mode 100644 index 00000000..14a0d988 --- /dev/null +++ b/arch/arm/lib/clear_user.S @@ -0,0 +1,54 @@ +/* + * linux/arch/arm/lib/clear_user.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
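Editor's note: the ARMv6 "bitop" macro in bitops.h above is, in effect, a load-exclusive/store-exclusive retry loop. A minimal C equivalent (not kernel code; names and the use of GCC __atomic builtins are this sketch's own) for the eor case used by _change_bit:

#include <stdint.h>

static void change_bit_sketch(unsigned int nr, uint32_t *addr)
{
	uint32_t *word = addr + (nr >> 5);   /* word offset: nr / 32 */
	uint32_t mask = 1u << (nr & 31);     /* bit offset within the word */
	uint32_t old, new;

	do {                                 /* retry, like strex failing */
		old = __atomic_load_n(word, __ATOMIC_RELAXED);
		new = old ^ mask;            /* eor toggles the bit */
	} while (!__atomic_compare_exchange_n(word, &old, new, 1,
					      __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

The pre-v6 variant further down simply disables interrupts around a plain load/modify/store instead.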
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* Prototype: int __clear_user(void *addr, size_t sz) + * Purpose : clear some user memory + * Params : addr - user memory address to clear + * : sz - number of bytes to clear + * Returns : number of bytes NOT cleared + */ +ENTRY(__clear_user_std) +WEAK(__clear_user) + stmfd sp!, {r1, lr} + mov r2, #0 + cmp r1, #4 + blt 2f + ands ip, r0, #3 + beq 1f + cmp ip, #2 + strusr r2, r0, 1 + strusr r2, r0, 1, le + strusr r2, r0, 1, lt + rsb ip, ip, #4 + sub r1, r1, ip @ 7 6 5 4 3 2 1 +1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 + strusr r2, r0, 4, pl, rept=2 + bpl 1b + adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 + strusr r2, r0, 4, pl +2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x + strusr r2, r0, 1, ne, rept=2 + tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 + it ne @ explicit IT needed for the label +USER( strnebt r2, [r0]) + mov r0, #0 + ldmfd sp!, {r1, pc} +ENDPROC(__clear_user) +ENDPROC(__clear_user_std) + + .pushsection .fixup,"ax" + .align 0 +9001: ldmfd sp!, {r0, pc} + .popsection + diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S new file mode 100644 index 00000000..4c04c3b5 --- /dev/null +++ b/arch/arm/lib/clearbit.S @@ -0,0 +1,17 @@ +/* + * linux/arch/arm/lib/clearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +ENTRY(_clear_bit) + bitop bic +ENDPROC(_clear_bit) diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S new file mode 100644 index 00000000..66a477a3 --- /dev/null +++ b/arch/arm/lib/copy_from_user.S @@ -0,0 +1,104 @@ +/* + * linux/arch/arm/lib/copy_from_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Prototype: + * + * size_t __copy_from_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to kernel memory from user memory + * + * Params: + * + * to = kernel memory + * from = user memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. 
+ */ + +#ifndef CONFIG_THUMB2_KERNEL +#define LDR1W_SHIFT 0 +#else +#define LDR1W_SHIFT 1 +#endif +#define STR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + ldrusr \reg, \ptr, 4, abort=\abort + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldr1w \ptr, \reg1, \abort + ldr1w \ptr, \reg2, \abort + ldr1w \ptr, \reg3, \abort + ldr1w \ptr, \reg4, \abort + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort + ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort + .endm + + .macro ldr1b ptr reg cond=al abort + ldrusr \reg, \ptr, 1, \cond, abort=\abort + .endm + + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + + .macro enter reg1 reg2 + mov r3, #0 + stmdb sp!, {r0, r2, r3, \reg1, \reg2} + .endm + + .macro exit reg1 reg2 + add sp, sp, #8 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +ENTRY(__copy_from_user) + +#include "copy_template.S" + +ENDPROC(__copy_from_user) + + .pushsection .fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2} + sub r3, r0, r1 + rsb r1, r3, r2 + str r1, [sp] + bl __memzero + ldr r0, [sp], #4 + copy_abort_end + .popsection + diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S new file mode 100644 index 00000000..6ee2f670 --- /dev/null +++ b/arch/arm/lib/copy_page.S @@ -0,0 +1,47 @@ +/* + * linux/arch/arm/lib/copypage.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) + + .text + .align 5 +/* + * StrongARM optimised copy_page routine + * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s) + * Note that we probably achieve closer to the 100MB/s target with + * the core clock switching. + */ +ENTRY(copy_page) + stmfd sp!, {r4, lr} @ 2 + PLD( pld [r1, #0] ) + PLD( pld [r1, #L1_CACHE_BYTES] ) + mov r2, #COPY_COUNT @ 1 + ldmia r1!, {r3, r4, ip, lr} @ 4+1 +1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) + PLD( pld [r1, #3 * L1_CACHE_BYTES]) +2: + .rept (2 * L1_CACHE_BYTES / 16 - 1) + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmia r1!, {r3, r4, ip, lr} @ 4 + .endr + subs r2, r2, #1 @ 1 + stmia r0!, {r3, r4, ip, lr} @ 4 + ldmgtia r1!, {r3, r4, ip, lr} @ 4 + bgt 1b @ 1 + PLD( ldmeqia r1!, {r3, r4, ip, lr} ) + PLD( beq 2b ) + ldmfd sp!, {r4, pc} @ 3 +ENDPROC(copy_page) diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S new file mode 100644 index 00000000..805e3f8f --- /dev/null +++ b/arch/arm/lib/copy_template.S @@ -0,0 +1,267 @@ +/* + * linux/arch/arm/lib/copy_template.s + * + * Code template for optimized memory copy functions + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* + * Theory of operation + * ------------------- + * + * This file provides the core code for a forward memory copy used in + * the implementation of memcopy(), copy_to_user() and copy_from_user(). + * + * The including file must define the following accessor macros + * according to the need of the given function: + * + * ldr1w ptr reg abort + * + * This loads one word from 'ptr', stores it in 'reg' and increments + * 'ptr' to the next word. The 'abort' argument is used for fixup tables. + * + * ldr4w ptr reg1 reg2 reg3 reg4 abort + * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * + * This loads four or eight words starting from 'ptr', stores them + * in provided registers and increments 'ptr' past those words. + * The'abort' argument is used for fixup tables. + * + * ldr1b ptr reg cond abort + * + * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. + * It also must apply the condition code if provided, otherwise the + * "al" condition is assumed by default. + * + * str1w ptr reg abort + * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * str1b ptr reg cond abort + * + * Same as their ldr* counterparts, but data is stored to 'ptr' location + * rather than being loaded. + * + * enter reg1 reg2 + * + * Preserve the provided registers on the stack plus any additional + * data as needed by the implementation including this code. Called + * upon code entry. + * + * exit reg1 reg2 + * + * Restore registers with the values previously saved with the + * 'preserv' macro. Called upon code termination. + * + * LDR1W_SHIFT + * STR1W_SHIFT + * + * Correction to be applied to the "ip" register when branching into + * the ldr1w or str1w instructions (some of these macros may expand to + * than one 32bit instruction in Thumb-2) + */ + + + enter r4, lr + + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +3: PLD( pld [r1, #124] ) +4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + subs r2, r2, #32 + str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 +#if LDR1W_SHIFT > 0 + lsl ip, ip, #LDR1W_SHIFT +#endif + addne pc, pc, ip @ C is always clear here + b 7f +6: + .rept (1 << LDR1W_SHIFT) + W(nop) + .endr + ldr1w r1, r3, abort=20f + ldr1w r1, r4, abort=20f + ldr1w r1, r5, abort=20f + ldr1w r1, r6, abort=20f + ldr1w r1, r7, abort=20f + ldr1w r1, r8, abort=20f + ldr1w r1, lr, abort=20f + +#if LDR1W_SHIFT < STR1W_SHIFT + lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT +#elif LDR1W_SHIFT > STR1W_SHIFT + lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT +#endif + add pc, pc, ip + nop + .rept (1 << STR1W_SHIFT) + W(nop) + .endr + str1w r0, r3, abort=20f + str1w r0, r4, abort=20f + str1w r0, r5, abort=20f + str1w r0, r6, abort=20f + str1w r0, r7, abort=20f + str1w r0, r8, abort=20f + str1w r0, lr, abort=20f + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldr1b r1, r3, ne, abort=21f + ldr1b r1, r4, cs, abort=21f + ldr1b r1, ip, cs, abort=21f + str1b r0, r3, ne, abort=21f + str1b r0, r4, cs, abort=21f + str1b 
r0, ip, cs, abort=21f + + exit r4, pc + +9: rsb ip, ip, #4 + cmp ip, #2 + ldr1b r1, r3, gt, abort=21f + ldr1b r1, r4, ge, abort=21f + ldr1b r1, lr, abort=21f + str1b r0, r3, gt, abort=21f + str1b r0, r4, ge, abort=21f + subs r2, r2, ip + str1b r0, lr, abort=21f + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr1w r1, lr, abort=21f + beq 17f + bgt 18f + + + .macro forward_copy_shift pull push + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +12: PLD( pld [r1, #124] ) +13: ldr4w r1, r4, r5, r6, r7, abort=19f + mov r3, lr, pull #\pull + subs r2, r2, #32 + ldr4w r1, r8, r9, ip, lr, abort=19f + orr r3, r3, r4, push #\push + mov r4, r4, pull #\pull + orr r4, r4, r5, push #\push + mov r5, r5, pull #\pull + orr r5, r5, r6, push #\push + mov r6, r6, pull #\pull + orr r6, r6, r7, push #\push + mov r7, r7, pull #\pull + orr r7, r7, r8, push #\push + mov r8, r8, pull #\pull + orr r8, r8, r9, push #\push + mov r9, r9, pull #\pull + orr r9, r9, ip, push #\push + mov ip, ip, pull #\pull + orr ip, ip, lr, push #\push + str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov r3, lr, pull #\pull + ldr1w r1, lr, abort=21f + subs ip, ip, #4 + orr r3, r3, lr, push #\push + str1w r0, r3, abort=21f + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: sub r1, r1, #(\push / 8) + b 8b + + .endm + + + forward_copy_shift pull=8 push=24 + +17: forward_copy_shift pull=16 push=16 + +18: forward_copy_shift pull=24 push=8 + + +/* + * Abort preamble and completion macros. + * If a fixup handler is required then those macros must surround it. + * It is assumed that the fixup code will handle the private part of + * the exit macro. + */ + + .macro copy_abort_preamble +19: ldmfd sp!, {r5 - r9} + b 21f +20: ldmfd sp!, {r5 - r8} +21: + .endm + + .macro copy_abort_end + ldmfd sp!, {r4, pc} + .endm + diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S new file mode 100644 index 00000000..d066df68 --- /dev/null +++ b/arch/arm/lib/copy_to_user.S @@ -0,0 +1,106 @@ +/* + * linux/arch/arm/lib/copy_to_user.S + * + * Author: Nicolas Pitre + * Created: Sep 29, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Prototype: + * + * size_t __copy_to_user(void *to, const void *from, size_t n) + * + * Purpose: + * + * copy a block to user memory from kernel memory + * + * Params: + * + * to = user memory + * from = kernel memory + * n = number of bytes to copy + * + * Return value: + * + * Number of bytes NOT copied. 
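Editor's note: both __copy_from_user (earlier) and __copy_to_user (above) follow the calling contract described in these header comments: the return value is the number of bytes NOT copied, so zero means complete success. A hypothetical call-site sketch, using only the prototype documented above (the caller function itself is invented for illustration):

#include <stddef.h>

/* prototype as documented in the header comments above */
size_t __copy_from_user(void *to, const void *from, size_t n);

static int read_block_sketch(void *kbuf, const void *ubuf, size_t n)
{
	size_t not_copied = __copy_from_user(kbuf, ubuf, n);

	/* any shortfall means the user pointer faulted part-way through */
	return not_copied ? -1 /* kernel callers would return -EFAULT */ : 0;
}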
+ */ + +#define LDR1W_SHIFT 0 +#ifndef CONFIG_THUMB2_KERNEL +#define STR1W_SHIFT 0 +#else +#define STR1W_SHIFT 1 +#endif + + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm + + .macro str1w ptr reg abort + strusr \reg, \ptr, 4, abort=\abort + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + str1w \ptr, \reg1, \abort + str1w \ptr, \reg2, \abort + str1w \ptr, \reg3, \abort + str1w \ptr, \reg4, \abort + str1w \ptr, \reg5, \abort + str1w \ptr, \reg6, \abort + str1w \ptr, \reg7, \abort + str1w \ptr, \reg8, \abort + .endm + + .macro str1b ptr reg cond=al abort + strusr \reg, \ptr, 1, \cond, abort=\abort + .endm + + .macro enter reg1 reg2 + mov r3, #0 + stmdb sp!, {r0, r2, r3, \reg1, \reg2} + .endm + + .macro exit reg1 reg2 + add sp, sp, #8 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +ENTRY(__copy_to_user_std) +WEAK(__copy_to_user) + +#include "copy_template.S" + +ENDPROC(__copy_to_user) +ENDPROC(__copy_to_user_std) + + .pushsection .fixup,"ax" + .align 0 + copy_abort_preamble + ldmfd sp!, {r1, r2, r3} + sub r0, r0, r1 + rsb r0, r0, r2 + copy_abort_end + .popsection + diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S new file mode 100644 index 00000000..3ac6ef01 --- /dev/null +++ b/arch/arm/lib/csumipv6.S @@ -0,0 +1,33 @@ +/* + * linux/arch/arm/lib/csumipv6.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +ENTRY(__csum_ipv6_magic) + str lr, [sp, #-4]! + adds ip, r2, r3 + ldmia r1, {r1 - r3, lr} + adcs ip, ip, r1 + adcs ip, ip, r2 + adcs ip, ip, r3 + adcs ip, ip, lr + ldmia r0, {r0 - r3} + adcs r0, ip, r0 + adcs r0, r0, r1 + adcs r0, r0, r2 + ldr r2, [sp, #4] + adcs r0, r0, r3 + adcs r0, r0, r2 + adcs r0, r0, #0 + ldmfd sp!, {pc} +ENDPROC(__csum_ipv6_magic) + diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S new file mode 100644 index 00000000..31d3cb34 --- /dev/null +++ b/arch/arm/lib/csumpartial.S @@ -0,0 +1,142 @@ +/* + * linux/arch/arm/lib/csumpartial.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* + * Function: __u32 csum_partial(const char *src, int len, __u32 sum) + * Params : r0 = buffer, r1 = len, r2 = checksum + * Returns : r0 = new checksum + */ + +buf .req r0 +len .req r1 +sum .req r2 +td0 .req r3 +td1 .req r4 @ save before use +td2 .req r5 @ save before use +td3 .req lr + +.Lzero: mov r0, sum + add sp, sp, #4 + ldr pc, [sp], #4 + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: teq len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + tst buf, #1 @ odd address? 
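Editor's note: csum_partial above accumulates an Internet-checksum style sum. A simplified C model of the arithmetic (not the kernel's register-level algorithm, which adds 32-bit words with carry and handles odd alignment by rotating the result): add 16-bit words into a 32-bit accumulator with end-around carry.

#include <stdint.h>
#include <stddef.h>

/* assumes an even-length, 16-bit aligned buffer for simplicity */
static uint32_t csum_partial_sketch(const uint16_t *buf, size_t words,
				    uint32_t sum)
{
	for (size_t i = 0; i < words; i++) {
		sum += buf[i];
		if (sum < buf[i])       /* the add wrapped: fold the carry back in */
			sum += 1;
	}
	return sum;
}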
+ movne sum, sum, ror #8 + ldrneb td0, [buf], #1 + subne len, len, #1 + adcnes sum, sum, td0, put_byte_1 + +.Lless4: tst len, #6 + beq .Lless8_byte + + /* we are now half-word aligned */ + +.Lless8_wordlp: +#if __LINUX_ARM_ARCH__ >= 4 + ldrh td0, [buf], #2 + sub len, len, #2 +#else + ldrb td0, [buf], #1 + ldrb td3, [buf], #1 + sub len, len, #2 +#ifndef __ARMEB__ + orr td0, td0, td3, lsl #8 +#else + orr td0, td3, td0, lsl #8 +#endif +#endif + adcs sum, sum, td0 + tst len, #6 + bne .Lless8_wordlp + +.Lless8_byte: tst len, #1 @ odd number of bytes + ldrneb td0, [buf], #1 @ include last byte + adcnes sum, sum, td0, put_byte_0 @ update checksum + +.Ldone: adc r0, sum, #0 @ collect up the last carry + ldr td0, [sp], #4 + tst td0, #1 @ check buffer alignment + movne r0, r0, ror #8 @ rotate checksum by 8 bits + ldr pc, [sp], #4 @ return + +.Lnot_aligned: tst buf, #1 @ odd address + ldrneb td0, [buf], #1 @ make even + subne len, len, #1 + adcnes sum, sum, td0, put_byte_1 @ update checksum + + tst buf, #2 @ 32-bit aligned? +#if __LINUX_ARM_ARCH__ >= 4 + ldrneh td0, [buf], #2 @ make 32-bit aligned + subne len, len, #2 +#else + ldrneb td0, [buf], #1 + ldrneb ip, [buf], #1 + subne len, len, #2 +#ifndef __ARMEB__ + orrne td0, td0, ip, lsl #8 +#else + orrne td0, ip, td0, lsl #8 +#endif +#endif + adcnes sum, sum, td0 @ update checksum + mov pc, lr + +ENTRY(csum_partial) + stmfd sp!, {buf, lr} + cmp len, #8 @ Ensure that we have at least + blo .Lless8 @ 8 bytes to copy. + + tst buf, #1 + movne sum, sum, ror #8 + + adds sum, sum, #0 @ C = 0 + tst buf, #3 @ Test destination alignment + blne .Lnot_aligned @ align destination, return here + +1: bics ip, len, #31 + beq 3f + + stmfd sp!, {r4 - r5} +2: ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + ldmia buf!, {td0, td1, td2, td3} + adcs sum, sum, td0 + adcs sum, sum, td1 + adcs sum, sum, td2 + adcs sum, sum, td3 + sub ip, ip, #32 + teq ip, #0 + bne 2b + ldmfd sp!, {r4 - r5} + +3: tst len, #0x1c @ should not change C + beq .Lless4 + +4: ldr td0, [buf], #4 + sub len, len, #4 + adcs sum, sum, td0 + tst len, #0x1c + bne 4b + b .Lless4 +ENDPROC(csum_partial) diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S new file mode 100644 index 00000000..d03fc71f --- /dev/null +++ b/arch/arm/lib/csumpartialcopy.S @@ -0,0 +1,53 @@ +/* + * linux/arch/arm/lib/csumpartialcopy.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum) + * Params : r0 = src, r1 = dst, r2 = len, r3 = checksum + * Returns : r0 = new checksum + */ + + .macro save_regs + stmfd sp!, {r1, r4 - r8, lr} + .endm + + .macro load_regs + ldmfd sp!, {r1, r4 - r8, pc} + .endm + + .macro load1b, reg1 + ldrb \reg1, [r0], #1 + .endm + + .macro load2b, reg1, reg2 + ldrb \reg1, [r0], #1 + ldrb \reg2, [r0], #1 + .endm + + .macro load1l, reg1 + ldr \reg1, [r0], #4 + .endm + + .macro load2l, reg1, reg2 + ldr \reg1, [r0], #4 + ldr \reg2, [r0], #4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldmia r0!, {\reg1, \reg2, \reg3, \reg4} + .endm + +#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) +#define FN_EXIT ENDPROC(csum_partial_copy_nocheck) + +#include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S new file mode 100644 index 00000000..d620a5f2 --- /dev/null +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -0,0 +1,332 @@ +/* + * linux/arch/arm/lib/csumpartialcopygeneric.S + * + * Copyright (C) 1995-2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * unsigned int + * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) + * r0 = src, r1 = dst, r2 = len, r3 = sum + * Returns : r0 = checksum + * + * Note that 'tst' and 'teq' preserve the carry flag. + */ + +src .req r0 +dst .req r1 +len .req r2 +sum .req r3 + +.Lzero: mov r0, sum + load_regs + + /* + * Align an unaligned destination pointer. We know that + * we have >= 8 bytes here, so we don't need to check + * the length. Note that the source pointer hasn't been + * aligned yet. + */ +.Ldst_unaligned: + tst dst, #1 + beq .Ldst_16bit + + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst dst, #2 + moveq pc, lr @ dst is now 32bit aligned + +.Ldst_16bit: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 + mov pc, lr @ dst is now 32bit aligned + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: teq len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + tst dst, #1 @ dst 16-bit aligned + beq .Lless8_aligned + + /* Align dst */ + load1b ip + sub len, len, #1 + adcs sum, sum, ip, put_byte_1 @ update checksum + strb ip, [dst], #1 + tst len, #6 + beq .Lless8_byteonly + +1: load2b r8, ip + sub len, len, #2 + adcs sum, sum, r8, put_byte_0 + strb r8, [dst], #1 + adcs sum, sum, ip, put_byte_1 + strb ip, [dst], #1 +.Lless8_aligned: + tst len, #6 + bne 1b +.Lless8_byteonly: + tst len, #1 + beq .Ldone + load1b r8 + adcs sum, sum, r8, put_byte_0 @ update checksum + strb r8, [dst], #1 + b .Ldone + +FN_ENTRY + save_regs + + cmp len, #8 @ Ensure that we have at least + blo .Lless8 @ 8 bytes to copy. + + adds sum, sum, #0 @ C = 0 + tst dst, #3 @ Test destination alignment + blne .Ldst_unaligned @ align destination, return here + + /* + * Ok, the dst pointer is now 32bit aligned, and we know + * that we must have more than 4 bytes to copy. Note + * that C contains the carry from the dst alignment above. 
+ */ + + tst src, #3 @ Test source alignment + bne .Lsrc_not_aligned + + /* Routine for src & dst aligned */ + + bics ip, len, #15 + beq 2f + +1: load4l r4, r5, r6, r7 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + sub ip, ip, #16 + teq ip, #0 + bne 1b + +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r4, r5 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + tst ip, #4 + beq 4f + +3: load1l r4 + str r4, [dst], #4 + adcs sum, sum, r4 + +4: ands len, len, #3 + beq .Ldone + load1l r4 + tst len, #2 + mov r5, r4, get_byte_0 + beq .Lexit + adcs sum, sum, r4, push #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 +.Lexit: tst len, #1 + strneb r5, [dst], #1 + andne r5, r5, #255 + adcnes sum, sum, r5, put_byte_0 + + /* + * If the dst pointer was not 16-bit aligned, we + * need to rotate the checksum here to get around + * the inefficient byte manipulations in the + * architecture independent code. + */ +.Ldone: adc r0, sum, #0 + ldr sum, [sp, #0] @ dst + tst sum, #1 + movne r0, r0, ror #8 + load_regs + +.Lsrc_not_aligned: + adc sum, sum, #0 @ include C from dst alignment + and ip, src, #3 + bic src, src, #3 + load1l r5 + cmp ip, #2 + beq .Lsrc2_aligned + bhi .Lsrc3_aligned + mov r4, r5, pull #8 @ C = 0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + mov r7, r7, pull #8 + orr r7, r7, r8, push #24 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #8 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #8 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #24 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #8 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + adcs sum, sum, r4, push #16 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + mov r5, r4, get_byte_2 + b .Lexit + +.Lsrc2_aligned: mov r4, r5, pull #16 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + mov r6, r6, pull #16 + orr r6, r6, r7, push #16 + mov r7, r7, pull #16 + orr r7, r7, r8, push #16 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #16 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #16 + mov r5, r5, pull #16 + orr r5, r5, r6, push #16 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #16 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #16 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #16 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + adcs sum, sum, r4 + strb r5, [dst], #1 + mov r5, r4, get_byte_1 + strb r5, [dst], #1 + tst len, #1 + beq .Ldone + load1b r5 + b .Lexit + +.Lsrc3_aligned: mov r4, r5, pull #24 + adds sum, sum, #0 + bics ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, 
r5, r6, push #8 + mov r6, r6, pull #24 + orr r6, r6, r7, push #8 + mov r7, r7, pull #24 + orr r7, r7, r8, push #8 + stmia dst!, {r4, r5, r6, r7} + adcs sum, sum, r4 + adcs sum, sum, r5 + adcs sum, sum, r6 + adcs sum, sum, r7 + mov r4, r8, pull #24 + sub ip, ip, #16 + teq ip, #0 + bne 1b +2: ands ip, len, #12 + beq 4f + tst ip, #8 + beq 3f + load2l r5, r6 + orr r4, r4, r5, push #8 + mov r5, r5, pull #24 + orr r5, r5, r6, push #8 + stmia dst!, {r4, r5} + adcs sum, sum, r4 + adcs sum, sum, r5 + mov r4, r6, pull #24 + tst ip, #4 + beq 4f +3: load1l r5 + orr r4, r4, r5, push #8 + str r4, [dst], #4 + adcs sum, sum, r4 + mov r4, r5, pull #24 +4: ands len, len, #3 + beq .Ldone + mov r5, r4, get_byte_0 + tst len, #2 + beq .Lexit + strb r5, [dst], #1 + adcs sum, sum, r4 + load1l r4 + mov r5, r4, get_byte_0 + strb r5, [dst], #1 + adcs sum, sum, r4, push #24 + mov r5, r4, get_byte_1 + b .Lexit +FN_EXIT diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S new file mode 100644 index 00000000..7d08b43d --- /dev/null +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -0,0 +1,83 @@ +/* + * linux/arch/arm/lib/csumpartialcopyuser.S + * + * Copyright (C) 1995-1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/asm-offsets.h> + + .text + + .macro save_regs + stmfd sp!, {r1, r2, r4 - r8, lr} + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, pc} + .endm + + .macro load1b, reg1 + ldrusr \reg1, r0, 1 + .endm + + .macro load2b, reg1, reg2 + ldrusr \reg1, r0, 1 + ldrusr \reg2, r0, 1 + .endm + + .macro load1l, reg1 + ldrusr \reg1, r0, 4 + .endm + + .macro load2l, reg1, reg2 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + ldrusr \reg3, r0, 4 + ldrusr \reg4, r0, 4 + .endm + +/* + * unsigned int + * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr) + * r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr + * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT + */ + +#define FN_ENTRY ENTRY(csum_partial_copy_from_user) +#define FN_EXIT ENDPROC(csum_partial_copy_from_user) + +#include "csumpartialcopygeneric.S" + +/* + * FIXME: minor buglet here + * We don't return the checksum for the data present in the buffer. To do + * so properly, we would have to add in whatever registers were loaded before + * the fault, which, with the current asm above is not predictable. + */ + .pushsection .fixup,"ax" + .align 4 +9001: mov r4, #-EFAULT + ldr r5, [sp, #8*4] @ *err_ptr + str r4, [r5] + ldmia sp, {r1, r2} @ retrieve dst, len + add r2, r2, r1 + mov r0, #0 @ zero the buffer +9002: teq r2, r1 + strneb r0, [r1], #1 + bne 9002b + load_regs + .popsection diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S new file mode 100644 index 00000000..3c9a05c8 --- /dev/null +++ b/arch/arm/lib/delay.S @@ -0,0 +1,69 @@ +/* + * linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/param.h> + .text + +.LC0: .word loops_per_jiffy +.LC1: .word (2199023*HZ)>>11 + +/* + * r0 <= 2000 + * lpj <= 0x01ffffff (max. 3355 bogomips) + * HZ <= 1000 + */ + +ENTRY(__udelay) + ldr r2, .LC1 + mul r0, r2, r0 +ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 + mov r1, #-1 + ldr r2, .LC0 + ldr r2, [r2] @ max = 0x01ffffff + add r0, r0, r1, lsr #32-14 + mov r0, r0, lsr #14 @ max = 0x0001ffff + add r2, r2, r1, lsr #32-10 + mov r2, r2, lsr #10 @ max = 0x00007fff + mul r0, r2, r0 @ max = 2^32-1 + add r0, r0, r1, lsr #32-6 + movs r0, r0, lsr #6 + moveq pc, lr + +/* + * loops = r0 * HZ * loops_per_jiffy / 1000000 + * + * Oh, if only we had a cycle counter... + */ + +@ Delay routine +ENTRY(__delay) + subs r0, r0, #1 +#if 0 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 +#endif + bhi __delay + mov pc, lr +ENDPROC(__udelay) +ENDPROC(__const_udelay) +ENDPROC(__delay) diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 00000000..faa77481 --- /dev/null +++ b/arch/arm/lib/div64.S @@ -0,0 +1,203 @@ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +ENTRY(__do_div64) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subcss xh, xh, yl + movnes ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + movlo pc, lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. 
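Editor's note: the core of __do_div64 described above is a classic align-then-subtract-and-shift division. An illustrative, non-kernel C version (the real routine also special-cases divisors of 0, 1 and powers of 2, and splits the loop into upper and lower 32-bit halves):

#include <stdint.h>

static uint64_t do_div64_sketch(uint64_t n, uint32_t base, uint32_t *rem)
{
	uint64_t d = base, q = 0, bit = 1;

	if (base == 0) {                /* the assembly branches to __div0 */
		*rem = 0;
		return 0;
	}
	/* align the divisor under the dividend (clz does this on ARMv5+) */
	while (d < n && !(d & (1ULL << 63))) {
		d <<= 1;
		bit <<= 1;
	}
	/* one quotient bit per step */
	while (bit) {
		if (n >= d) {
			n -= d;
			q |= bit;
		}
		d >>= 1;
		bit >>= 1;
	}
	*rem = (uint32_t)n;
	return q;
}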
+ mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + mov pc, lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + moveq pc, lr + + @ We still have remainer bits in the low part. Bring them up. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + mov pc, lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + mov pc, lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + moveq pc, lr + + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 + + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 + +ENDPROC(__do_div64) diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S new file mode 100644 index 00000000..8678eb2b --- /dev/null +++ b/arch/arm/lib/ecard.S @@ -0,0 +1,45 @@ +/* + * linux/arch/arm/lib/ecard.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <mach/hardware.h> + +#define CPSR2SPSR(rt) \ + mrs rt, cpsr; \ + msr spsr_cxsf, rt + +@ Purpose: call an expansion card loader to read bytes. 
+@ Proto : char read_loader(int offset, char *card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_read) + stmfd sp!, {r4 - r12, lr} + mov r11, r1 + mov r1, r0 + CPSR2SPSR(r0) + mov lr, pc + mov pc, r2 + ldmfd sp!, {r4 - r12, pc} + +@ Purpose: call an expansion card loader to reset the card +@ Proto : void read_loader(int card_base, char *loader); +@ Returns: byte read + +ENTRY(ecard_loader_reset) + stmfd sp!, {r4 - r12, lr} + mov r11, r0 + CPSR2SPSR(r0) + mov lr, pc + add pc, r1, #8 + ldmfd sp!, {r4 - r12, pc} + diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S new file mode 100644 index 00000000..64f6bc1a --- /dev/null +++ b/arch/arm/lib/findbit.S @@ -0,0 +1,196 @@ +/* + * linux/arch/arm/lib/findbit.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16th March 2001 - John Ripley <jripley@sonicblue.com> + * Fixed so that "size" is an exclusive not an inclusive quantity. + * All users of these functions expect exclusive sizes, and may + * also call with zero size. + * Reworked by rmk. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + +/* + * Purpose : Find a 'zero' bit + * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); + */ +ENTRY(_find_first_zero_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_zero_bit_le) + +/* + * Purpose : Find next 'zero' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_zero_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_le) + +/* + * Purpose : Find a 'one' bit + * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); + */ +ENTRY(_find_first_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? 
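Editor's note: the findbit.S routines above scan a bitmap byte by byte and clamp the result to maxbit when nothing is found. A simplified C rendering of _find_first_zero_bit_le (not the kernel's code; the byte-wise scan and __builtin_ctz are this sketch's own choices):

static unsigned int find_first_zero_bit_sketch(const unsigned char *addr,
					       unsigned int maxbit)
{
	for (unsigned int bit = 0; bit < maxbit; bit += 8) {
		unsigned char inverted = ~addr[bit >> 3];   /* look for a 1 */
		if (inverted) {
			unsigned int found = bit + __builtin_ctz(inverted);
			return found < maxbit ? found : maxbit;
		}
	}
	return maxbit;                  /* no zero bit below maxbit */
}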
+ blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_bit_le) + +/* + * Purpose : Find next 'one' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_le) + +#ifdef __ARMEB__ + +ENTRY(_find_first_zero_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_zero_bit_be) + +ENTRY(_find_next_zero_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_be) + +ENTRY(_find_first_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_bit_be) + +ENTRY(_find_next_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_be) + +#endif + +/* + * One or more bits in the LSB of r3 are assumed to be set. + */ +.L_found: +#if __LINUX_ARM_ARCH__ >= 5 + rsb r0, r3, #0 + and r3, r3, r0 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 +#else + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 +#endif + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 + mov pc, lr + diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S new file mode 100644 index 00000000..617150b1 --- /dev/null +++ b/arch/arm/lib/floppydma.S @@ -0,0 +1,32 @@ +/* + * linux/arch/arm/lib/floppydma.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> + .text + + .global floppy_fiqin_end +ENTRY(floppy_fiqin_start) + subs r9, r9, #1 + ldrgtb r12, [r11, #-4] + ldrleb r12, [r11], #0 + strb r12, [r10], #1 + subs pc, lr, #4 +floppy_fiqin_end: + + .global floppy_fiqout_end +ENTRY(floppy_fiqout_start) + subs r9, r9, #1 + ldrgeb r12, [r10], #1 + movlt r12, #0 + strleb r12, [r11], #0 + subles pc, lr, #4 + strb r12, [r11, #-4] + subs pc, lr, #4 +floppy_fiqout_end: diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S new file mode 100644 index 00000000..11093a7c --- /dev/null +++ b/arch/arm/lib/getuser.S @@ -0,0 +1,73 @@ +/* + * linux/arch/arm/lib/getuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make them more + * efficient, especially as they return an error value in addition to + * the "real" return value. + * + * __get_user_X + * + * Inputs: r0 contains the address + * Outputs: r0 is the error code + * r2, r3 contains the zero-extended value + * lr corrupted + * + * No other registers must be altered. (see <asm/uaccess.h> + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000. + * Note also that it is intended that __get_user_bad is not global. + */ +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/domain.h> + +ENTRY(__get_user_1) +1: TUSER(ldrb) r2, [r0] + mov r0, #0 + mov pc, lr +ENDPROC(__get_user_1) + +ENTRY(__get_user_2) +#ifdef CONFIG_THUMB2_KERNEL +2: TUSER(ldrb) r2, [r0] +3: TUSER(ldrb) r3, [r0, #1] +#else +2: TUSER(ldrb) r2, [r0], #1 +3: TUSER(ldrb) r3, [r0] +#endif +#ifndef __ARMEB__ + orr r2, r2, r3, lsl #8 +#else + orr r2, r3, r2, lsl #8 +#endif + mov r0, #0 + mov pc, lr +ENDPROC(__get_user_2) + +ENTRY(__get_user_4) +4: TUSER(ldr) r2, [r0] + mov r0, #0 + mov pc, lr +ENDPROC(__get_user_4) + +__get_user_bad: + mov r2, #0 + mov r0, #-EFAULT + mov pc, lr +ENDPROC(__get_user_bad) + +.pushsection __ex_table, "a" + .long 1b, __get_user_bad + .long 2b, __get_user_bad + .long 3b, __get_user_bad + .long 4b, __get_user_bad +.popsection diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S new file mode 100644 index 00000000..1b197ea7 --- /dev/null +++ b/arch/arm/lib/io-acorn.S @@ -0,0 +1,31 @@ +/* + * linux/arch/arm/lib/io-acorn.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align + +.Liosl_warning: + .ascii "<4>insl/outsl not implemented, called from %08lX\0" + .align + +/* + * These make no sense on Acorn machines. + * Print a warning message. 
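Editor's note: the __get_user_1/2/4 stubs above return the error code in r0 and the zero-extended value in r2/r3, with faulting loads redirected to __get_user_bad via the __ex_table entries. In kernel C they are reached through the ordinary get_user() macro; a hypothetical call site (example function name invented, kernel headers assumed):

#include <linux/uaccess.h>
#include <linux/errno.h>

static long example_read_word(unsigned int __user *uptr)
{
	unsigned int val;

	if (get_user(val, uptr))        /* fast path lands in __get_user_4 */
		return -EFAULT;         /* non-zero r0 from the stub: fault */
	return val;
}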
+ */ +ENTRY(insl) +ENTRY(outsl) + adr r0, .Liosl_warning + mov r1, lr + b printk diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S new file mode 100644 index 00000000..9f423898 --- /dev/null +++ b/arch/arm/lib/io-readsb.S @@ -0,0 +1,123 @@ +/* + * linux/arch/arm/lib/io-readsb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +.Linsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1], #1 + subs r2, r2, ip + bne .Linsb_aligned + +ENTRY(__raw_readsb) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne .Linsb_align + +.Linsb_aligned: stmfd sp!, {r4 - r6, lr} + + subs r2, r2, #16 + bmi .Linsb_no_16 + +.Linsb_16_lp: ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + ldrb r5, [r0] + orr r4, r4, r6, put_byte_2 + ldrb r6, [r0] + orr r4, r4, ip, put_byte_3 + ldrb ip, [r0] + mov r5, r5, put_byte_0 + ldrb lr, [r0] + orr r5, r5, r6, put_byte_1 + ldrb r6, [r0] + orr r5, r5, ip, put_byte_2 + ldrb ip, [r0] + orr r5, r5, lr, put_byte_3 + ldrb lr, [r0] + mov r6, r6, put_byte_0 + orr r6, r6, ip, put_byte_1 + ldrb ip, [r0] + orr r6, r6, lr, put_byte_2 + orr r6, r6, ip, put_byte_3 + stmia r1!, {r3 - r6} + + subs r2, r2, #16 + bpl .Linsb_16_lp + + tst r2, #15 + ldmeqfd sp!, {r4 - r6, pc} + +.Linsb_no_16: tst r2, #8 + beq .Linsb_no_8 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + mov r3, r3, put_byte_0 + ldrb r6, [r0] + orr r3, r3, r4, put_byte_1 + ldrb r4, [r0] + orr r3, r3, r5, put_byte_2 + ldrb r5, [r0] + orr r3, r3, r6, put_byte_3 + ldrb r6, [r0] + mov r4, r4, put_byte_0 + ldrb ip, [r0] + orr r4, r4, r5, put_byte_1 + orr r4, r4, r6, put_byte_2 + orr r4, r4, ip, put_byte_3 + stmia r1!, {r3, r4} + +.Linsb_no_8: tst r2, #4 + beq .Linsb_no_4 + + ldrb r3, [r0] + ldrb r4, [r0] + ldrb r5, [r0] + ldrb r6, [r0] + mov r3, r3, put_byte_0 + orr r3, r3, r4, put_byte_1 + orr r3, r3, r5, put_byte_2 + orr r3, r3, r6, put_byte_3 + str r3, [r1], #4 + +.Linsb_no_4: ands r2, r2, #3 + ldmeqfd sp!, {r4 - r6, pc} + + cmp r2, #2 + ldrb r3, [r0] + strb r3, [r1], #1 + ldrgeb r3, [r0] + strgeb r3, [r1], #1 + ldrgtb r3, [r0] + strgtb r3, [r1] + + ldmfd sp!, {r4 - r6, pc} +ENDPROC(__raw_readsb) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S new file mode 100644 index 00000000..5fb97e7f --- /dev/null +++ b/arch/arm/lib/io-readsl.S @@ -0,0 +1,79 @@ +/* + * linux/arch/arm/lib/io-readsl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +ENTRY(__raw_readsl) + teq r2, #0 @ do we have to check for the zero len? 
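+		@ r0 = I/O port (never advanced), r1 = buffer, r2 = number of 32-bit words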
+ moveq pc, lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldr r3, [r0, #0] + ldr r4, [r0, #0] + ldr ip, [r0, #0] + ldr lr, [r0, #0] + subs r2, r2, #4 + stmia r1!, {r3, r4, ip, lr} + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldrcs r3, [r0, #0] + ldrcs ip, [r0, #0] + stmcsia r1!, {r3, ip} + ldrne r3, [r0, #0] + strne r3, [r1, #0] + mov pc, lr + +3: ldr r3, [r0] + cmp ip, #2 + mov ip, r3, get_byte_0 + strb ip, [r1], #1 + bgt 6f + mov ip, r3, get_byte_1 + strb ip, [r1], #1 + beq 5f + mov ip, r3, get_byte_2 + strb ip, [r1], #1 + +4: subs r2, r2, #1 + mov ip, r3, pull #24 + ldrne r3, [r0] + orrne ip, ip, r3, push #8 + strne ip, [r1], #4 + bne 4b + b 8f + +5: subs r2, r2, #1 + mov ip, r3, pull #16 + ldrne r3, [r0] + orrne ip, ip, r3, push #16 + strne ip, [r1], #4 + bne 5b + b 7f + +6: subs r2, r2, #1 + mov ip, r3, pull #8 + ldrne r3, [r0] + orrne ip, ip, r3, push #24 + strne ip, [r1], #4 + bne 6b + + mov r3, ip, get_byte_2 + strb r3, [r1, #2] +7: mov r3, ip, get_byte_1 + strb r3, [r1, #1] +8: mov r3, ip, get_byte_0 + strb r3, [r1, #0] + mov pc, lr +ENDPROC(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S new file mode 100644 index 00000000..9aaf7c72 --- /dev/null +++ b/arch/arm/lib/io-readsw-armv3.S @@ -0,0 +1,107 @@ +/* + * linux/arch/arm/lib/io-readsw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <mach/hardware.h> + +.Linsw_bad_alignment: + adr r0, .Linsw_bad_align_msg + mov r2, lr + b panic +.Linsw_bad_align_msg: + .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Linsw_align: tst r1, #1 + bne .Linsw_bad_alignment + + ldr r3, [r0] + strb r3, [r1], #1 + mov r3, r3, lsr #8 + strb r3, [r1], #1 + + subs r2, r2, #1 + moveq pc, lr + +ENTRY(__raw_readsw) + teq r2, #0 @ do we have to check for the zero len? 
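+		@ r0 = I/O port (never advanced), r1 = buffer, r2 = number of 16-bit halfwords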
+ moveq pc, lr + tst r1, #3 + bne .Linsw_align + +.Linsw_aligned: mov ip, #0xff + orr ip, ip, ip, lsl #8 + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_insw_8 + +.Linsw_8_lp: ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + ldr r5, [r0] + and r5, r5, ip + ldr r6, [r0] + orr r5, r5, r6, lsl #16 + + ldr r6, [r0] + and r6, r6, ip + ldr lr, [r0] + orr r6, r6, lr, lsl #16 + + stmia r1!, {r3 - r6} + + subs r2, r2, #8 + bpl .Linsw_8_lp + + tst r2, #7 + ldmeqfd sp!, {r4, r5, r6, pc} + +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + ldr r4, [r0] + and r4, r4, ip + ldr r5, [r0] + orr r4, r4, r5, lsl #16 + + stmia r1!, {r3, r4} + +.Lno_insw_4: tst r2, #2 + beq .Lno_insw_2 + + ldr r3, [r0] + and r3, r3, ip + ldr r4, [r0] + orr r3, r3, r4, lsl #16 + + str r3, [r1], #4 + +.Lno_insw_2: tst r2, #1 + ldrne r3, [r0] + strneb r3, [r1], #1 + movne r3, r3, lsr #8 + strneb r3, [r1] + + ldmfd sp!, {r4, r5, r6, pc} + + diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S new file mode 100644 index 00000000..1f393d42 --- /dev/null +++ b/arch/arm/lib/io-readsw-armv4.S @@ -0,0 +1,131 @@ +/* + * linux/arch/arm/lib/io-readsw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro pack, rd, hw1, hw2 +#ifndef __ARMEB__ + orr \rd, \hw1, \hw2, lsl #16 +#else + orr \rd, \hw2, \hw1, lsl #16 +#endif + .endm + +.Linsw_align: movs ip, r1, lsl #31 + bne .Linsw_noalign + ldrh ip, [r0] + sub r2, r2, #1 + strh ip, [r1], #2 + +ENTRY(__raw_readsw) + teq r2, #0 + moveq pc, lr + tst r1, #3 + bne .Linsw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .Lno_insw_8 + +.Linsw_8_lp: ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh r5, [r0] + pack r4, r4, r5 + + ldrh r5, [r0] + ldrh ip, [r0] + pack r5, r5, ip + + ldrh ip, [r0] + ldrh lr, [r0] + pack ip, ip, lr + + subs r2, r2, #8 + stmia r1!, {r3 - r5, ip} + bpl .Linsw_8_lp + +.Lno_insw_8: tst r2, #4 + beq .Lno_insw_4 + + ldrh r3, [r0] + ldrh r4, [r0] + pack r3, r3, r4 + + ldrh r4, [r0] + ldrh ip, [r0] + pack r4, r4, ip + + stmia r1!, {r3, r4} + +.Lno_insw_4: movs r2, r2, lsl #31 + bcc .Lno_insw_2 + + ldrh r3, [r0] + ldrh ip, [r0] + pack r3, r3, ip + str r3, [r1], #4 + +.Lno_insw_2: ldrneh r3, [r0] + strneh r3, [r1] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define _BE_ONLY_(code...) code +#define _LE_ONLY_(code...) +#define push_hbyte0 lsr #8 +#define pull_hbyte1 lsl #24 +#else +#define _BE_ONLY_(code...) +#define _LE_ONLY_(code...) code +#define push_hbyte0 lsl #24 +#define pull_hbyte1 lsr #8 +#endif + +.Linsw_noalign: stmfd sp!, {r4, lr} + ldrccb ip, [r1, #-1]! 
+ bcc 1f + + ldrh ip, [r0] + sub r2, r2, #1 + _BE_ONLY_( mov ip, ip, ror #8 ) + strb ip, [r1], #1 + _LE_ONLY_( mov ip, ip, lsr #8 ) + _BE_ONLY_( mov ip, ip, lsr #24 ) + +1: subs r2, r2, #2 + bmi 3f + _BE_ONLY_( mov ip, ip, lsl #24 ) + +2: ldrh r3, [r0] + ldrh r4, [r0] + subs r2, r2, #2 + orr ip, ip, r3, lsl #8 + orr ip, ip, r4, push_hbyte0 + str ip, [r1], #4 + mov ip, r4, pull_hbyte1 + bpl 2b + + _BE_ONLY_( mov ip, ip, lsr #24 ) + +3: tst r2, #1 + strb ip, [r1], #1 + ldrneh ip, [r0] + _BE_ONLY_( movne ip, ip, ror #8 ) + strneb ip, [r1], #1 + _LE_ONLY_( movne ip, ip, lsr #8 ) + _BE_ONLY_( movne ip, ip, lsr #24 ) + strneb ip, [r1] + ldmfd sp!, {r4, pc} +ENDPROC(__raw_readsw) diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c new file mode 100644 index 00000000..82425394 --- /dev/null +++ b/arch/arm/lib/io-shark.c @@ -0,0 +1,13 @@ +/* + * linux/arch/arm/lib/io-shark.c + * + * by Alexander Schulz + * + * derived from: + * linux/arch/arm/lib/io-ebsa.S + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S new file mode 100644 index 00000000..68b92f4a --- /dev/null +++ b/arch/arm/lib/io-writesb.S @@ -0,0 +1,94 @@ +/* + * linux/arch/arm/lib/io-writesb.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro outword, rd +#ifndef __ARMEB__ + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] + mov \rd, \rd, lsr #8 + strb \rd, [r0] +#else + mov lr, \rd, lsr #24 + strb lr, [r0] + mov lr, \rd, lsr #16 + strb lr, [r0] + mov lr, \rd, lsr #8 + strb lr, [r0] + strb \rd, [r0] +#endif + .endm + +.Loutsb_align: rsb ip, ip, #4 + cmp ip, r2 + movgt ip, r2 + cmp ip, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1], #1 + strgtb r3, [r0] + subs r2, r2, ip + bne .Loutsb_aligned + +ENTRY(__raw_writesb) + teq r2, #0 @ do we have to check for the zero len? 
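+		@ r0 = I/O port (never advanced), r1 = source buffer, r2 = number of bytes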
+ moveq pc, lr + ands ip, r1, #3 + bne .Loutsb_align + +.Loutsb_aligned: + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #16 + bmi .Loutsb_no_16 + +.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip} + outword r3 + outword r4 + outword r5 + outword ip + subs r2, r2, #16 + bpl .Loutsb_16_lp + + tst r2, #15 + ldmeqfd sp!, {r4, r5, pc} + +.Loutsb_no_16: tst r2, #8 + beq .Loutsb_no_8 + + ldmia r1!, {r3, r4} + outword r3 + outword r4 + +.Loutsb_no_8: tst r2, #4 + beq .Loutsb_no_4 + + ldr r3, [r1], #4 + outword r3 + +.Loutsb_no_4: ands r2, r2, #3 + ldmeqfd sp!, {r4, r5, pc} + + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0] + ldrgeb r3, [r1], #1 + strgeb r3, [r0] + ldrgtb r3, [r1] + strgtb r3, [r0] + + ldmfd sp!, {r4, r5, pc} +ENDPROC(__raw_writesb) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S new file mode 100644 index 00000000..8d3b7813 --- /dev/null +++ b/arch/arm/lib/io-writesl.S @@ -0,0 +1,67 @@ +/* + * linux/arch/arm/lib/io-writesl.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +ENTRY(__raw_writesl) + teq r2, #0 @ do we have to check for the zero len? + moveq pc, lr + ands ip, r1, #3 + bne 3f + + subs r2, r2, #4 + bmi 2f + stmfd sp!, {r4, lr} +1: ldmia r1!, {r3, r4, ip, lr} + subs r2, r2, #4 + str r3, [r0, #0] + str r4, [r0, #0] + str ip, [r0, #0] + str lr, [r0, #0] + bpl 1b + ldmfd sp!, {r4, lr} +2: movs r2, r2, lsl #31 + ldmcsia r1!, {r3, ip} + strcs r3, [r0, #0] + ldrne r3, [r1, #0] + strcs ip, [r0, #0] + strne r3, [r0, #0] + mov pc, lr + +3: bic r1, r1, #3 + ldr r3, [r1], #4 + cmp ip, #2 + blt 5f + bgt 6f + +4: mov ip, r3, pull #16 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, push #16 + str ip, [r0] + bne 4b + mov pc, lr + +5: mov ip, r3, pull #8 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, push #24 + str ip, [r0] + bne 5b + mov pc, lr + +6: mov ip, r3, pull #24 + ldr r3, [r1], #4 + subs r2, r2, #1 + orr ip, ip, r3, push #8 + str ip, [r0] + bne 6b + mov pc, lr +ENDPROC(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S new file mode 100644 index 00000000..cd34503e --- /dev/null +++ b/arch/arm/lib/io-writesw-armv3.S @@ -0,0 +1,127 @@ +/* + * linux/arch/arm/lib/io-writesw-armv3.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <mach/hardware.h> + +.Loutsw_bad_alignment: + adr r0, .Loutsw_bad_align_msg + mov r2, lr + b panic +.Loutsw_bad_align_msg: + .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" + .align + +.Loutsw_align: tst r1, #1 + bne .Loutsw_bad_alignment + + add r1, r1, #2 + + ldr r3, [r1, #-4] + mov r3, r3, lsr #16 + orr r3, r3, r3, lsl #16 + str r3, [r0] + subs r2, r2, #1 + moveq pc, lr + +ENTRY(__raw_writesw) + teq r2, #0 @ do we have to check for the zero len? 
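+		@ r0 = I/O port (never advanced), r1 = source buffer, r2 = number of 16-bit halfwords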
+ moveq pc, lr + tst r1, #3 + bne .Loutsw_align + + stmfd sp!, {r4, r5, r6, lr} + + subs r2, r2, #8 + bmi .Lno_outsw_8 + +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r5, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r5, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r6, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r6, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + subs r2, r2, #8 + bpl .Loutsw_8_lp + + tst r2, #7 + ldmeqfd sp!, {r4, r5, r6, pc} + +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 + + ldmia r1!, {r3, r4} + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + + mov ip, r4, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r4, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_4: tst r2, #2 + beq .Lno_outsw_2 + + ldr r3, [r1], #4 + + mov ip, r3, lsl #16 + orr ip, ip, ip, lsr #16 + str ip, [r0] + + mov ip, r3, lsr #16 + orr ip, ip, ip, lsl #16 + str ip, [r0] + +.Lno_outsw_2: tst r2, #1 + + ldrne r3, [r1] + + movne ip, r3, lsl #16 + orrne ip, ip, ip, lsr #16 + strne ip, [r0] + + ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S new file mode 100644 index 00000000..ff4f71b5 --- /dev/null +++ b/arch/arm/lib/io-writesw-armv4.S @@ -0,0 +1,100 @@ +/* + * linux/arch/arm/lib/io-writesw-armv4.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .macro outword, rd +#ifndef __ARMEB__ + strh \rd, [r0] + mov \rd, \rd, lsr #16 + strh \rd, [r0] +#else + mov lr, \rd, lsr #16 + strh lr, [r0] + strh \rd, [r0] +#endif + .endm + +.Loutsw_align: movs ip, r1, lsl #31 + bne .Loutsw_noalign + + ldrh r3, [r1], #2 + sub r2, r2, #1 + strh r3, [r0] + +ENTRY(__raw_writesw) + teq r2, #0 + moveq pc, lr + ands r3, r1, #3 + bne .Loutsw_align + + stmfd sp!, {r4, r5, lr} + + subs r2, r2, #8 + bmi .Lno_outsw_8 + +.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip} + subs r2, r2, #8 + outword r3 + outword r4 + outword r5 + outword ip + bpl .Loutsw_8_lp + +.Lno_outsw_8: tst r2, #4 + beq .Lno_outsw_4 + + ldmia r1!, {r3, ip} + outword r3 + outword ip + +.Lno_outsw_4: movs r2, r2, lsl #31 + bcc .Lno_outsw_2 + + ldr r3, [r1], #4 + outword r3 + +.Lno_outsw_2: ldrneh r3, [r1] + strneh r3, [r0] + + ldmfd sp!, {r4, r5, pc} + +#ifdef __ARMEB__ +#define pull_hbyte0 lsl #8 +#define push_hbyte1 lsr #24 +#else +#define pull_hbyte0 lsr #24 +#define push_hbyte1 lsl #8 +#endif + +.Loutsw_noalign: + ARM( ldr r3, [r1, -r3]! ) + THUMB( rsb r3, r3, #0 ) + THUMB( ldr r3, [r1, r3] ) + THUMB( sub r1, r3 ) + subcs r2, r2, #1 + bcs 2f + subs r2, r2, #2 + bmi 3f + +1: mov ip, r3, lsr #8 + strh ip, [r0] +2: mov ip, r3, pull_hbyte0 + ldr r3, [r1, #4]! 
+ subs r2, r2, #2 + orr ip, ip, r3, push_hbyte1 + strh ip, [r0] + bpl 1b + + tst r2, #1 +3: movne ip, r3, lsr #8 + strneh ip, [r0] + mov pc, lr +ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S new file mode 100644 index 00000000..c562f649 --- /dev/null +++ b/arch/arm/lib/lib1funcs.S @@ -0,0 +1,363 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre <nico@fluxnic.net> + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. 
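+	@ (once the divisor reaches 0x10000000, another four-bit shift could
+	@ push significant bits past bit 31)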
+1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. 
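+	@ The cmn below selects how many of the trailing steps run: blt takes
+	@ only the one at 4:, beq the two from 3:, otherwise all three from 2:.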
+2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) + + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + mov pc, lr + +11: moveq r0, #1 + movne r0, #0 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) +UNWIND(.fnstart) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr + + ARM_MOD_BODY r0, r1, r2, r3 + + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__umodsi3) + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) +UNWIND(.fnstart) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S new file mode 100644 index 00000000..f83d4491 --- /dev/null +++ b/arch/arm/lib/lshrdi3.S @@ -0,0 +1,53 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include <linux/linkage.h> + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__lshrdi3) +ENTRY(__aeabi_llsr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) + mov ah, ah, lsr r2 + mov pc, lr + +ENDPROC(__lshrdi3) +ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S new file mode 100644 index 00000000..1da86991 --- /dev/null +++ b/arch/arm/lib/memchr.S @@ -0,0 +1,26 @@ +/* + * linux/arch/arm/lib/memchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(memchr) +1: subs r2, r2, #1 + bmi 2f + ldrb r3, [r0], #1 + teq r3, r1 + bne 1b + sub r0, r0, #1 +2: movne r0, #0 + mov pc, lr +ENDPROC(memchr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S new file mode 100644 index 00000000..a9b9e228 --- /dev/null +++ b/arch/arm/lib/memcpy.S @@ -0,0 +1,63 @@ +/* + * linux/arch/arm/lib/memcpy.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
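+ * The copy loop itself lives in copy_template.S; this file only supplies
+ * the load/store and stack-frame macros that the template expects.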
+ */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#define LDR1W_SHIFT 0 +#define STR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm + + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + + .macro enter reg1 reg2 + stmdb sp!, {r0, \reg1, \reg2} + .endm + + .macro exit reg1 reg2 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ + +ENTRY(memcpy) + +#include "copy_template.S" + +ENDPROC(memcpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S new file mode 100644 index 00000000..938fc14f --- /dev/null +++ b/arch/arm/lib/memmove.S @@ -0,0 +1,199 @@ +/* + * linux/arch/arm/lib/memmove.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: (C) MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* + * Prototype: void *memmove(void *dest, const void *src, size_t n); + * + * Note: + * + * If the memory regions don't overlap, we simply branch to memcpy which is + * normally a bit faster. Otherwise the copy is done going downwards. This + * is a transposition of the code from copy_template.S but with the copy + * occurring in the opposite direction. + */ + +ENTRY(memmove) + + subs ip, r0, r1 + cmphi r2, ip + bls memcpy + + stmfd sp!, {r0, r4, lr} + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #-4] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: W(nop) + W(ldr) r3, [r1, #-4]! + W(ldr) r4, [r1, #-4]! + W(ldr) r5, [r1, #-4]! + W(ldr) r6, [r1, #-4]! + W(ldr) r7, [r1, #-4]! + W(ldr) r8, [r1, #-4]! + W(ldr) lr, [r1, #-4]! + + add pc, pc, ip + nop + W(nop) + W(str) r3, [r0, #-4]! + W(str) r4, [r0, #-4]! + W(str) r5, [r0, #-4]! + W(str) r6, [r0, #-4]! + W(str) r7, [r0, #-4]! + W(str) r8, [r0, #-4]! + W(str) lr, [r0, #-4]! + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldrneb r3, [r1, #-1]! + ldrcsb r4, [r1, #-1]! + ldrcsb ip, [r1, #-1] + strneb r3, [r0, #-1]! + strcsb r4, [r0, #-1]! 
+ strcsb ip, [r0, #-1] + ldmfd sp!, {r0, r4, pc} + +9: cmp ip, #2 + ldrgtb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strgtb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f + + + .macro backward_copy_shift push pull + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + + PLD( pld [r1, #-4] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r7, r8, r9, ip} + mov lr, r3, push #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, pull #\pull + mov ip, ip, push #\push + orr ip, ip, r9, pull #\pull + mov r9, r9, push #\push + orr r9, r9, r8, pull #\pull + mov r8, r8, push #\push + orr r8, r8, r7, pull #\pull + mov r7, r7, push #\push + orr r7, r7, r6, pull #\pull + mov r6, r6, push #\push + orr r6, r6, r5, pull #\pull + mov r5, r5, push #\push + orr r5, r5, r4, pull #\pull + mov r4, r4, push #\push + orr r4, r4, r3, pull #\pull + stmdb r0!, {r4 - r9, ip, lr} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov lr, r3, push #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, pull #\pull + str lr, [r0, #-4]! + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: add r1, r1, #(\pull / 8) + b 8b + + .endm + + + backward_copy_shift push=8 pull=24 + +17: backward_copy_shift push=16 pull=16 + +18: backward_copy_shift push=24 pull=8 + +ENDPROC(memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S new file mode 100644 index 00000000..650d5923 --- /dev/null +++ b/arch/arm/lib/memset.S @@ -0,0 +1,127 @@ +/* + * linux/arch/arm/lib/memset.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 + .word 0 + +1: subs r2, r2, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [r0], #1 @ 1 + strleb r1, [r0], #1 @ 1 + strb r1, [r0], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memset again. + */ + +ENTRY(memset) + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * we know that the pointer in r0 is aligned to a word boundary. + */ + orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 + cmp r2, #16 + blt 4f + +#if ! CALGN(1)+0 + +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! + mov ip, r1 + mov lr, r1 + +2: subs r2, r2, #64 + stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + bgt 2b + ldmeqfd sp!, {pc} @ Now <64 bytes to go. 
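+	@ (r2 overshot by 64, but its low six bits still equal the bytes remaining)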
+/* + * No need to correct the count; we're only testing bits from now on + */ + tst r2, #32 + stmneia r0!, {r1, r3, ip, lr} + stmneia r0!, {r1, r3, ip, lr} + tst r2, #16 + stmneia r0!, {r1, r3, ip, lr} + ldr lr, [sp], #4 + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r7, lr} + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov ip, r1 + mov lr, r1 + + cmp r2, #96 + tstgt r0, #31 + ble 3f + + and ip, r0, #31 + rsb ip, ip, #32 + sub r2, r2, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + tst ip, #(1 << 30) + mov ip, r1 + strne r1, [r0], #4 + +3: subs r2, r2, #64 + stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia r0!, {r1, r3-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r2, #32 + stmneia r0!, {r1, r3-r7, ip, lr} + tst r2, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + +4: tst r2, #8 + stmneia r0!, {r1, r3} + tst r2, #4 + strne r1, [r0], #4 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r2, #2 + strneb r1, [r0], #1 + strneb r1, [r0], #1 + tst r2, #1 + strneb r1, [r0], #1 + mov pc, lr +ENDPROC(memset) diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S new file mode 100644 index 00000000..3fbdef5f --- /dev/null +++ b/arch/arm/lib/memzero.S @@ -0,0 +1,125 @@ +/* + * linux/arch/arm/lib/memzero.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 + .word 0 +/* + * Align the pointer in r0. r3 contains the number of bytes that we are + * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we + * don't bother; we use byte stores instead. + */ +1: subs r1, r1, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r2, [r0], #1 @ 1 + strleb r2, [r0], #1 @ 1 + strb r2, [r0], #1 @ 1 + add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memzero again. + */ + +ENTRY(__memzero) + mov r2, #0 @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. + */ + cmp r1, #16 @ 1 we can skip this chunk if we + blt 4f @ 1 have < 16 bytes + +#if ! CALGN(1)+0 + +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! @ 1 + mov ip, r2 @ 1 + mov lr, r2 @ 1 + +3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + bgt 3b @ 1 + ldmeqfd sp!, {pc} @ 1/2 quick exit +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r1, #32 @ 1 + stmneia r0!, {r2, r3, ip, lr} @ 4 + stmneia r0!, {r2, r3, ip, lr} @ 4 + tst r1, #16 @ 1 16 bytes or more? + stmneia r0!, {r2, r3, ip, lr} @ 4 + ldr lr, [sp], #4 @ 1 + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. 
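+ * The partial stores below first advance r0 to a 32-byte boundary; the
+ * stm pairs at 3: then clear 64 bytes per iteration.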
+ */ + + stmfd sp!, {r4-r7, lr} + mov r4, r2 + mov r5, r2 + mov r6, r2 + mov r7, r2 + mov ip, r2 + mov lr, r2 + + cmp r1, #96 + andgts ip, r0, #31 + ble 3f + + rsb ip, ip, #32 + sub r1, r1, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + movs ip, ip, lsl #2 + strcs r2, [r0], #4 + +3: subs r1, r1, #64 + stmgeia r0!, {r2-r7, ip, lr} + stmgeia r0!, {r2-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r1, #32 + stmneia r0!, {r2-r7, ip, lr} + tst r1, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + +4: tst r1, #8 @ 1 8 bytes or more? + stmneia r0!, {r2, r3} @ 2 + tst r1, #4 @ 1 4 bytes or more? + strne r2, [r0], #4 @ 1 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r1, #2 @ 1 2 bytes or more? + strneb r2, [r0], #1 @ 1 + strneb r2, [r0], #1 @ 1 + tst r1, #1 @ 1 a byte left over + strneb r2, [r0], #1 @ 1 + mov pc, lr @ 1 +ENDPROC(__memzero) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S new file mode 100644 index 00000000..36c91b49 --- /dev/null +++ b/arch/arm/lib/muldi3.S @@ -0,0 +1,47 @@ +/* + * linux/arch/arm/lib/muldi3.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +ENTRY(__muldi3) +ENTRY(__aeabi_lmul) + + mul xh, yl, xh + mla xh, xl, yh, xh + mov ip, xl, lsr #16 + mov yh, yl, lsr #16 + bic xl, xl, ip, lsl #16 + bic yl, yl, yh, lsl #16 + mla xh, yh, ip, xh + mul yh, xl, yh + mul xl, yl, xl + mul ip, yl, ip + adds xl, xl, yh, lsl #16 + adc xh, xh, yh, lsr #16 + adds xl, xl, ip, lsl #16 + adc xh, xh, ip, lsr #16 + mov pc, lr + +ENDPROC(__muldi3) +ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S new file mode 100644 index 00000000..7db25990 --- /dev/null +++ b/arch/arm/lib/putuser.S @@ -0,0 +1,92 @@ +/* + * linux/arch/arm/lib/putuser.S + * + * Copyright (C) 2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Idea from x86 version, (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface to make + * them more efficient, especially as they return an error + * value in addition to the "real" return value. + * + * __put_user_X + * + * Inputs: r0 contains the address + * r2, r3 contains the value + * Outputs: r0 is the error code + * lr corrupted + * + * No other registers must be altered. (see <asm/uaccess.h> + * for specific ASM register usage). + * + * Note that ADDR_LIMIT is either 0 or 0xc0000000 + * Note also that it is intended that __put_user_bad is not global. 
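+ * __put_user_8 stores the 64-bit value passed in the r2/r3 pair as two
+ * word stores.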
+ */ +#include <linux/linkage.h> +#include <asm/errno.h> +#include <asm/domain.h> + +ENTRY(__put_user_1) +1: TUSER(strb) r2, [r0] + mov r0, #0 + mov pc, lr +ENDPROC(__put_user_1) + +ENTRY(__put_user_2) + mov ip, r2, lsr #8 +#ifdef CONFIG_THUMB2_KERNEL +#ifndef __ARMEB__ +2: TUSER(strb) r2, [r0] +3: TUSER(strb) ip, [r0, #1] +#else +2: TUSER(strb) ip, [r0] +3: TUSER(strb) r2, [r0, #1] +#endif +#else /* !CONFIG_THUMB2_KERNEL */ +#ifndef __ARMEB__ +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] +#else +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] +#endif +#endif /* CONFIG_THUMB2_KERNEL */ + mov r0, #0 + mov pc, lr +ENDPROC(__put_user_2) + +ENTRY(__put_user_4) +4: TUSER(str) r2, [r0] + mov r0, #0 + mov pc, lr +ENDPROC(__put_user_4) + +ENTRY(__put_user_8) +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] +#else +5: TUSER(str) r2, [r0], #4 +6: TUSER(str) r3, [r0] +#endif + mov r0, #0 + mov pc, lr +ENDPROC(__put_user_8) + +__put_user_bad: + mov r0, #-EFAULT + mov pc, lr +ENDPROC(__put_user_bad) + +.pushsection __ex_table, "a" + .long 1b, __put_user_bad + .long 2b, __put_user_bad + .long 3b, __put_user_bad + .long 4b, __put_user_bad + .long 5b, __put_user_bad + .long 6b, __put_user_bad +.popsection diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S new file mode 100644 index 00000000..bbee5c66 --- /dev/null +++ b/arch/arm/lib/setbit.S @@ -0,0 +1,17 @@ +/* + * linux/arch/arm/lib/setbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +ENTRY(_set_bit) + bitop orr +ENDPROC(_set_bit) diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S new file mode 100644 index 00000000..eb0edb80 --- /dev/null +++ b/arch/arm/lib/sha1.S @@ -0,0 +1,211 @@ +/* + * linux/arch/arm/lib/sha1.S + * + * SHA transform optimized for ARM + * + * Copyright: (C) 2005 by Nicolas Pitre <nico@fluxnic.net> + * Created: September 17, 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * The reference implementation for this code is linux/lib/sha1.c + */ + +#include <linux/linkage.h> + + .text + + +/* + * void sha_transform(__u32 *digest, const char *in, __u32 *W) + * + * Note: the "in" ptr may be unaligned. + */ + +ENTRY(sha_transform) + + stmfd sp!, {r4 - r8, lr} + + @ for (i = 0; i < 16; i++) + @ W[i] = be32_to_cpu(in[i]); + +#ifdef __ARMEB__ + mov r4, r0 + mov r0, r2 + mov r2, #64 + bl memcpy + mov r2, r0 + mov r0, r4 +#else + mov r3, r2 + mov lr, #16 +1: ldrb r4, [r1], #1 + ldrb r5, [r1], #1 + ldrb r6, [r1], #1 + ldrb r7, [r1], #1 + subs lr, lr, #1 + orr r5, r5, r4, lsl #8 + orr r6, r6, r5, lsl #8 + orr r7, r7, r6, lsl #8 + str r7, [r3], #4 + bne 1b +#endif + + @ for (i = 0; i < 64; i++) + @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31); + + sub r3, r2, #4 + mov lr, #64 +2: ldr r4, [r3, #4]! 
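+	@ r4 = W[i]; the loads below fetch W[i+2], W[i+8] and W[i+13], and the
+	@ result is stored 64 bytes on as W[i+16]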
+ subs lr, lr, #1 + ldr r5, [r3, #8] + ldr r6, [r3, #32] + ldr r7, [r3, #52] + eor r4, r4, r5 + eor r4, r4, r6 + eor r4, r4, r7 + mov r4, r4, ror #31 + str r4, [r3, #64] + bne 2b + + /* + * The SHA functions are: + * + * f1(B,C,D) = (D ^ (B & (C ^ D))) + * f2(B,C,D) = (B ^ C ^ D) + * f3(B,C,D) = ((B & C) | (D & (B | C))) + * + * Then the sub-blocks are processed as follows: + * + * A' = ror(A, 27) + f(B,C,D) + E + K + *W++ + * B' = A + * C' = ror(B, 2) + * D' = C + * E' = D + * + * We therefore unroll each loop 5 times to avoid register shuffling. + * Also the ror for C (and also D and E which are successivelyderived + * from it) is applied in place to cut on an additional mov insn for + * each round. + */ + + .macro sha_f1, A, B, C, D, E + ldr r3, [r2], #4 + eor ip, \C, \D + add \E, r1, \E, ror #2 + and ip, \B, ip, ror #2 + add \E, \E, \A, ror #27 + eor ip, ip, \D, ror #2 + add \E, \E, r3 + add \E, \E, ip + .endm + + .macro sha_f2, A, B, C, D, E + ldr r3, [r2], #4 + add \E, r1, \E, ror #2 + eor ip, \B, \C, ror #2 + add \E, \E, \A, ror #27 + eor ip, ip, \D, ror #2 + add \E, \E, r3 + add \E, \E, ip + .endm + + .macro sha_f3, A, B, C, D, E + ldr r3, [r2], #4 + add \E, r1, \E, ror #2 + orr ip, \B, \C, ror #2 + add \E, \E, \A, ror #27 + and ip, ip, \D, ror #2 + add \E, \E, r3 + and r3, \B, \C, ror #2 + orr ip, ip, r3 + add \E, \E, ip + .endm + + ldmia r0, {r4 - r8} + + mov lr, #4 + ldr r1, .L_sha_K + 0 + + /* adjust initial values */ + mov r6, r6, ror #30 + mov r7, r7, ror #30 + mov r8, r8, ror #30 + +3: subs lr, lr, #1 + sha_f1 r4, r5, r6, r7, r8 + sha_f1 r8, r4, r5, r6, r7 + sha_f1 r7, r8, r4, r5, r6 + sha_f1 r6, r7, r8, r4, r5 + sha_f1 r5, r6, r7, r8, r4 + bne 3b + + ldr r1, .L_sha_K + 4 + mov lr, #4 + +4: subs lr, lr, #1 + sha_f2 r4, r5, r6, r7, r8 + sha_f2 r8, r4, r5, r6, r7 + sha_f2 r7, r8, r4, r5, r6 + sha_f2 r6, r7, r8, r4, r5 + sha_f2 r5, r6, r7, r8, r4 + bne 4b + + ldr r1, .L_sha_K + 8 + mov lr, #4 + +5: subs lr, lr, #1 + sha_f3 r4, r5, r6, r7, r8 + sha_f3 r8, r4, r5, r6, r7 + sha_f3 r7, r8, r4, r5, r6 + sha_f3 r6, r7, r8, r4, r5 + sha_f3 r5, r6, r7, r8, r4 + bne 5b + + ldr r1, .L_sha_K + 12 + mov lr, #4 + +6: subs lr, lr, #1 + sha_f2 r4, r5, r6, r7, r8 + sha_f2 r8, r4, r5, r6, r7 + sha_f2 r7, r8, r4, r5, r6 + sha_f2 r6, r7, r8, r4, r5 + sha_f2 r5, r6, r7, r8, r4 + bne 6b + + ldmia r0, {r1, r2, r3, ip, lr} + add r4, r1, r4 + add r5, r2, r5 + add r6, r3, r6, ror #2 + add r7, ip, r7, ror #2 + add r8, lr, r8, ror #2 + stmia r0, {r4 - r8} + + ldmfd sp!, {r4 - r8, pc} + +ENDPROC(sha_transform) + + .align 2 +.L_sha_K: + .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 + + +/* + * void sha_init(__u32 *buf) + */ + + .align 2 +.L_sha_initial_digest: + .word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 + +ENTRY(sha_init) + + str lr, [sp, #-4]! + adr r1, .L_sha_initial_digest + ldmia r1, {r1, r2, r3, ip, lr} + stmia r0, {r1, r2, r3, ip, lr} + ldr pc, [sp], #4 + +ENDPROC(sha_init) diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S new file mode 100644 index 00000000..d8f2a1c1 --- /dev/null +++ b/arch/arm/lib/strchr.S @@ -0,0 +1,27 @@ +/* + * linux/arch/arm/lib/strchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strchr) + and r1, r1, #0xff +1: ldrb r2, [r0], #1 + teq r2, r1 + teqne r2, #0 + bne 1b + teq r2, r1 + movne r0, #0 + subeq r0, r0, #1 + mov pc, lr +ENDPROC(strchr) diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S new file mode 100644 index 00000000..f202d7bd --- /dev/null +++ b/arch/arm/lib/strncpy_from_user.S @@ -0,0 +1,43 @@ +/* + * linux/arch/arm/lib/strncpy_from_user.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> + + .text + .align 5 + +/* + * Copy a string from user space to kernel space. + * r0 = dst, r1 = src, r2 = byte length + * returns the number of characters copied (strlen of copied string), + * -EFAULT on exception, or "len" if we fill the whole buffer + */ +ENTRY(__strncpy_from_user) + mov ip, r1 +1: subs r2, r2, #1 + ldrusr r3, r1, 1, pl + bmi 2f + strb r3, [r0], #1 + teq r3, #0 + bne 1b + sub r1, r1, #1 @ take NUL character out of count +2: sub r0, r1, ip + mov pc, lr +ENDPROC(__strncpy_from_user) + + .pushsection .fixup,"ax" + .align 0 +9001: mov r3, #0 + strb r3, [r0, #0] @ null terminate + mov r0, #-EFAULT + mov pc, lr + .popsection + diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S new file mode 100644 index 00000000..0ecbb459 --- /dev/null +++ b/arch/arm/lib/strnlen_user.S @@ -0,0 +1,40 @@ +/* + * linux/arch/arm/lib/strnlen_user.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> + + .text + .align 5 + +/* Prototype: unsigned long __strnlen_user(const char *str, long n) + * Purpose : get length of a string in user memory + * Params : str - address of string in user memory + * Returns : length of string *including terminator* + * or zero on exception, or n + 1 if too long + */ +ENTRY(__strnlen_user) + mov r2, r0 +1: + ldrusr r3, r0, 1 + teq r3, #0 + beq 2f + subs r1, r1, #1 + bne 1b + add r0, r0, #1 +2: sub r0, r0, r2 + mov pc, lr +ENDPROC(__strnlen_user) + + .pushsection .fixup,"ax" + .align 0 +9001: mov r0, #0 + mov pc, lr + .popsection diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S new file mode 100644 index 00000000..302f20cd --- /dev/null +++ b/arch/arm/lib/strrchr.S @@ -0,0 +1,26 @@ +/* + * linux/arch/arm/lib/strrchr.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * ASM optimised string functions + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 5 +ENTRY(strrchr) + mov r3, #0 +1: ldrb r2, [r0], #1 + teq r2, r1 + subeq r3, r0, #1 + teq r2, #0 + bne 1b + mov r0, r3 + mov pc, lr +ENDPROC(strrchr) diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S new file mode 100644 index 00000000..15a4d431 --- /dev/null +++ b/arch/arm/lib/testchangebit.S @@ -0,0 +1,17 @@ +/* + * linux/arch/arm/lib/testchangebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +ENTRY(_test_and_change_bit) + testop eor, str +ENDPROC(_test_and_change_bit) diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S new file mode 100644 index 00000000..521b66b5 --- /dev/null +++ b/arch/arm/lib/testclearbit.S @@ -0,0 +1,17 @@ +/* + * linux/arch/arm/lib/testclearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +ENTRY(_test_and_clear_bit) + testop bicne, strne +ENDPROC(_test_and_clear_bit) diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S new file mode 100644 index 00000000..1c98cc21 --- /dev/null +++ b/arch/arm/lib/testsetbit.S @@ -0,0 +1,17 @@ +/* + * linux/arch/arm/lib/testsetbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "bitops.h" + .text + +ENTRY(_test_and_set_bit) + testop orreq, streq +ENDPROC(_test_and_set_bit) diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S new file mode 100644 index 00000000..5c908b1c --- /dev/null +++ b/arch/arm/lib/uaccess.S @@ -0,0 +1,564 @@ +/* + * linux/arch/arm/lib/uaccess.S + * + * Copyright (C) 1995, 1996,1997,1998 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Routines to block copy data to/from user memory + * These are highly optimised both for the 4k page size + * and for various alignments. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/domain.h> + + .text + +#define PAGE_SHIFT 12 + +/* Prototype: int __copy_to_user(void *to, const char *from, size_t n) + * Purpose : copy a block to user memory from kernel memory + * Params : to - user memory + * : from - kernel memory + * : n - number of bytes to copy + * Returns : Number of bytes NOT copied. 
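+ * If a store faults, the fixup at 9001 pops the length saved on entry back
+ * into r0, so the whole count is reported as not copied.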
+ */ + +.Lc2u_dest_not_aligned: + rsb ip, ip, #4 + cmp ip, #2 + ldrb r3, [r1], #1 +USER( TUSER( strb) r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + sub r2, r2, ip + b .Lc2u_dest_aligned + +ENTRY(__copy_to_user) + stmfd sp!, {r2, r4 - r7, lr} + cmp r2, #4 + blt .Lc2u_not_enough + ands ip, r0, #3 + bne .Lc2u_dest_not_aligned +.Lc2u_dest_aligned: + + ands ip, r1, #3 + bne .Lc2u_src_not_aligned +/* + * Seeing as there has to be at least 8 bytes to copy, we can + * copy one word, and force a user-mode page fault... + */ + +.Lc2u_0fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lc2u_0nowords + ldr r3, [r1], #4 +USER( TUSER( str) r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lc2u_0fupi +/* + * ip = max no. of bytes to copy before needing another "strt" insn + */ + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #32 + blt .Lc2u_0rem8lp + +.Lc2u_0cpy8lp: ldmia r1!, {r3 - r6} + stmia r0!, {r3 - r6} @ Shouldnt fault + ldmia r1!, {r3 - r6} + subs ip, ip, #32 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .Lc2u_0cpy8lp + +.Lc2u_0rem8lp: cmn ip, #16 + ldmgeia r1!, {r3 - r6} + stmgeia r0!, {r3 - r6} @ Shouldnt fault + tst ip, #8 + ldmneia r1!, {r3 - r4} + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + ldrne r3, [r1], #4 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .Lc2u_0fupi +.Lc2u_0nowords: teq ip, #0 + beq .Lc2u_finished +.Lc2u_nowords: cmp ip, #2 + ldrb r3, [r1], #1 +USER( TUSER( strb) r3, [r0], #1) @ May fault + ldrgeb r3, [r1], #1 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + ldrgtb r3, [r1], #1 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished + +.Lc2u_not_enough: + movs ip, r2 + bne .Lc2u_nowords +.Lc2u_finished: mov r0, #0 + ldmfd sp!, {r2, r4 - r7, pc} + +.Lc2u_src_not_aligned: + bic r1, r1, #3 + ldr r7, [r1], #4 + cmp ip, #2 + bgt .Lc2u_3fupi + beq .Lc2u_2fupi +.Lc2u_1fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lc2u_1nowords + mov r3, r7, pull #8 + ldr r7, [r1], #4 + orr r3, r3, r7, push #24 +USER( TUSER( str) r3, [r0], #4) @ May fault + mov ip, r0, lsl #32 - PAGE_SHIFT + rsb ip, ip, #0 + movs ip, ip, lsr #32 - PAGE_SHIFT + beq .Lc2u_1fupi + cmp r2, ip + movlt ip, r2 + sub r2, r2, ip + subs ip, ip, #16 + blt .Lc2u_1rem8lp + +.Lc2u_1cpy8lp: mov r3, r7, pull #8 + ldmia r1!, {r4 - r7} + subs ip, ip, #16 + orr r3, r3, r4, push #24 + mov r4, r4, pull #8 + orr r4, r4, r5, push #24 + mov r5, r5, pull #8 + orr r5, r5, r6, push #24 + mov r6, r6, pull #8 + orr r6, r6, r7, push #24 + stmia r0!, {r3 - r6} @ Shouldnt fault + bpl .Lc2u_1cpy8lp + +.Lc2u_1rem8lp: tst ip, #8 + movne r3, r7, pull #8 + ldmneia r1!, {r4, r7} + orrne r3, r3, r4, push #24 + movne r4, r4, pull #8 + orrne r4, r4, r7, push #24 + stmneia r0!, {r3 - r4} @ Shouldnt fault + tst ip, #4 + movne r3, r7, pull #8 + ldrne r7, [r1], #4 + orrne r3, r3, r7, push #24 + TUSER( strne) r3, [r0], #4 @ Shouldnt fault + ands ip, ip, #3 + beq .Lc2u_1fupi +.Lc2u_1nowords: mov r3, r7, get_byte_1 + teq ip, #0 + beq .Lc2u_finished + cmp ip, #2 +USER( TUSER( strb) r3, [r0], #1) @ May fault + movge r3, r7, get_byte_2 +USER( TUSER( strgeb) r3, [r0], #1) @ May fault + movgt r3, r7, get_byte_3 +USER( TUSER( strgtb) r3, [r0], #1) @ May fault + b .Lc2u_finished + +.Lc2u_2fupi: subs r2, r2, #4 + addmi ip, r2, #4 + bmi .Lc2u_2nowords + mov r3, r7, pull #16 + ldr r7, 
+ ldr r7, [r1], #4
+ orr r3, r3, r7, push #16
+USER( TUSER( str) r3, [r0], #4) @ May fault
+ mov ip, r0, lsl #32 - PAGE_SHIFT
+ rsb ip, ip, #0
+ movs ip, ip, lsr #32 - PAGE_SHIFT
+ beq .Lc2u_2fupi
+ cmp r2, ip
+ movlt ip, r2
+ sub r2, r2, ip
+ subs ip, ip, #16
+ blt .Lc2u_2rem8lp
+
+.Lc2u_2cpy8lp: mov r3, r7, pull #16
+ ldmia r1!, {r4 - r7}
+ subs ip, ip, #16
+ orr r3, r3, r4, push #16
+ mov r4, r4, pull #16
+ orr r4, r4, r5, push #16
+ mov r5, r5, pull #16
+ orr r5, r5, r6, push #16
+ mov r6, r6, pull #16
+ orr r6, r6, r7, push #16
+ stmia r0!, {r3 - r6} @ Shouldnt fault
+ bpl .Lc2u_2cpy8lp
+
+.Lc2u_2rem8lp: tst ip, #8
+ movne r3, r7, pull #16
+ ldmneia r1!, {r4, r7}
+ orrne r3, r3, r4, push #16
+ movne r4, r4, pull #16
+ orrne r4, r4, r7, push #16
+ stmneia r0!, {r3 - r4} @ Shouldnt fault
+ tst ip, #4
+ movne r3, r7, pull #16
+ ldrne r7, [r1], #4
+ orrne r3, r3, r7, push #16
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
+ ands ip, ip, #3
+ beq .Lc2u_2fupi
+.Lc2u_2nowords: mov r3, r7, get_byte_2
+ teq ip, #0
+ beq .Lc2u_finished
+ cmp ip, #2
+USER( TUSER( strb) r3, [r0], #1) @ May fault
+ movge r3, r7, get_byte_3
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
+ ldrgtb r3, [r1], #0
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
+ b .Lc2u_finished
+
+.Lc2u_3fupi: subs r2, r2, #4
+ addmi ip, r2, #4
+ bmi .Lc2u_3nowords
+ mov r3, r7, pull #24
+ ldr r7, [r1], #4
+ orr r3, r3, r7, push #8
+USER( TUSER( str) r3, [r0], #4) @ May fault
+ mov ip, r0, lsl #32 - PAGE_SHIFT
+ rsb ip, ip, #0
+ movs ip, ip, lsr #32 - PAGE_SHIFT
+ beq .Lc2u_3fupi
+ cmp r2, ip
+ movlt ip, r2
+ sub r2, r2, ip
+ subs ip, ip, #16
+ blt .Lc2u_3rem8lp
+
+.Lc2u_3cpy8lp: mov r3, r7, pull #24
+ ldmia r1!, {r4 - r7}
+ subs ip, ip, #16
+ orr r3, r3, r4, push #8
+ mov r4, r4, pull #24
+ orr r4, r4, r5, push #8
+ mov r5, r5, pull #24
+ orr r5, r5, r6, push #8
+ mov r6, r6, pull #24
+ orr r6, r6, r7, push #8
+ stmia r0!, {r3 - r6} @ Shouldnt fault
+ bpl .Lc2u_3cpy8lp
+
+.Lc2u_3rem8lp: tst ip, #8
+ movne r3, r7, pull #24
+ ldmneia r1!, {r4, r7}
+ orrne r3, r3, r4, push #8
+ movne r4, r4, pull #24
+ orrne r4, r4, r7, push #8
+ stmneia r0!, {r3 - r4} @ Shouldnt fault
+ tst ip, #4
+ movne r3, r7, pull #24
+ ldrne r7, [r1], #4
+ orrne r3, r3, r7, push #8
+ TUSER( strne) r3, [r0], #4 @ Shouldnt fault
+ ands ip, ip, #3
+ beq .Lc2u_3fupi
+.Lc2u_3nowords: mov r3, r7, get_byte_3
+ teq ip, #0
+ beq .Lc2u_finished
+ cmp ip, #2
+USER( TUSER( strb) r3, [r0], #1) @ May fault
+ ldrgeb r3, [r1], #1
+USER( TUSER( strgeb) r3, [r0], #1) @ May fault
+ ldrgtb r3, [r1], #0
+USER( TUSER( strgtb) r3, [r0], #1) @ May fault
+ b .Lc2u_finished
+ENDPROC(__copy_to_user)
+
+ .pushsection .fixup,"ax"
+ .align 0
+9001: ldmfd sp!, {r0, r4 - r7, pc}
+ .popsection
+
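Both copy routines in this file pace their bursts with the same trick: shifting the user-space pointer left by 32 - PAGE_SHIFT discards everything except the offset within the page, and the rsb/lsr pair turns that offset into the number of bytes left before the next page boundary, so exactly one explicit user-mode access ("strt"/"ldrt") is issued per page. The following standalone C sketch reproduces that arithmetic; it is an editor's illustration rather than kernel code, assumes 4 KiB pages (PAGE_SHIFT = 12) and 32-bit addresses, and the function name is invented.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12                      /* assumption: 4 KiB pages */

/* Mirrors "mov ip, rN, lsl #32 - PAGE_SHIFT; rsb ip, ip, #0;
 * movs ip, ip, lsr #32 - PAGE_SHIFT": keep only the in-page bits,
 * negate, and mask back down to PAGE_SHIFT bits.
 */
static uint32_t bytes_to_page_boundary(uint32_t addr)
{
        uint32_t ip = addr << (32 - PAGE_SHIFT);   /* offset-within-page bits only */
        ip = 0u - ip;                              /* rsb ip, ip, #0 */
        ip >>= (32 - PAGE_SHIFT);                  /* byte count; 0 if on a boundary */
        return ip;
}

int main(void)
{
        printf("%u\n", bytes_to_page_boundary(0x1234));   /* 3532 = 4096 - 0x234 */
        printf("%u\n", bytes_to_page_boundary(0x2000));   /* 0: already on a boundary */
        return 0;
}

A result of zero means the pointer already sits on a page boundary, which is why the assembly branches back (beq) to copy one more word with a faulting store/load before recomputing the count.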
+/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n);
+ * Purpose : copy a block from user memory to kernel memory
+ * Params : to - kernel memory
+ * : from - user memory
+ * : n - number of bytes to copy
+ * Returns : Number of bytes NOT copied.
+ */
+.Lcfu_dest_not_aligned:
+ rsb ip, ip, #4
+ cmp ip, #2
+USER( TUSER( ldrb) r3, [r1], #1) @ May fault
+ strb r3, [r0], #1
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
+ strgeb r3, [r0], #1
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
+ strgtb r3, [r0], #1
+ sub r2, r2, ip
+ b .Lcfu_dest_aligned
+
+ENTRY(__copy_from_user)
+ stmfd sp!, {r0, r2, r4 - r7, lr}
+ cmp r2, #4
+ blt .Lcfu_not_enough
+ ands ip, r0, #3
+ bne .Lcfu_dest_not_aligned
+.Lcfu_dest_aligned:
+ ands ip, r1, #3
+ bne .Lcfu_src_not_aligned
+
+/*
+ * Seeing as there has to be at least 8 bytes to copy, we can
+ * copy one word, and force a user-mode page fault...
+ */
+
+.Lcfu_0fupi: subs r2, r2, #4
+ addmi ip, r2, #4
+ bmi .Lcfu_0nowords
+USER( TUSER( ldr) r3, [r1], #4)
+ str r3, [r0], #4
+ mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction
+ rsb ip, ip, #0
+ movs ip, ip, lsr #32 - PAGE_SHIFT
+ beq .Lcfu_0fupi
+/*
+ * ip = max no. of bytes to copy before needing another "strt" insn
+ */
+ cmp r2, ip
+ movlt ip, r2
+ sub r2, r2, ip
+ subs ip, ip, #32
+ blt .Lcfu_0rem8lp
+
+.Lcfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault
+ stmia r0!, {r3 - r6}
+ ldmia r1!, {r3 - r6} @ Shouldnt fault
+ subs ip, ip, #32
+ stmia r0!, {r3 - r6}
+ bpl .Lcfu_0cpy8lp
+
+.Lcfu_0rem8lp: cmn ip, #16
+ ldmgeia r1!, {r3 - r6} @ Shouldnt fault
+ stmgeia r0!, {r3 - r6}
+ tst ip, #8
+ ldmneia r1!, {r3 - r4} @ Shouldnt fault
+ stmneia r0!, {r3 - r4}
+ tst ip, #4
+ TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault
+ strne r3, [r0], #4
+ ands ip, ip, #3
+ beq .Lcfu_0fupi
+.Lcfu_0nowords: teq ip, #0
+ beq .Lcfu_finished
+.Lcfu_nowords: cmp ip, #2
+USER( TUSER( ldrb) r3, [r1], #1) @ May fault
+ strb r3, [r0], #1
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
+ strgeb r3, [r0], #1
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
+ strgtb r3, [r0], #1
+ b .Lcfu_finished
+
+.Lcfu_not_enough:
+ movs ip, r2
+ bne .Lcfu_nowords
+.Lcfu_finished: mov r0, #0
+ add sp, sp, #8
+ ldmfd sp!, {r4 - r7, pc}
+
+.Lcfu_src_not_aligned:
+ bic r1, r1, #3
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ cmp ip, #2
+ bgt .Lcfu_3fupi
+ beq .Lcfu_2fupi
+.Lcfu_1fupi: subs r2, r2, #4
+ addmi ip, r2, #4
+ bmi .Lcfu_1nowords
+ mov r3, r7, pull #8
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, push #24
+ str r3, [r0], #4
+ mov ip, r1, lsl #32 - PAGE_SHIFT
+ rsb ip, ip, #0
+ movs ip, ip, lsr #32 - PAGE_SHIFT
+ beq .Lcfu_1fupi
+ cmp r2, ip
+ movlt ip, r2
+ sub r2, r2, ip
+ subs ip, ip, #16
+ blt .Lcfu_1rem8lp
+
+.Lcfu_1cpy8lp: mov r3, r7, pull #8
+ ldmia r1!, {r4 - r7} @ Shouldnt fault
+ subs ip, ip, #16
+ orr r3, r3, r4, push #24
+ mov r4, r4, pull #8
+ orr r4, r4, r5, push #24
+ mov r5, r5, pull #8
+ orr r5, r5, r6, push #24
+ mov r6, r6, pull #8
+ orr r6, r6, r7, push #24
+ stmia r0!, {r3 - r6}
+ bpl .Lcfu_1cpy8lp
+
+.Lcfu_1rem8lp: tst ip, #8
+ movne r3, r7, pull #8
+ ldmneia r1!, {r4, r7} @ Shouldnt fault
+ orrne r3, r3, r4, push #24
+ movne r4, r4, pull #8
+ orrne r4, r4, r7, push #24
+ stmneia r0!, {r3 - r4}
+ tst ip, #4
+ movne r3, r7, pull #8
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, push #24
+ strne r3, [r0], #4
+ ands ip, ip, #3
+ beq .Lcfu_1fupi
+.Lcfu_1nowords: mov r3, r7, get_byte_1
+ teq ip, #0
+ beq .Lcfu_finished
+ cmp ip, #2
+ strb r3, [r0], #1
+ movge r3, r7, get_byte_2
+ strgeb r3, [r0], #1
+ movgt r3, r7, get_byte_3
+ strgtb r3, [r0], #1
+ b .Lcfu_finished
+
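In the unaligned-source paths (.Lcfu_1fupi/.Lcfu_2fupi/.Lcfu_3fupi above and their __copy_to_user twins), each output word is stitched together from two aligned source words: on little-endian kernels the pull and push macros expand to lsr and lsl, so "mov r3, r7, pull #8; orr r3, r3, r4, push #24" yields the four bytes that start one byte into r7. A standalone C sketch of that merge follows; it is an editor's illustration, assumes a little-endian host, uses invented helper names, and off must be 1-3 (a shift by 32 would be undefined in C).

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Reassemble the 32-bit word that starts 'off' bytes into an aligned
 * stream from two aligned loads -- the little-endian meaning of the
 * pull #8*off / push #32-8*off pairs used above.
 */
static uint32_t merge(uint32_t lo_word, uint32_t hi_word, unsigned off)
{
        return (lo_word >> (8 * off)) | (hi_word << (32 - 8 * off));
}

int main(void)
{
        unsigned char buf[8] = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17};
        uint32_t w0, w1, expect;

        memcpy(&w0, buf, 4);           /* aligned load of bytes 0..3 */
        memcpy(&w1, buf + 4, 4);       /* aligned load of bytes 4..7 */
        memcpy(&expect, buf + 1, 4);   /* the misaligned word we actually want */

        /* On a little-endian host both values are 0x14131211. */
        printf("%08x %08x\n", merge(w0, w1, 1), expect);
        return 0;
}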
+.Lcfu_2fupi: subs r2, r2, #4
+ addmi ip, r2, #4
+ bmi .Lcfu_2nowords
+ mov r3, r7, pull #16
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, push #16
+ str r3, [r0], #4
+ mov ip, r1, lsl #32 - PAGE_SHIFT
+ rsb ip, ip, #0
+ movs ip, ip, lsr #32 - PAGE_SHIFT
+ beq .Lcfu_2fupi
+ cmp r2, ip
+ movlt ip, r2
+ sub r2, r2, ip
+ subs ip, ip, #16
+ blt .Lcfu_2rem8lp
+
+
+.Lcfu_2cpy8lp: mov r3, r7, pull #16
+ ldmia r1!, {r4 - r7} @ Shouldnt fault
+ subs ip, ip, #16
+ orr r3, r3, r4, push #16
+ mov r4, r4, pull #16
+ orr r4, r4, r5, push #16
+ mov r5, r5, pull #16
+ orr r5, r5, r6, push #16
+ mov r6, r6, pull #16
+ orr r6, r6, r7, push #16
+ stmia r0!, {r3 - r6}
+ bpl .Lcfu_2cpy8lp
+
+.Lcfu_2rem8lp: tst ip, #8
+ movne r3, r7, pull #16
+ ldmneia r1!, {r4, r7} @ Shouldnt fault
+ orrne r3, r3, r4, push #16
+ movne r4, r4, pull #16
+ orrne r4, r4, r7, push #16
+ stmneia r0!, {r3 - r4}
+ tst ip, #4
+ movne r3, r7, pull #16
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, push #16
+ strne r3, [r0], #4
+ ands ip, ip, #3
+ beq .Lcfu_2fupi
+.Lcfu_2nowords: mov r3, r7, get_byte_2
+ teq ip, #0
+ beq .Lcfu_finished
+ cmp ip, #2
+ strb r3, [r0], #1
+ movge r3, r7, get_byte_3
+ strgeb r3, [r0], #1
+USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault
+ strgtb r3, [r0], #1
+ b .Lcfu_finished
+
+.Lcfu_3fupi: subs r2, r2, #4
+ addmi ip, r2, #4
+ bmi .Lcfu_3nowords
+ mov r3, r7, pull #24
+USER( TUSER( ldr) r7, [r1], #4) @ May fault
+ orr r3, r3, r7, push #8
+ str r3, [r0], #4
+ mov ip, r1, lsl #32 - PAGE_SHIFT
+ rsb ip, ip, #0
+ movs ip, ip, lsr #32 - PAGE_SHIFT
+ beq .Lcfu_3fupi
+ cmp r2, ip
+ movlt ip, r2
+ sub r2, r2, ip
+ subs ip, ip, #16
+ blt .Lcfu_3rem8lp
+
+.Lcfu_3cpy8lp: mov r3, r7, pull #24
+ ldmia r1!, {r4 - r7} @ Shouldnt fault
+ orr r3, r3, r4, push #8
+ mov r4, r4, pull #24
+ orr r4, r4, r5, push #8
+ mov r5, r5, pull #24
+ orr r5, r5, r6, push #8
+ mov r6, r6, pull #24
+ orr r6, r6, r7, push #8
+ stmia r0!, {r3 - r6}
+ subs ip, ip, #16
+ bpl .Lcfu_3cpy8lp
+
+.Lcfu_3rem8lp: tst ip, #8
+ movne r3, r7, pull #24
+ ldmneia r1!, {r4, r7} @ Shouldnt fault
+ orrne r3, r3, r4, push #8
+ movne r4, r4, pull #24
+ orrne r4, r4, r7, push #8
+ stmneia r0!, {r3 - r4}
+ tst ip, #4
+ movne r3, r7, pull #24
+USER( TUSER( ldrne) r7, [r1], #4) @ May fault
+ orrne r3, r3, r7, push #8
+ strne r3, [r0], #4
+ ands ip, ip, #3
+ beq .Lcfu_3fupi
+.Lcfu_3nowords: mov r3, r7, get_byte_3
+ teq ip, #0
+ beq .Lcfu_finished
+ cmp ip, #2
+ strb r3, [r0], #1
+USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault
+ strgeb r3, [r0], #1
+USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault
+ strgtb r3, [r0], #1
+ b .Lcfu_finished
+ENDPROC(__copy_from_user)
+
+ .pushsection .fixup,"ax"
+ .align 0
+ /*
+ * We took an exception. r0 contains a pointer to
+ * the byte not copied.
+ */
+9001: ldr r2, [sp], #4 @ void *to
+ sub r2, r0, r2 @ bytes copied
+ ldr r1, [sp], #4 @ unsigned long count
+ subs r4, r1, r2 @ bytes left to copy
+ movne r1, r4
+ blne __memzero
+ mov r0, r4
+ ldmfd sp!, {r4 - r7, pc}
+ .popsection
+
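The fixup at 9001 above is what gives __copy_from_user its contract: the return value is the number of bytes that could not be copied, and the uncopied tail of the kernel buffer is cleared via __memzero so callers never operate on stale data. The userspace model below illustrates that contract; it is an editor's sketch, not kernel code, and model_copy_from_user() and its accessible parameter (standing in for the point at which the real code would fault) are invented.

#include <stdio.h>
#include <string.h>

static unsigned long model_copy_from_user(void *to, const void *from,
                                           unsigned long n, unsigned long accessible)
{
        unsigned long copied = n < accessible ? n : accessible;

        memcpy(to, from, copied);                     /* the part that did not "fault" */
        memset((char *)to + copied, 0, n - copied);   /* what the 9001 fixup does via __memzero */
        return n - copied;                            /* bytes NOT copied */
}

int main(void)
{
        char src[16] = "fifteen chars..";
        char dst[16];

        /* Typical caller pattern: any nonzero return is treated as a fault (-EFAULT). */
        if (model_copy_from_user(dst, src, sizeof(dst), 10) != 0)
                printf("short copy: caller returns -EFAULT, tail of dst is zeroed\n");
        return 0;
}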
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
new file mode 100644
index 00000000..8b9b1364
--- /dev/null
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -0,0 +1,234 @@
+/*
+ * linux/arch/arm/lib/uaccess_with_memcpy.c
+ *
+ * Written by: Lennert Buytenhek and Nicolas Pitre
+ * Copyright (C) 2009 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/rwsem.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h> /* for in_atomic() */
+#include <linux/gfp.h>
+#include <asm/current.h>
+#include <asm/page.h>
+
+static int
+pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
+{
+        unsigned long addr = (unsigned long)_addr;
+        pgd_t *pgd;
+        pmd_t *pmd;
+        pte_t *pte;
+        pud_t *pud;
+        spinlock_t *ptl;
+
+        pgd = pgd_offset(current->mm, addr);
+        if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
+                return 0;
+
+        pud = pud_offset(pgd, addr);
+        if (unlikely(pud_none(*pud) || pud_bad(*pud)))
+                return 0;
+
+        pmd = pmd_offset(pud, addr);
+        if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
+                return 0;
+
+        pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
+        if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
+            !pte_write(*pte) || !pte_dirty(*pte))) {
+                pte_unmap_unlock(pte, ptl);
+                return 0;
+        }
+
+        *ptep = pte;
+        *ptlp = ptl;
+
+        return 1;
+}
+
+static unsigned long noinline
+__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
+{
+        int atomic;
+
+        if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+                memcpy((void *)to, from, n);
+                return 0;
+        }
+
+        /* the mmap semaphore is taken only if not in an atomic context */
+        atomic = in_atomic();
+
+        if (!atomic)
+                down_read(&current->mm->mmap_sem);
+        while (n) {
+                pte_t *pte;
+                spinlock_t *ptl;
+                int tocopy;
+
+                while (!pin_page_for_write(to, &pte, &ptl)) {
+                        if (!atomic)
+                                up_read(&current->mm->mmap_sem);
+                        if (__put_user(0, (char __user *)to))
+                                goto out;
+                        if (!atomic)
+                                down_read(&current->mm->mmap_sem);
+                }
+
+                tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1;
+                if (tocopy > n)
+                        tocopy = n;
+
+                memcpy((void *)to, from, tocopy);
+                to += tocopy;
+                from += tocopy;
+                n -= tocopy;
+
+                pte_unmap_unlock(pte, ptl);
+        }
+        if (!atomic)
+                up_read(&current->mm->mmap_sem);
+
+out:
+        return n;
+}
+
+unsigned long
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+        /*
+         * This test is stubbed out of the main function above to keep
+         * the overhead for small copies low by avoiding a large
+         * register dump on the stack just to reload them right away.
+         * With frame pointer disabled, tail call optimization kicks in
+         * as well making this test almost invisible.
+         */
+        if (n < 64)
+                return __copy_to_user_std(to, from, n);
+        return __copy_to_user_memcpy(to, from, n);
+}
+
+static unsigned long noinline
+__clear_user_memset(void __user *addr, unsigned long n)
+{
+        if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
+                memset((void *)addr, 0, n);
+                return 0;
+        }
+
+        down_read(&current->mm->mmap_sem);
+        while (n) {
+                pte_t *pte;
+                spinlock_t *ptl;
+                int tocopy;
+
+                while (!pin_page_for_write(addr, &pte, &ptl)) {
+                        up_read(&current->mm->mmap_sem);
+                        if (__put_user(0, (char __user *)addr))
+                                goto out;
+                        down_read(&current->mm->mmap_sem);
+                }
+
+                tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1;
+                if (tocopy > n)
+                        tocopy = n;
+
+                memset((void *)addr, 0, tocopy);
+                addr += tocopy;
+                n -= tocopy;
+
+                pte_unmap_unlock(pte, ptl);
+        }
+        up_read(&current->mm->mmap_sem);
+
+out:
+        return n;
+}
+
+unsigned long __clear_user(void __user *addr, unsigned long n)
+{
+        /* See rational for this in __copy_to_user() above. */
+        if (n < 64)
+                return __clear_user_std(addr, n);
+        return __clear_user_memset(addr, n);
+}
+
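The expression tocopy = (~addr & ~PAGE_MASK) + 1 in both loops above sizes each memcpy/memset chunk so it stops exactly at the end of the page whose pte was just pinned. The short standalone C program below walks the same arithmetic for a couple of iterations; it is an editor's illustration assuming 4 KiB pages, not part of this file.

#include <stdio.h>

#define PAGE_SHIFT 12                       /* assumption: 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long to = 0x8000UL - 40;   /* 40 bytes short of a page boundary */
        unsigned long n = 200;

        while (n) {
                /* Same arithmetic as the loops above: bytes left in the current page. */
                unsigned long tocopy = (~to & ~PAGE_MASK) + 1;
                if (tocopy > n)
                        tocopy = n;
                printf("copy %3lu bytes at %#lx\n", tocopy, to);
                to += tocopy;
                n -= tocopy;
        }
        return 0;
}

With these inputs it prints a 40-byte chunk up to the boundary followed by a 160-byte chunk on the next page, which is exactly the behaviour that keeps each memcpy within the single pinned page.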
+#if 0
+
+/*
+ * This code is disabled by default, but kept around in case the chosen
+ * thresholds need to be revalidated. Some overhead (small but still)
+ * would be implied by a runtime determined variable threshold, and
+ * so far the measurement on concerned targets didn't show a worthwhile
+ * variation.
+ *
+ * Note that a fairly precise sched_clock() implementation is needed
+ * for results to make some sense.
+ */
+
+#include <linux/vmalloc.h>
+
+static int __init test_size_treshold(void)
+{
+        struct page *src_page, *dst_page;
+        void *user_ptr, *kernel_ptr;
+        unsigned long long t0, t1, t2;
+        int size, ret;
+
+        ret = -ENOMEM;
+        src_page = alloc_page(GFP_KERNEL);
+        if (!src_page)
+                goto no_src;
+        dst_page = alloc_page(GFP_KERNEL);
+        if (!dst_page)
+                goto no_dst;
+        kernel_ptr = page_address(src_page);
+        user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
+        if (!user_ptr)
+                goto no_vmap;
+
+        /* warm up the src page dcache */
+        ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE);
+
+        for (size = PAGE_SIZE; size >= 4; size /= 2) {
+                t0 = sched_clock();
+                ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size);
+                t1 = sched_clock();
+                ret |= __copy_to_user_std(user_ptr, kernel_ptr, size);
+                t2 = sched_clock();
+                printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+        }
+
+        for (size = PAGE_SIZE; size >= 4; size /= 2) {
+                t0 = sched_clock();
+                ret |= __clear_user_memset(user_ptr, size);
+                t1 = sched_clock();
+                ret |= __clear_user_std(user_ptr, size);
+                t2 = sched_clock();
+                printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+        }
+
+        if (ret)
+                ret = -EFAULT;
+
+        vunmap(user_ptr);
+no_vmap:
+        put_page(dst_page);
+no_dst:
+        put_page(src_page);
+no_src:
+        return ret;
+}
+
+subsys_initcall(test_size_treshold);
+
+#endif
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
new file mode 100644
index 00000000..f0df6a91
--- /dev/null
+++ b/arch/arm/lib/ucmpdi2.S
@@ -0,0 +1,52 @@
+/*
+ * linux/arch/arm/lib/ucmpdi2.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Oct 19, 2005
+ * Copyright: Monta Vista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__ucmpdi2)
+
+ cmp xh, yh
+ cmpeq xl, yl
+ movlo r0, #0
+ moveq r0, #1
+ movhi r0, #2
+ mov pc, lr
+
+ENDPROC(__ucmpdi2)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_ulcmp)
+
+ cmp xh, yh
+ cmpeq xl, yl
+ movlo r0, #-1
+ moveq r0, #0
+ movhi r0, #1
+ mov pc, lr
+
+ENDPROC(__aeabi_ulcmp)
+
+#endif
+
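For reference, the cmp xh, yh / cmpeq xl, yl pair compares two unsigned 64-bit values by their high words first and only lets the low-word comparison set the flags when the high words are equal; __ucmpdi2 then materialises 0, 1 or 2 for less-than, equal and greater-than, while the AEABI variant returns -1, 0 or 1. A C model of the libgcc-style return convention follows; it is an editor's sketch with an invented helper name, not part of this file.

#include <stdio.h>
#include <stdint.h>

/* Returns 0 if a < b, 1 if a == b, 2 if a > b -- the __ucmpdi2 contract:
 * compare high words, and only on equality decide by the low words.
 */
static int model_ucmpdi2(uint64_t a, uint64_t b)
{
        uint32_t ah = a >> 32, al = (uint32_t)a;
        uint32_t bh = b >> 32, bl = (uint32_t)b;

        if (ah != bh)
                return ah < bh ? 0 : 2;
        if (al != bl)
                return al < bl ? 0 : 2;
        return 1;
}

int main(void)
{
        printf("%d %d %d\n",
               model_ucmpdi2(1, 2),             /* 0: less than */
               model_ucmpdi2(5, 5),             /* 1: equal */
               model_ucmpdi2(1ULL << 40, 1));   /* 2: greater than */
        return 0;
}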