diff options
Diffstat (limited to 'target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch')
-rw-r--r-- | target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch | 355 |
1 files changed, 355 insertions, 0 deletions
diff --git a/target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch b/target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch new file mode 100644 index 0000000000..92b456dacc --- /dev/null +++ b/target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch @@ -0,0 +1,355 @@ +--- + arch/arm/boot/compressed/lib1funcs.S | 348 +++++++++++++++++++++++++++++++++++ + 1 file changed, 348 insertions(+) + +--- /dev/null ++++ linux-2.6.35/arch/arm/boot/compressed/lib1funcs.S +@@ -0,0 +1,348 @@ ++/* ++ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines ++ * ++ * Author: Nicolas Pitre <nico@fluxnic.net> ++ * - contributed to gcc-3.4 on Sep 30, 2003 ++ * - adapted for the Linux kernel on Oct 2, 2003 ++ */ ++ ++/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. ++ ++This file is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 2, or (at your option) any ++later version. ++ ++In addition to the permissions in the GNU General Public License, the ++Free Software Foundation gives you unlimited permission to link the ++compiled version of this file into combinations with other programs, ++and to distribute those combinations without any restriction coming ++from the use of this file. (The General Public License restrictions ++do apply in other respects; for example, they cover modification of ++the file, and distribution when not linked into a combine ++executable.) ++ ++This file is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with this program; see the file COPYING. If not, write to ++the Free Software Foundation, 59 Temple Place - Suite 330, ++Boston, MA 02111-1307, USA. */ ++ ++ ++#include <linux/linkage.h> ++#include <asm/assembler.h> ++ ++ ++.macro ARM_DIV_BODY dividend, divisor, result, curbit ++ ++#if __LINUX_ARM_ARCH__ >= 5 ++ ++ clz \curbit, \divisor ++ clz \result, \dividend ++ sub \result, \curbit, \result ++ mov \curbit, #1 ++ mov \divisor, \divisor, lsl \result ++ mov \curbit, \curbit, lsl \result ++ mov \result, #0 ++ ++#else ++ ++ @ Initially shift the divisor left 3 bits if possible, ++ @ set curbit accordingly. This allows for curbit to be located ++ @ at the left end of each 4 bit nibbles in the division loop ++ @ to save one loop in most cases. ++ tst \divisor, #0xe0000000 ++ moveq \divisor, \divisor, lsl #3 ++ moveq \curbit, #8 ++ movne \curbit, #1 ++ ++ @ Unless the divisor is very big, shift it up in multiples of ++ @ four bits, since this is the amount of unwinding in the main ++ @ division loop. Continue shifting until the divisor is ++ @ larger than the dividend. ++1: cmp \divisor, #0x10000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #4 ++ movlo \curbit, \curbit, lsl #4 ++ blo 1b ++ ++ @ For very big divisors, we must shift it a bit at a time, or ++ @ we will be in danger of overflowing. ++1: cmp \divisor, #0x80000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #1 ++ movlo \curbit, \curbit, lsl #1 ++ blo 1b ++ ++ mov \result, #0 ++ ++#endif ++ ++ @ Division loop ++1: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ orrhs \result, \result, \curbit ++ cmp \dividend, \divisor, lsr #1 ++ subhs \dividend, \dividend, \divisor, lsr #1 ++ orrhs \result, \result, \curbit, lsr #1 ++ cmp \dividend, \divisor, lsr #2 ++ subhs \dividend, \dividend, \divisor, lsr #2 ++ orrhs \result, \result, \curbit, lsr #2 ++ cmp \dividend, \divisor, lsr #3 ++ subhs \dividend, \dividend, \divisor, lsr #3 ++ orrhs \result, \result, \curbit, lsr #3 ++ cmp \dividend, #0 @ Early termination? ++ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? ++ movne \divisor, \divisor, lsr #4 ++ bne 1b ++ ++.endm ++ ++ ++.macro ARM_DIV2_ORDER divisor, order ++ ++#if __LINUX_ARM_ARCH__ >= 5 ++ ++ clz \order, \divisor ++ rsb \order, \order, #31 ++ ++#else ++ ++ cmp \divisor, #(1 << 16) ++ movhs \divisor, \divisor, lsr #16 ++ movhs \order, #16 ++ movlo \order, #0 ++ ++ cmp \divisor, #(1 << 8) ++ movhs \divisor, \divisor, lsr #8 ++ addhs \order, \order, #8 ++ ++ cmp \divisor, #(1 << 4) ++ movhs \divisor, \divisor, lsr #4 ++ addhs \order, \order, #4 ++ ++ cmp \divisor, #(1 << 2) ++ addhi \order, \order, #3 ++ addls \order, \order, \divisor, lsr #1 ++ ++#endif ++ ++.endm ++ ++ ++.macro ARM_MOD_BODY dividend, divisor, order, spare ++ ++#if __LINUX_ARM_ARCH__ >= 5 ++ ++ clz \order, \divisor ++ clz \spare, \dividend ++ sub \order, \order, \spare ++ mov \divisor, \divisor, lsl \order ++ ++#else ++ ++ mov \order, #0 ++ ++ @ Unless the divisor is very big, shift it up in multiples of ++ @ four bits, since this is the amount of unwinding in the main ++ @ division loop. Continue shifting until the divisor is ++ @ larger than the dividend. ++1: cmp \divisor, #0x10000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #4 ++ addlo \order, \order, #4 ++ blo 1b ++ ++ @ For very big divisors, we must shift it a bit at a time, or ++ @ we will be in danger of overflowing. ++1: cmp \divisor, #0x80000000 ++ cmplo \divisor, \dividend ++ movlo \divisor, \divisor, lsl #1 ++ addlo \order, \order, #1 ++ blo 1b ++ ++#endif ++ ++ @ Perform all needed substractions to keep only the reminder. ++ @ Do comparisons in batch of 4 first. ++ subs \order, \order, #3 @ yes, 3 is intended here ++ blt 2f ++ ++1: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ cmp \dividend, \divisor, lsr #1 ++ subhs \dividend, \dividend, \divisor, lsr #1 ++ cmp \dividend, \divisor, lsr #2 ++ subhs \dividend, \dividend, \divisor, lsr #2 ++ cmp \dividend, \divisor, lsr #3 ++ subhs \dividend, \dividend, \divisor, lsr #3 ++ cmp \dividend, #1 ++ mov \divisor, \divisor, lsr #4 ++ subges \order, \order, #4 ++ bge 1b ++ ++ tst \order, #3 ++ teqne \dividend, #0 ++ beq 5f ++ ++ @ Either 1, 2 or 3 comparison/substractions are left. ++2: cmn \order, #2 ++ blt 4f ++ beq 3f ++ cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ mov \divisor, \divisor, lsr #1 ++3: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++ mov \divisor, \divisor, lsr #1 ++4: cmp \dividend, \divisor ++ subhs \dividend, \dividend, \divisor ++5: ++.endm ++ ++ ++ENTRY(__udivsi3) ++ENTRY(__aeabi_uidiv) ++ ++ subs r2, r1, #1 ++ moveq pc, lr ++ bcc Ldiv0 ++ cmp r0, r1 ++ bls 11f ++ tst r1, r2 ++ beq 12f ++ ++ ARM_DIV_BODY r0, r1, r2, r3 ++ ++ mov r0, r2 ++ mov pc, lr ++ ++11: moveq r0, #1 ++ movne r0, #0 ++ mov pc, lr ++ ++12: ARM_DIV2_ORDER r1, r2 ++ ++ mov r0, r0, lsr r2 ++ mov pc, lr ++ ++ENDPROC(__udivsi3) ++ENDPROC(__aeabi_uidiv) ++ ++ENTRY(__umodsi3) ++ ++ subs r2, r1, #1 @ compare divisor with 1 ++ bcc Ldiv0 ++ cmpne r0, r1 @ compare dividend with divisor ++ moveq r0, #0 ++ tsthi r1, r2 @ see if divisor is power of 2 ++ andeq r0, r0, r2 ++ movls pc, lr ++ ++ ARM_MOD_BODY r0, r1, r2, r3 ++ ++ mov pc, lr ++ ++ENDPROC(__umodsi3) ++ ++ENTRY(__divsi3) ++ENTRY(__aeabi_idiv) ++ ++ cmp r1, #0 ++ eor ip, r0, r1 @ save the sign of the result. ++ beq Ldiv0 ++ rsbmi r1, r1, #0 @ loops below use unsigned. ++ subs r2, r1, #1 @ division by 1 or -1 ? ++ beq 10f ++ movs r3, r0 ++ rsbmi r3, r0, #0 @ positive dividend value ++ cmp r3, r1 ++ bls 11f ++ tst r1, r2 @ divisor is power of 2 ? ++ beq 12f ++ ++ ARM_DIV_BODY r3, r1, r0, r2 ++ ++ cmp ip, #0 ++ rsbmi r0, r0, #0 ++ mov pc, lr ++ ++10: teq ip, r0 @ same sign ? ++ rsbmi r0, r0, #0 ++ mov pc, lr ++ ++11: movlo r0, #0 ++ moveq r0, ip, asr #31 ++ orreq r0, r0, #1 ++ mov pc, lr ++ ++12: ARM_DIV2_ORDER r1, r2 ++ ++ cmp ip, #0 ++ mov r0, r3, lsr r2 ++ rsbmi r0, r0, #0 ++ mov pc, lr ++ ++ENDPROC(__divsi3) ++ENDPROC(__aeabi_idiv) ++ ++ENTRY(__modsi3) ++ ++ cmp r1, #0 ++ beq Ldiv0 ++ rsbmi r1, r1, #0 @ loops below use unsigned. ++ movs ip, r0 @ preserve sign of dividend ++ rsbmi r0, r0, #0 @ if negative make positive ++ subs r2, r1, #1 @ compare divisor with 1 ++ cmpne r0, r1 @ compare dividend with divisor ++ moveq r0, #0 ++ tsthi r1, r2 @ see if divisor is power of 2 ++ andeq r0, r0, r2 ++ bls 10f ++ ++ ARM_MOD_BODY r0, r1, r2, r3 ++ ++10: cmp ip, #0 ++ rsbmi r0, r0, #0 ++ mov pc, lr ++ ++ENDPROC(__modsi3) ++ ++#ifdef CONFIG_AEABI ++ ++ENTRY(__aeabi_uidivmod) ++ ++ stmfd sp!, {r0, r1, ip, lr} ++ bl __aeabi_uidiv ++ ldmfd sp!, {r1, r2, ip, lr} ++ mul r3, r0, r2 ++ sub r1, r1, r3 ++ mov pc, lr ++ ++ENDPROC(__aeabi_uidivmod) ++ ++ENTRY(__aeabi_idivmod) ++ ++ stmfd sp!, {r0, r1, ip, lr} ++ bl __aeabi_idiv ++ ldmfd sp!, {r1, r2, ip, lr} ++ mul r3, r0, r2 ++ sub r1, r1, r3 ++ mov pc, lr ++ ++ENDPROC(__aeabi_idivmod) ++ ++#endif ++ ++Ldiv0: ++ ++ str lr, [sp, #-8]! ++ bl __div0 ++ mov r0, #0 @ About as wrong as it could be. ++ ldr pc, [sp], #8 ++ ++ |