From 5a81d6b026ab06123f47e39bc47aedcf23048e6b Mon Sep 17 00:00:00 2001
From: Ian Campbell
Date: Wed, 19 Dec 2012 14:16:30 +0000
Subject: xen: arm: introduce arm32 as a subarch of arm.

- move 32-bit specific files into subarch specific arm32 subdirectory.
- move gic.h to xen/include/asm-arm (it is needed from both subarch and
  generic code).
- make the appropriate build and config file changes to support
  XEN_TARGET_ARCH=arm32.

This prepares us for an eventual 64-bit subarch.

Signed-off-by: Ian Campbell
Acked-by: Stefano Stabellini
Committed-by: Ian Campbell
---
 Config.mk                              |   4 +-
 config/arm.mk                          |  18 --
 config/arm32.mk                        |  18 ++
 xen/Rules.mk                           |   2 +-
 xen/arch/arm/Makefile                  |   9 +-
 xen/arch/arm/Rules.mk                  |  13 +-
 xen/arch/arm/arm32/Makefile            |   5 +
 xen/arch/arm/arm32/asm-offsets.c       |  80 +++++++
 xen/arch/arm/arm32/entry.S             | 141 +++++++++++
 xen/arch/arm/arm32/head.S              | 415 +++++++++++++++++++++++++++++++++
 xen/arch/arm/arm32/lib/Makefile        |   5 +
 xen/arch/arm/arm32/lib/assembler.h     | 325 ++++++++++++++++++++++++++
 xen/arch/arm/arm32/lib/bitops.h        |  87 +++++++
 xen/arch/arm/arm32/lib/changebit.S     |  18 ++
 xen/arch/arm/arm32/lib/clearbit.S      |  19 ++
 xen/arch/arm/arm32/lib/copy_template.S | 267 +++++++++++++++++++++
 xen/arch/arm/arm32/lib/div64.S         | 211 +++++++++++++++++
 xen/arch/arm/arm32/lib/findbit.S       | 198 ++++++++++++++++
 xen/arch/arm/arm32/lib/lib1funcs.S     | 389 ++++++++++++++++++++++++++++++
 xen/arch/arm/arm32/lib/lshrdi3.S       |  54 +++++
 xen/arch/arm/arm32/lib/memcpy.S        |  63 +++++
 xen/arch/arm/arm32/lib/memmove.S       | 200 ++++++++++++++++
 xen/arch/arm/arm32/lib/memset.S        | 129 ++++++++++
 xen/arch/arm/arm32/lib/memzero.S       | 127 ++++++++++
 xen/arch/arm/arm32/lib/setbit.S        |  18 ++
 xen/arch/arm/arm32/lib/testchangebit.S |  18 ++
 xen/arch/arm/arm32/lib/testclearbit.S  |  18 ++
 xen/arch/arm/arm32/lib/testsetbit.S    |  18 ++
 xen/arch/arm/arm32/mode_switch.S       | 121 ++++++++++
 xen/arch/arm/arm32/proc-ca15.S         |  35 +++
 xen/arch/arm/asm-offsets.c             |  80 -------
 xen/arch/arm/domain.c                  |   2 +-
 xen/arch/arm/domain_build.c            |   2 +-
 xen/arch/arm/entry.S                   | 141 -----------
 xen/arch/arm/gic.c                     |   2 +-
 xen/arch/arm/gic.h                     | 172 --------------
 xen/arch/arm/head.S                    | 415 ---------------------------------
 xen/arch/arm/irq.c                     |   2 +-
 xen/arch/arm/lib/Makefile              |   5 -
 xen/arch/arm/lib/assembler.h           | 325 --------------------------
 xen/arch/arm/lib/bitops.h              |  87 -------
 xen/arch/arm/lib/changebit.S           |  18 --
 xen/arch/arm/lib/clearbit.S            |  19 --
 xen/arch/arm/lib/copy_template.S       | 267 ---------------------
 xen/arch/arm/lib/div64.S               | 211 -----------------
 xen/arch/arm/lib/findbit.S             | 198 ----------------
 xen/arch/arm/lib/lib1funcs.S           | 389 ------------------------------
 xen/arch/arm/lib/lshrdi3.S             |  54 -----
 xen/arch/arm/lib/memcpy.S              |  63 -----
 xen/arch/arm/lib/memmove.S             | 200 ----------------
 xen/arch/arm/lib/memset.S              | 129 ----------
 xen/arch/arm/lib/memzero.S             | 127 ----------
 xen/arch/arm/lib/setbit.S              |  18 --
 xen/arch/arm/lib/testchangebit.S       |  18 --
 xen/arch/arm/lib/testclearbit.S        |  18 --
 xen/arch/arm/lib/testsetbit.S          |  18 --
 xen/arch/arm/mode_switch.S             | 121 ----------
 xen/arch/arm/p2m.c                     |   2 +-
 xen/arch/arm/proc-ca15.S               |  35 ---
 xen/arch/arm/setup.c                   |   2 +-
 xen/arch/arm/smpboot.c                 |   2 +-
 xen/arch/arm/traps.c                   |   2 +-
 xen/arch/arm/vgic.c                    |   2 +-
 xen/arch/arm/vtimer.c                  |   2 +-
 xen/include/asm-arm/gic.h              | 170 ++++++++++++++
 65 files changed, 3174 insertions(+), 3169 deletions(-)
 delete mode 100644 config/arm.mk
 create mode 100644 config/arm32.mk
 create mode 100644 xen/arch/arm/arm32/Makefile
 create mode 100644 xen/arch/arm/arm32/asm-offsets.c
 create mode 100644 xen/arch/arm/arm32/entry.S
 create mode 100644 xen/arch/arm/arm32/head.S
 create mode 100644 xen/arch/arm/arm32/lib/Makefile
 create mode 100644 xen/arch/arm/arm32/lib/assembler.h
 create mode 100644 xen/arch/arm/arm32/lib/bitops.h
 create mode 100644 xen/arch/arm/arm32/lib/changebit.S
 create mode 100644 xen/arch/arm/arm32/lib/clearbit.S
 create mode 100644 xen/arch/arm/arm32/lib/copy_template.S
 create mode 100644 xen/arch/arm/arm32/lib/div64.S
 create mode 100644 xen/arch/arm/arm32/lib/findbit.S
 create mode 100644 xen/arch/arm/arm32/lib/lib1funcs.S
 create mode 100644 xen/arch/arm/arm32/lib/lshrdi3.S
 create mode 100644 xen/arch/arm/arm32/lib/memcpy.S
 create mode 100644 xen/arch/arm/arm32/lib/memmove.S
 create mode 100644 xen/arch/arm/arm32/lib/memset.S
 create mode 100644 xen/arch/arm/arm32/lib/memzero.S
 create mode 100644 xen/arch/arm/arm32/lib/setbit.S
 create mode 100644 xen/arch/arm/arm32/lib/testchangebit.S
 create mode 100644 xen/arch/arm/arm32/lib/testclearbit.S
 create mode 100644 xen/arch/arm/arm32/lib/testsetbit.S
 create mode 100644 xen/arch/arm/arm32/mode_switch.S
 create mode 100644 xen/arch/arm/arm32/proc-ca15.S
 delete mode 100644 xen/arch/arm/asm-offsets.c
 delete mode 100644 xen/arch/arm/entry.S
 delete mode 100644 xen/arch/arm/gic.h
 delete mode 100644 xen/arch/arm/head.S
 delete mode 100644 xen/arch/arm/lib/Makefile
 delete mode 100644 xen/arch/arm/lib/assembler.h
 delete mode 100644 xen/arch/arm/lib/bitops.h
 delete mode 100644 xen/arch/arm/lib/changebit.S
 delete mode 100644 xen/arch/arm/lib/clearbit.S
 delete mode 100644 xen/arch/arm/lib/copy_template.S
 delete mode 100644 xen/arch/arm/lib/div64.S
 delete mode 100644 xen/arch/arm/lib/findbit.S
 delete mode 100644 xen/arch/arm/lib/lib1funcs.S
 delete mode 100644 xen/arch/arm/lib/lshrdi3.S
 delete mode 100644 xen/arch/arm/lib/memcpy.S
 delete mode 100644 xen/arch/arm/lib/memmove.S
 delete mode 100644 xen/arch/arm/lib/memset.S
 delete mode 100644 xen/arch/arm/lib/memzero.S
 delete mode 100644 xen/arch/arm/lib/setbit.S
 delete mode 100644 xen/arch/arm/lib/testchangebit.S
 delete mode 100644 xen/arch/arm/lib/testclearbit.S
 delete mode 100644 xen/arch/arm/lib/testsetbit.S
 delete mode 100644 xen/arch/arm/mode_switch.S
 delete mode 100644 xen/arch/arm/proc-ca15.S
 create mode 100644 xen/include/asm-arm/gic.h

diff --git a/Config.mk b/Config.mk
index 028b6d514a..a7d7f79618 100644
--- a/Config.mk
+++ b/Config.mk
@@ -14,7 +14,9 @@ debug ?= y
 debug_symbols ?= $(debug)
 
 XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
-                         -e s/i86pc/x86_32/ -e s/amd64/x86_64/ -e s/arm.*/arm/)
+                         -e s/i86pc/x86_32/ -e s/amd64/x86_64/ \
+                         -e s/armv7.*/arm32/)
+
 XEN_TARGET_ARCH     ?= $(XEN_COMPILE_ARCH)
 XEN_OS              ?= $(shell uname -s)

diff --git a/config/arm.mk b/config/arm.mk
deleted file mode 100644
index f64f0c1c83..0000000000
--- a/config/arm.mk
+++ /dev/null
@@ -1,18 +0,0 @@
-CONFIG_ARM := y
-CONFIG_ARM_32 := y
-CONFIG_ARM_$(XEN_OS) := y
-
-# -march= -mcpu=
-
-# Explicitly specify 32-bit ARM ISA since toolchain default can be -mthumb:
-CFLAGS += -marm
-
-HAS_PL011 := y
-
-# Use only if calling $(LD) directly.
-#LDFLAGS_DIRECT_OpenBSD = _obsd
-#LDFLAGS_DIRECT_FreeBSD = _fbsd
-LDFLAGS_DIRECT_Linux = _linux
-LDFLAGS_DIRECT += -marmelf$(LDFLAGS_DIRECT_$(XEN_OS))_eabi
-
-CONFIG_LOAD_ADDRESS ?= 0x80000000

diff --git a/config/arm32.mk b/config/arm32.mk
new file mode 100644
index 0000000000..f64f0c1c83
--- /dev/null
+++ b/config/arm32.mk
@@ -0,0 +1,18 @@
+CONFIG_ARM := y
+CONFIG_ARM_32 := y
+CONFIG_ARM_$(XEN_OS) := y
+
+# -march= -mcpu=
+
+# Explicitly specify 32-bit ARM ISA since toolchain default can be -mthumb:
+CFLAGS += -marm
+
+HAS_PL011 := y
+
+# Use only if calling $(LD) directly.
+#LDFLAGS_DIRECT_OpenBSD = _obsd
+#LDFLAGS_DIRECT_FreeBSD = _fbsd
+LDFLAGS_DIRECT_Linux = _linux
+LDFLAGS_DIRECT += -marmelf$(LDFLAGS_DIRECT_$(XEN_OS))_eabi
+
+CONFIG_LOAD_ADDRESS ?= 0x80000000

diff --git a/xen/Rules.mk b/xen/Rules.mk
index f7cb8b2c86..c2db449648 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -28,7 +28,7 @@ endif
 
 # Set ARCH/SUBARCH appropriately.
 override TARGET_SUBARCH  := $(XEN_TARGET_ARCH)
 override TARGET_ARCH     := $(shell echo $(XEN_TARGET_ARCH) | \
-                              sed -e 's/x86.*/x86/')
+                              sed -e 's/x86.*/x86/' -e s'/arm\(32\|64\)/arm/g')
 
 TARGET := $(BASEDIR)/xen

diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 4c61b042a7..24c0c12625 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -1,8 +1,7 @@
-subdir-y += lib
+subdir-$(arm32) += arm32
 
 obj-y += dummy.o
 obj-y += early_printk.o
-obj-y += entry.o
 obj-y += domain.o
 obj-y += domctl.o
 obj-y += sysctl.o
@@ -12,8 +11,6 @@ obj-y += io.o
 obj-y += irq.o
 obj-y += kernel.o
 obj-y += mm.o
-obj-y += mode_switch.o
-obj-y += proc-ca15.o
 obj-y += p2m.o
 obj-y += percpu.o
 obj-y += guestcopy.o
@@ -36,7 +33,7 @@ obj-y += dtb.o
 AFLAGS += -DCONFIG_DTB_FILE=\"$(CONFIG_DTB_FILE)\"
 endif
 
-ALL_OBJS := head.o $(ALL_OBJS)
+ALL_OBJS := $(TARGET_SUBARCH)/head.o $(ALL_OBJS)
 
 $(TARGET): $(TARGET)-syms $(TARGET).bin
 	# XXX: VE model loads by VMA so instead of
@@ -81,7 +78,7 @@ $(TARGET)-syms: prelink.o xen.lds $(BASEDIR)/common/symbols-dummy.o
 		$(@D)/.$(@F).1.o -o $@
 	rm -f $(@D)/.$(@F).[0-9]*
 
-asm-offsets.s: asm-offsets.c
+asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
 	$(CC) $(filter-out -flto,$(CFLAGS)) -S -o $@ $<
 
 xen.lds: xen.lds.S

diff --git a/xen/arch/arm/Rules.mk b/xen/arch/arm/Rules.mk
index a45c654346..f83bfee24d 100644
--- a/xen/arch/arm/Rules.mk
+++ b/xen/arch/arm/Rules.mk
@@ -12,16 +12,19 @@ CFLAGS += -fno-builtin -fno-common -Wredundant-decls
 CFLAGS += -iwithprefix include -Werror -Wno-pointer-arith -pipe
 CFLAGS += -I$(BASEDIR)/include
 
-# Prevent floating-point variables from creeping into Xen.
-CFLAGS += -msoft-float
-
 $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 
 arm := y
 
+ifeq ($(TARGET_SUBARCH),arm32)
+# Prevent floating-point variables from creeping into Xen.
+CFLAGS += -msoft-float
+CFLAGS += -mcpu=cortex-a15 -mfpu=vfpv3 -mfloat-abi=softfp
+arm32 := y
+arm64 := n
+endif
+
 ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
 CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
 endif
-
-CFLAGS += -mcpu=cortex-a15 -mfpu=vfpv3 -mfloat-abi=softfp

diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
new file mode 100644
index 0000000000..20931fa63d
--- /dev/null
+++ b/xen/arch/arm/arm32/Makefile
@@ -0,0 +1,5 @@
+subdir-y += lib
+
+obj-y += entry.o
+obj-y += mode_switch.o
+obj-y += proc-ca15.o

diff --git a/xen/arch/arm/arm32/asm-offsets.c b/xen/arch/arm/arm32/asm-offsets.c
new file mode 100644
index 0000000000..cc1a72a659
--- /dev/null
+++ b/xen/arch/arm/arm32/asm-offsets.c
@@ -0,0 +1,80 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include
+#include
+#include
+#include
+#include
+
+#define DEFINE(_sym, _val) \
+    __asm__ __volatile__ ( "\n->" #_sym " %0 " #_val : : "i" (_val) )
+#define BLANK() \
+    __asm__ __volatile__ ( "\n->" : : )
+#define OFFSET(_sym, _str, _mem) \
+    DEFINE(_sym, offsetof(_str, _mem));
+
+/* base-2 logarithm */
+#define __L2(_x)  (((_x) & 0x00000002) ?   1 : 0)
+#define __L4(_x)  (((_x) & 0x0000000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x))
+#define __L8(_x)  (((_x) & 0x000000f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x))
+#define __L16(_x) (((_x) & 0x0000ff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x))
+#define LOG_2(_x) (((_x) & 0xffff0000) ? (16 + __L16((_x)>>16)) : __L16(_x))
+
+void __dummy__(void)
+{
+   OFFSET(UREGS_sp, struct cpu_user_regs, sp);
+   OFFSET(UREGS_lr, struct cpu_user_regs, lr);
+   OFFSET(UREGS_pc, struct cpu_user_regs, pc);
+   OFFSET(UREGS_cpsr, struct cpu_user_regs, cpsr);
+
+   OFFSET(UREGS_LR_usr, struct cpu_user_regs, lr_usr);
+   OFFSET(UREGS_SP_usr, struct cpu_user_regs, sp_usr);
+
+   OFFSET(UREGS_SP_svc, struct cpu_user_regs, sp_svc);
+   OFFSET(UREGS_LR_svc, struct cpu_user_regs, lr_svc);
+   OFFSET(UREGS_SPSR_svc, struct cpu_user_regs, spsr_svc);
+
+   OFFSET(UREGS_SP_abt, struct cpu_user_regs, sp_abt);
+   OFFSET(UREGS_LR_abt, struct cpu_user_regs, lr_abt);
+   OFFSET(UREGS_SPSR_abt, struct cpu_user_regs, spsr_abt);
+
+   OFFSET(UREGS_SP_und, struct cpu_user_regs, sp_und);
+   OFFSET(UREGS_LR_und, struct cpu_user_regs, lr_und);
+   OFFSET(UREGS_SPSR_und, struct cpu_user_regs, spsr_und);
+
+   OFFSET(UREGS_SP_irq, struct cpu_user_regs, sp_irq);
+   OFFSET(UREGS_LR_irq, struct cpu_user_regs, lr_irq);
+   OFFSET(UREGS_SPSR_irq, struct cpu_user_regs, spsr_irq);
+
+   OFFSET(UREGS_SP_fiq, struct cpu_user_regs, sp_fiq);
+   OFFSET(UREGS_LR_fiq, struct cpu_user_regs, lr_fiq);
+   OFFSET(UREGS_SPSR_fiq, struct cpu_user_regs, spsr_fiq);
+
+   OFFSET(UREGS_R8_fiq, struct cpu_user_regs, r8_fiq);
+   OFFSET(UREGS_R9_fiq, struct cpu_user_regs, r9_fiq);
+   OFFSET(UREGS_R10_fiq, struct cpu_user_regs, r10_fiq);
+   OFFSET(UREGS_R11_fiq, struct cpu_user_regs, r11_fiq);
+   OFFSET(UREGS_R12_fiq, struct cpu_user_regs, r12_fiq);
+
+   OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, cpsr);
+   DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
+   BLANK();
+
+   DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+
+   OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
new file mode 100644
index 0000000000..36114273d7
--- /dev/null
+++ b/xen/arch/arm/arm32/entry.S
@@ -0,0 +1,141 @@
+#include
+#include
+#include
+
+#define SAVE_ONE_BANKED(reg)    mrs r11, reg; str r11, [sp, #UREGS_##reg]
+#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11
+
+#define SAVE_BANKED(mode) \
+        SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode)
+
+#define RESTORE_BANKED(mode) \
+        RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode)
+
+#define SAVE_ALL \
+        sub sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */ \
+        push {r0-r12}; /* Save R0-R12 */ \
+        \
+        mrs r11, ELR_hyp;               /* ELR_hyp is return address. */ \
+        str r11, [sp, #UREGS_pc]; \
+        \
+        str lr, [sp, #UREGS_lr]; \
+        \
+        add r11, sp, #UREGS_kernel_sizeof+4; \
+        str r11, [sp, #UREGS_sp]; \
+        \
+        mrs r11, SPSR_hyp; \
+        str r11, [sp, #UREGS_cpsr]; \
+        and r11, #PSR_MODE_MASK; \
+        cmp r11, #PSR_MODE_HYP; \
+        blne save_guest_regs
+
+save_guest_regs:
+        ldr r11, =0xffffffff  /* Clobber SP which is only valid for hypervisor frames. */
+        str r11, [sp, #UREGS_sp]
+        SAVE_ONE_BANKED(SP_usr)
+        /* LR_usr is the same physical register as lr and is saved in SAVE_ALL */
+        SAVE_BANKED(svc)
+        SAVE_BANKED(abt)
+        SAVE_BANKED(und)
+        SAVE_BANKED(irq)
+        SAVE_BANKED(fiq)
+        SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq)
+        SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq);
+        mov pc, lr
+
+#define DEFINE_TRAP_ENTRY(trap) \
+        ALIGN; \
+trap_##trap: \
+        SAVE_ALL; \
+        cpsie i;        /* local_irq_enable */ \
+        adr lr, return_from_trap; \
+        mov r0, sp; \
+        mov r11, sp; \
+        bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \
+        b do_trap_##trap
+
+#define DEFINE_TRAP_ENTRY_NOIRQ(trap) \
+        ALIGN; \
+trap_##trap: \
+        SAVE_ALL; \
+        adr lr, return_from_trap; \
+        mov r0, sp; \
+        mov r11, sp; \
+        bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \
+        b do_trap_##trap
+
+.globl hyp_traps_vector
+        .align 5
+hyp_traps_vector:
+        .word 0                         /* 0x00 - Reset */
+        b trap_undefined_instruction    /* 0x04 - Undefined Instruction */
+        b trap_supervisor_call          /* 0x08 - Supervisor Call */
+        b trap_prefetch_abort           /* 0x0c - Prefetch Abort */
+        b trap_data_abort               /* 0x10 - Data Abort */
+        b trap_hypervisor               /* 0x14 - Hypervisor */
+        b trap_irq                      /* 0x18 - IRQ */
+        b trap_fiq                      /* 0x1c - FIQ */
+
+DEFINE_TRAP_ENTRY(undefined_instruction)
+DEFINE_TRAP_ENTRY(supervisor_call)
+DEFINE_TRAP_ENTRY(prefetch_abort)
+DEFINE_TRAP_ENTRY(data_abort)
+DEFINE_TRAP_ENTRY(hypervisor)
+DEFINE_TRAP_ENTRY_NOIRQ(irq)
+DEFINE_TRAP_ENTRY_NOIRQ(fiq)
+
+return_from_trap:
+        mov sp, r11
+ENTRY(return_to_new_vcpu)
+        ldr r11, [sp, #UREGS_cpsr]
+        and r11, #PSR_MODE_MASK
+        cmp r11, #PSR_MODE_HYP
+        beq return_to_hypervisor
+        /* Fall thru */
+ENTRY(return_to_guest)
+        mov r11, sp
+        bic sp, #7 /* Align the stack pointer */
+        bl leave_hypervisor_tail /* Disables interrupts on return */
+        mov sp, r11
+        RESTORE_ONE_BANKED(SP_usr)
+        /* LR_usr is the same physical register as lr and is restored below */
+        RESTORE_BANKED(svc)
+        RESTORE_BANKED(abt)
+        RESTORE_BANKED(und)
+        RESTORE_BANKED(irq)
+        RESTORE_BANKED(fiq)
+        RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq)
+        RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
+        /* Fall thru */
+ENTRY(return_to_hypervisor)
+        cpsid i
+        ldr lr, [sp, #UREGS_lr]
+        ldr r11, [sp, #UREGS_pc]
+        msr ELR_hyp, r11
+        ldr r11, [sp, #UREGS_cpsr]
+        msr SPSR_hyp, r11
+        pop {r0-r12}
+        add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */
+        eret
+
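SAVE_ALL and return_to_new_vcpu above both decide between the guest and
hypervisor paths by testing the mode field of the saved CPSR. As a rough C
model of that test (illustration only, not code from the patch;
frame_is_hypervisor is a hypothetical name, while PSR_MODE_MASK and
PSR_MODE_HYP are the constants the assembly uses, 0x1f and 0x1a on ARMv7):

    #include <stdint.h>

    #define PSR_MODE_MASK 0x1f  /* CPSR[4:0] holds the processor mode */
    #define PSR_MODE_HYP  0x1a  /* 0b11010: Hyp mode */

    /* A trap frame belongs to the hypervisor itself exactly when the
     * saved CPSR records Hyp mode; SAVE_ALL skips save_guest_regs and
     * return_to_new_vcpu branches to return_to_hypervisor on this test. */
    static inline int frame_is_hypervisor(uint32_t saved_cpsr)
    {
        return (saved_cpsr & PSR_MODE_MASK) == PSR_MODE_HYP;
    }
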
+/* + * struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next) + * + * r0 - prev + * r1 - next + * + * Returns prev in r0 + */ +ENTRY(__context_switch) + add ip, r0, #VCPU_arch_saved_context + stmia ip!, {r4 - sl, fp, sp, lr} /* Save register state */ + + add r4, r1, #VCPU_arch_saved_context + ldmia r4, {r4 - sl, fp, sp, pc} /* Load registers and return */ + +/* + * Local variables: + * mode: ASM + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S new file mode 100644 index 0000000000..93f4edba53 --- /dev/null +++ b/xen/arch/arm/arm32/head.S @@ -0,0 +1,415 @@ +/* + * xen/arch/arm/head.S + * + * Start-of-day code for an ARMv7-A with virt extensions. + * + * Tim Deegan + * Copyright (c) 2011 Citrix Systems. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +#define ZIMAGE_MAGIC_NUMBER 0x016f2818 + +#define PT_PT 0xe7f /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=1, P=1 */ +#define PT_MEM 0xe7d /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=0, P=1 */ +#define PT_DEV 0xe71 /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=0, P=1 */ +#define PT_DEV_L3 0xe73 /* lev3: nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=1, P=1 */ + +#define PT_UPPER(x) (PT_##x & 0xf00) +#define PT_LOWER(x) (PT_##x & 0x0ff) + +/* Macro to print a string to the UART, if there is one. + * Clobbers r0-r3. */ +#ifdef EARLY_UART_ADDRESS +#define PRINT(_s) \ + adr r0, 98f ; \ + bl puts ; \ + b 99f ; \ +98: .asciz _s ; \ + .align 2 ; \ +99: +#else +#define PRINT(s) +#endif + + .arm + + /* This must be the very first address in the loaded image. + * It should be linked at XEN_VIRT_START, and loaded at any + * 2MB-aligned address. All of text+data+bss must fit in 2MB, + * or the initial pagetable code below will need adjustment. */ + .global start +start: + + /* zImage magic header, see: + * http://www.simtec.co.uk/products/SWLINUX/files/booting_article.html#d0e309 + */ + .rept 8 + mov r0, r0 + .endr + b past_zImage + + .word ZIMAGE_MAGIC_NUMBER /* Magic numbers to help the loader */ + .word 0x00000000 /* absolute load/run zImage address or + * 0 for PiC */ + .word (_end - start) /* zImage end address */ + +past_zImage: + cpsid aif /* Disable all interrupts */ + + /* Save the bootloader arguments in less-clobberable registers */ + mov r7, r1 /* r7 := ARM-linux machine type */ + mov r8, r2 /* r8 := ATAG base address */ + + /* Find out where we are */ + ldr r0, =start + adr r9, start /* r9 := paddr (start) */ + sub r10, r9, r0 /* r10 := phys-offset */ + + /* Using the DTB in the .dtb section? */ +#ifdef CONFIG_DTB_FILE + ldr r8, =_sdtb + add r8, r10 /* r8 := paddr(DTB) */ +#endif + + /* Are we the boot CPU? */ + mov r12, #0 /* r12 := CPU ID */ + mrc CP32(r0, MPIDR) + tst r0, #(1<<31) /* Multiprocessor extension supported? */ + beq boot_cpu + tst r0, #(1<<30) /* Uniprocessor system? 
*/ + bne boot_cpu + bics r12, r0, #(0xff << 24) /* Mask out flags to get CPU ID */ + beq boot_cpu /* If we're CPU 0, boot now */ + + /* Non-boot CPUs wait here to be woken up one at a time. */ +1: dsb + ldr r0, =smp_up_cpu /* VA of gate */ + add r0, r0, r10 /* PA of gate */ + ldr r1, [r0] /* Which CPU is being booted? */ + teq r1, r12 /* Is it us? */ + wfene + bne 1b + +boot_cpu: +#ifdef EARLY_UART_ADDRESS + ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */ + teq r12, #0 /* CPU 0 sets up the UART too */ + bleq init_uart + PRINT("- CPU ") + mov r0, r12 + bl putn + PRINT(" booting -\r\n") +#endif + + /* Wake up secondary cpus */ + teq r12, #0 + bleq kick_cpus + + /* Check that this CPU has Hyp mode */ + mrc CP32(r0, ID_PFR1) + and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */ + teq r0, #0x1000 /* Must == 0x1 or may be incompatible */ + beq 1f + PRINT("- CPU doesn't support the virtualization extensions -\r\n") + b fail +1: + /* Check if we're already in it */ + mrs r0, cpsr + and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */ + teq r0, #0x1a /* Hyp Mode? */ + bne 1f + PRINT("- Started in Hyp mode -\r\n") + b hyp +1: + /* Otherwise, it must have been Secure Supervisor mode */ + mrc CP32(r0, SCR) + tst r0, #0x1 /* Not-Secure bit set? */ + beq 1f + PRINT("- CPU is not in Hyp mode or Secure state -\r\n") + b fail +1: + /* OK, we're in Secure state. */ + PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n") + ldr r0, =enter_hyp_mode /* VA of function */ + adr lr, hyp /* Set return address for call */ + add pc, r0, r10 /* Call PA of function */ + +hyp: + + /* Zero BSS On the boot CPU to avoid nasty surprises */ + teq r12, #0 + bne skip_bss + + PRINT("- Zero BSS -\r\n") + ldr r0, =__bss_start /* Load start & end of bss */ + ldr r1, =__bss_end + add r0, r0, r10 /* Apply physical offset */ + add r1, r1, r10 + + mov r2, #0 +1: str r2, [r0], #4 + cmp r0, r1 + blo 1b + +skip_bss: + + PRINT("- Setting up control registers -\r\n") + + /* Read CPU ID */ + mrc CP32(r0, MIDR) + ldr r1, =(MIDR_MASK) + and r0, r0, r1 + /* Is this a Cortex A15? */ + ldr r1, =(CORTEX_A15_ID) + teq r0, r1 + bleq cortex_a15_init + + /* Set up memory attribute type tables */ + ldr r0, =MAIR0VAL + ldr r1, =MAIR1VAL + mcr CP32(r0, MAIR0) + mcr CP32(r1, MAIR1) + mcr CP32(r0, HMAIR0) + mcr CP32(r1, HMAIR1) + + /* Set up the HTCR: + * PT walks use Outer-Shareable accesses, + * PT walks are write-back, no-write-allocate in both cache levels, + * Full 32-bit address space goes through this table. */ + ldr r0, =0x80002500 + mcr CP32(r0, HTCR) + + /* Set up the HSCTLR: + * Exceptions in LE ARM, + * Low-latency IRQs disabled, + * Write-implies-XN disabled (for now), + * D-cache disabled (for now), + * I-cache enabled, + * Alignment checking enabled, + * MMU translation disabled (for now). 
*/ + ldr r0, =(HSCTLR_BASE|SCTLR_A) + mcr CP32(r0, HSCTLR) + + /* Write Xen's PT's paddr into the HTTBR */ + ldr r4, =xen_pgtable + add r4, r4, r10 /* r4 := paddr (xen_pagetable) */ + mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */ + mcrr CP64(r4, r5, HTTBR) + + /* Non-boot CPUs don't need to rebuild the pagetable */ + teq r12, #0 + bne pt_ready + + /* console fixmap */ +#ifdef EARLY_UART_ADDRESS + ldr r1, =xen_fixmap + add r1, r1, r10 /* r1 := paddr (xen_fixmap) */ + mov r3, #0 + lsr r2, r11, #12 + lsl r2, r2, #12 /* 4K aligned paddr of UART */ + orr r2, r2, #PT_UPPER(DEV_L3) + orr r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */ + strd r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */ +#endif + + /* Build the baseline idle pagetable's first-level entries */ + ldr r1, =xen_second + add r1, r1, r10 /* r1 := paddr (xen_second) */ + mov r3, #0x0 + orr r2, r1, #PT_UPPER(PT) /* r2:r3 := table map of xen_second */ + orr r2, r2, #PT_LOWER(PT) /* (+ rights for linear PT) */ + strd r2, r3, [r4, #0] /* Map it in slot 0 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */ + add r2, r2, #0x1000 + strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */ + + /* Now set up the second-level entries */ + orr r2, r9, #PT_UPPER(MEM) + orr r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB normal map of Xen */ + mov r4, r9, lsr #18 /* Slot for paddr(start) */ + strd r2, r3, [r1, r4] /* Map Xen there */ + ldr r4, =start + lsr r4, #18 /* Slot for vaddr(start) */ + strd r2, r3, [r1, r4] /* Map Xen there too */ + + /* xen_fixmap pagetable */ + ldr r2, =xen_fixmap + add r2, r2, r10 /* r2 := paddr (xen_fixmap) */ + orr r2, r2, #PT_UPPER(PT) + orr r2, r2, #PT_LOWER(PT) /* r2:r3 := table map of xen_fixmap */ + add r4, r4, #8 + strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ + + mov r3, #0x0 + lsr r2, r8, #21 + lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */ + orr r2, r2, #PT_UPPER(MEM) + orr r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */ + add r4, r4, #8 + strd r2, r3, [r1, r4] /* Map it in the early boot slot */ + +pt_ready: + PRINT("- Turning on paging -\r\n") + + ldr r1, =paging /* Explicit vaddr, not RIP-relative */ + mrc CP32(r0, HSCTLR) + orr r0, r0, #(SCTLR_M|SCTLR_C) /* Enable MMU and D-cache */ + dsb /* Flush PTE writes and finish reads */ + mcr CP32(r0, HSCTLR) /* now paging is enabled */ + isb /* Now, flush the icache */ + mov pc, r1 /* Get a proper vaddr into PC */ +paging: + + +#ifdef EARLY_UART_ADDRESS + /* Use a virtual address to access the UART. 
*/ + ldr r11, =FIXMAP_ADDR(FIXMAP_CONSOLE) +#endif + + PRINT("- Ready -\r\n") + + /* The boot CPU should go straight into C now */ + teq r12, #0 + beq launch + + /* Non-boot CPUs need to move on to the relocated pagetables */ + mov r0, #0 + ldr r4, =boot_httbr /* VA of HTTBR value stashed by CPU 0 */ + add r4, r4, r10 /* PA of it */ + ldrd r4, r5, [r4] /* Actual value */ + dsb + mcrr CP64(r4, r5, HTTBR) + dsb + isb + mcr CP32(r0, TLBIALLH) /* Flush hypervisor TLB */ + mcr CP32(r0, ICIALLU) /* Flush I-cache */ + mcr CP32(r0, BPIALL) /* Flush branch predictor */ + dsb /* Ensure completion of TLB+BP flush */ + isb + + /* Non-boot CPUs report that they've got this far */ + ldr r0, =ready_cpus +1: ldrex r1, [r0] /* { read # of ready CPUs } */ + add r1, r1, #1 /* Atomically { ++ } */ + strex r2, r1, [r0] /* { writeback } */ + teq r2, #0 + bne 1b + dsb + mcr CP32(r0, DCCMVAC) /* flush D-Cache */ + dsb + + /* Here, the non-boot CPUs must wait again -- they're now running on + * the boot CPU's pagetables so it's safe for the boot CPU to + * overwrite the non-relocated copy of Xen. Once it's done that, + * and brought up the memory allocator, non-boot CPUs can get their + * own stacks and enter C. */ +1: wfe + dsb + ldr r0, =smp_up_cpu + ldr r1, [r0] /* Which CPU is being booted? */ + teq r1, r12 /* Is it us? */ + bne 1b + +launch: + ldr r0, =init_stack /* Find the boot-time stack */ + ldr sp, [r0] + add sp, #STACK_SIZE /* (which grows down from the top). */ + sub sp, #CPUINFO_sizeof /* Make room for CPU save record */ + mov r0, r10 /* Marshal args: - phys_offset */ + mov r1, r7 /* - machine type */ + mov r2, r8 /* - ATAG address */ + movs r3, r12 /* - CPU ID */ + beq start_xen /* and disappear into the land of C */ + b start_secondary /* (to the appropriate entry point) */ + +/* Fail-stop + * r0: string explaining why */ +fail: PRINT("- Boot failed -\r\n") +1: wfe + b 1b + +#ifdef EARLY_UART_ADDRESS + +/* Bring up the UART. Specific to the PL011 UART. + * Clobbers r0-r2 */ +init_uart: + mov r1, #0x0 + str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */ + mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */ + str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */ + mov r1, #0x60 /* 8n1 */ + str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */ + ldr r1, =0x00000301 /* RXE | TXE | UARTEN */ + str r1, [r11, #0x30] /* -> UARTCR (Control Register) */ + adr r0, 1f + b puts +1: .asciz "- UART enabled -\r\n" + .align 4 + +/* Print early debug messages. Specific to the PL011 UART. + * r0: Nul-terminated string to print. + * Clobbers r0-r2 */ +puts: + ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ + tst r2, #0x8 /* Check BUSY bit */ + bne puts /* Wait for the UART to be ready */ + ldrb r2, [r0], #1 /* Load next char */ + teq r2, #0 /* Exit on nul */ + moveq pc, lr + str r2, [r11] /* -> UARTDR (Data Register) */ + b puts + +/* Print a 32-bit number in hex. Specific to the PL011 UART. + * r0: Number to print. 
+ * clobbers r0-r3 */ +putn: + adr r1, hex + mov r3, #8 +1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ + tst r2, #0x8 /* Check BUSY bit */ + bne 1b /* Wait for the UART to be ready */ + and r2, r0, #0xf0000000 /* Mask off the top nybble */ + ldrb r2, [r1, r2, lsr #28] /* Convert to a char */ + str r2, [r11] /* -> UARTDR (Data Register) */ + lsl r0, #4 /* Roll it through one nybble at a time */ + subs r3, r3, #1 + bne 1b + mov pc, lr + +hex: .ascii "0123456789abcdef" + .align 2 + +#else /* EARLY_UART_ADDRESS */ + +init_uart: +.global early_puts +early_puts: +puts: +putn: mov pc, lr + +#endif /* EARLY_UART_ADDRESS */ + +/* + * Local variables: + * mode: ASM + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/arm/arm32/lib/Makefile b/xen/arch/arm/arm32/lib/Makefile new file mode 100644 index 0000000000..4cf41f41e1 --- /dev/null +++ b/xen/arch/arm/arm32/lib/Makefile @@ -0,0 +1,5 @@ +obj-y += memcpy.o memmove.o memset.o memzero.o +obj-y += findbit.o setbit.o +obj-y += setbit.o clearbit.o changebit.o +obj-y += testsetbit.o testclearbit.o testchangebit.o +obj-y += lib1funcs.o lshrdi3.o div64.o diff --git a/xen/arch/arm/arm32/lib/assembler.h b/xen/arch/arm/arm32/lib/assembler.h new file mode 100644 index 0000000000..f8d4b3aa04 --- /dev/null +++ b/xen/arch/arm/arm32/lib/assembler.h @@ -0,0 +1,325 @@ +/* From Linux arch/arm/include/asm/assembler.h */ +/* + * arch/arm/include/asm/assembler.h + * + * Copyright (C) 1996-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This file contains arm architecture specific defines + * for the different processors. + * + * Do not include any C declarations in this file - it is included by + * assembler source. + */ +#ifndef __ASM_ASSEMBLER_H__ +#define __ASM_ASSEMBLER_H__ + +#ifndef __ASSEMBLY__ +#error "Only include this from assembly code" +#endif + +// No Thumb, hence: +#define W(instr) instr +#define ARM(instr...) instr +#define THUMB(instr...) + +#ifdef CONFIG_ARM_UNWIND +#define UNWIND(code...) code +#else +#define UNWIND(code...) +#endif + +/* + * Endian independent macros for shifting bytes within registers. + */ +#ifndef __ARMEB__ +#define pull lsr +#define push lsl +#define get_byte_0 lsl #0 +#define get_byte_1 lsr #8 +#define get_byte_2 lsr #16 +#define get_byte_3 lsr #24 +#define put_byte_0 lsl #0 +#define put_byte_1 lsl #8 +#define put_byte_2 lsl #16 +#define put_byte_3 lsl #24 +#else +#define pull lsl +#define push lsr +#define get_byte_0 lsr #24 +#define get_byte_1 lsr #16 +#define get_byte_2 lsr #8 +#define get_byte_3 lsl #0 +#define put_byte_0 lsl #24 +#define put_byte_1 lsl #16 +#define put_byte_2 lsl #8 +#define put_byte_3 lsl #0 +#endif + +/* + * Data preload for architectures that support it + */ +#if __LINUX_ARM_ARCH__ >= 5 +#define PLD(code...) code +#else +#define PLD(code...) +#endif + +/* + * This can be used to enable code to cacheline align the destination + * pointer when bulk writing to memory. Experiments on StrongARM and + * XScale didn't show this a worthwhile thing to do when the cache is not + * set to write-allocate (this would need further testing on XScale when WA + * is used). + * + * On Feroceon there is much to gain however, regardless of cache mode. + */ +#ifdef CONFIG_CPU_FEROCEON +#define CALGN(code...) code +#else +#define CALGN(code...) 
+#endif + +/* + * Enable and disable interrupts + */ +#if __LINUX_ARM_ARCH__ >= 6 + .macro disable_irq_notrace + cpsid i + .endm + + .macro enable_irq_notrace + cpsie i + .endm +#else + .macro disable_irq_notrace + msr cpsr_c, #PSR_I_BIT | SVC_MODE + .endm + + .macro enable_irq_notrace + msr cpsr_c, #SVC_MODE + .endm +#endif + + .macro asm_trace_hardirqs_off +#if defined(CONFIG_TRACE_IRQFLAGS) + stmdb sp!, {r0-r3, ip, lr} + bl trace_hardirqs_off + ldmia sp!, {r0-r3, ip, lr} +#endif + .endm + + .macro asm_trace_hardirqs_on_cond, cond +#if defined(CONFIG_TRACE_IRQFLAGS) + /* + * actually the registers should be pushed and pop'd conditionally, but + * after bl the flags are certainly clobbered + */ + stmdb sp!, {r0-r3, ip, lr} + bl\cond trace_hardirqs_on + ldmia sp!, {r0-r3, ip, lr} +#endif + .endm + + .macro asm_trace_hardirqs_on + asm_trace_hardirqs_on_cond al + .endm + + .macro disable_irq + disable_irq_notrace + asm_trace_hardirqs_off + .endm + + .macro enable_irq + asm_trace_hardirqs_on + enable_irq_notrace + .endm +/* + * Save the current IRQ state and disable IRQs. Note that this macro + * assumes FIQs are enabled, and that the processor is in SVC mode. + */ + .macro save_and_disable_irqs, oldcpsr + mrs \oldcpsr, cpsr + disable_irq + .endm + +/* + * Restore interrupt state previously stored in a register. We don't + * guarantee that this will preserve the flags. + */ + .macro restore_irqs_notrace, oldcpsr + msr cpsr_c, \oldcpsr + .endm + + .macro restore_irqs, oldcpsr + tst \oldcpsr, #PSR_I_BIT + asm_trace_hardirqs_on_cond eq + restore_irqs_notrace \oldcpsr + .endm + +#define USER(x...) \ +9999: x; \ + .pushsection __ex_table,"a"; \ + .align 3; \ + .long 9999b,9001f; \ + .popsection + +#ifdef CONFIG_SMP +#define ALT_SMP(instr...) \ +9998: instr +/* + * Note: if you get assembler errors from ALT_UP() when building with + * CONFIG_THUMB2_KERNEL, you almost certainly need to use + * ALT_SMP( W(instr) ... ) + */ +#define ALT_UP(instr...) \ + .pushsection ".alt.smp.init", "a" ;\ + .long 9998b ;\ +9997: instr ;\ + .if . - 9997b != 4 ;\ + .error "ALT_UP() content must assemble to exactly 4 bytes";\ + .endif ;\ + .popsection +#define ALT_UP_B(label) \ + .equ up_b_offset, label - 9998b ;\ + .pushsection ".alt.smp.init", "a" ;\ + .long 9998b ;\ + W(b) . + up_b_offset ;\ + .popsection +#else +#define ALT_SMP(instr...) +#define ALT_UP(instr...) 
instr +#define ALT_UP_B(label) b label +#endif + +/* + * Instruction barrier + */ + .macro instr_sync +#if __LINUX_ARM_ARCH__ >= 7 + isb +#elif __LINUX_ARM_ARCH__ == 6 + mcr p15, 0, r0, c7, c5, 4 +#endif + .endm + +/* + * SMP data memory barrier + */ + .macro smp_dmb mode +#ifdef CONFIG_SMP +#if __LINUX_ARM_ARCH__ >= 7 + .ifeqs "\mode","arm" + ALT_SMP(dmb) + .else + ALT_SMP(W(dmb)) + .endif +#elif __LINUX_ARM_ARCH__ == 6 + ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb +#else +#error Incompatible SMP platform +#endif + .ifeqs "\mode","arm" + ALT_UP(nop) + .else + ALT_UP(W(nop)) + .endif +#endif + .endm + +#ifdef CONFIG_THUMB2_KERNEL + .macro setmode, mode, reg + mov \reg, #\mode + msr cpsr_c, \reg + .endm +#else + .macro setmode, mode, reg + msr cpsr_c, #\mode + .endm +#endif + +/* + * STRT/LDRT access macros with ARM and Thumb-2 variants + */ +#ifdef CONFIG_THUMB2_KERNEL + + .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T() +9999: + .if \inc == 1 + \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] + .elseif \inc == 4 + \instr\cond\()\t\().w \reg, [\ptr, #\off] + .else + .error "Unsupported inc macro argument" + .endif + + .pushsection __ex_table,"a" + .align 3 + .long 9999b, \abort + .popsection + .endm + + .macro usracc, instr, reg, ptr, inc, cond, rept, abort + @ explicit IT instruction needed because of the label + @ introduced by the USER macro + .ifnc \cond,al + .if \rept == 1 + itt \cond + .elseif \rept == 2 + ittt \cond + .else + .error "Unsupported rept macro argument" + .endif + .endif + + @ Slightly optimised to avoid incrementing the pointer twice + usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort + .if \rept == 2 + usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort + .endif + + add\cond \ptr, #\rept * \inc + .endm + +#else /* !CONFIG_THUMB2_KERNEL */ + + .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=T() + .rept \rept +9999: + .if \inc == 1 + \instr\cond\()b\()\t \reg, [\ptr], #\inc + .elseif \inc == 4 + \instr\cond\()\t \reg, [\ptr], #\inc + .else + .error "Unsupported inc macro argument" + .endif + + .pushsection __ex_table,"a" + .align 3 + .long 9999b, \abort + .popsection + .endr + .endm + +#endif /* CONFIG_THUMB2_KERNEL */ + + .macro strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f + usracc str, \reg, \ptr, \inc, \cond, \rept, \abort + .endm + + .macro ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f + usracc ldr, \reg, \ptr, \inc, \cond, \rept, \abort + .endm + +/* Utility macro for declaring string literals */ + .macro string name:req, string + .type \name , #object +\name: + .asciz "\string" + .size \name , . 
- \name + .endm + +#endif /* __ASM_ASSEMBLER_H__ */ diff --git a/xen/arch/arm/arm32/lib/bitops.h b/xen/arch/arm/arm32/lib/bitops.h new file mode 100644 index 0000000000..689f2e8665 --- /dev/null +++ b/xen/arch/arm/arm32/lib/bitops.h @@ -0,0 +1,87 @@ +#include + +#if __LINUX_ARM_ARCH__ >= 6 + .macro bitop, instr + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 +1: ldrex r2, [r1] + \instr r2, r2, r3 + strex r0, r2, [r1] + cmp r0, #0 + bne 1b + bx lr + .endm + + .macro testop, instr, store + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + mov r2, #1 + and r3, r0, #31 @ Get bit offset + mov r0, r0, lsr #5 + add r1, r1, r0, lsl #2 @ Get word offset + mov r3, r2, lsl r3 @ create mask + smp_dmb +1: ldrex r2, [r1] + ands r0, r2, r3 @ save old value of bit + \instr r2, r2, r3 @ toggle bit + strex ip, r2, [r1] + cmp ip, #0 + bne 1b + smp_dmb + cmp r0, #0 + movne r0, #1 +2: bx lr + .endm +#else + .macro bitop, name, instr +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r2, r0, #31 + mov r0, r0, lsr #5 + mov r3, #1 + mov r3, r3, lsl r2 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2] + \instr r2, r2, r3 + str r2, [r1, r0, lsl #2] + restore_irqs ip + mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm + +/** + * testop - implement a test_and_xxx_bit operation. + * @instr: operational instruction + * @store: store instruction + * + * Note: we can trivially conditionalise the store instruction + * to avoid dirtying the data cache. + */ + .macro testop, name, instr, store +ENTRY( \name ) +UNWIND( .fnstart ) + ands ip, r1, #3 + strneb r1, [ip] @ assert word-aligned + and r3, r0, #31 + mov r0, r0, lsr #5 + save_and_disable_irqs ip + ldr r2, [r1, r0, lsl #2]! + mov r0, #1 + tst r2, r0, lsl r3 + \instr r2, r2, r0, lsl r3 + \store r2, [r1] + moveq r0, #0 + restore_irqs ip + mov pc, lr +UNWIND( .fnend ) +ENDPROC(\name ) + .endm +#endif diff --git a/xen/arch/arm/arm32/lib/changebit.S b/xen/arch/arm/arm32/lib/changebit.S new file mode 100644 index 0000000000..62954bcd07 --- /dev/null +++ b/xen/arch/arm/arm32/lib/changebit.S @@ -0,0 +1,18 @@ +/* + * linux/arch/arm/lib/changebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include "assembler.h" +#include "bitops.h" + .text + +ENTRY(_change_bit) + bitop eor +ENDPROC(_change_bit) diff --git a/xen/arch/arm/arm32/lib/clearbit.S b/xen/arch/arm/arm32/lib/clearbit.S new file mode 100644 index 0000000000..42ce41656d --- /dev/null +++ b/xen/arch/arm/arm32/lib/clearbit.S @@ -0,0 +1,19 @@ +/* + * linux/arch/arm/lib/clearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include + +#include "assembler.h" +#include "bitops.h" + .text + +ENTRY(_clear_bit) + bitop bic +ENDPROC(_clear_bit) diff --git a/xen/arch/arm/arm32/lib/copy_template.S b/xen/arch/arm/arm32/lib/copy_template.S new file mode 100644 index 0000000000..805e3f8fb0 --- /dev/null +++ b/xen/arch/arm/arm32/lib/copy_template.S @@ -0,0 +1,267 @@ +/* + * linux/arch/arm/lib/copy_template.s + * + * Code template for optimized memory copy functions + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Theory of operation + * ------------------- + * + * This file provides the core code for a forward memory copy used in + * the implementation of memcopy(), copy_to_user() and copy_from_user(). + * + * The including file must define the following accessor macros + * according to the need of the given function: + * + * ldr1w ptr reg abort + * + * This loads one word from 'ptr', stores it in 'reg' and increments + * 'ptr' to the next word. The 'abort' argument is used for fixup tables. + * + * ldr4w ptr reg1 reg2 reg3 reg4 abort + * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * + * This loads four or eight words starting from 'ptr', stores them + * in provided registers and increments 'ptr' past those words. + * The'abort' argument is used for fixup tables. + * + * ldr1b ptr reg cond abort + * + * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. + * It also must apply the condition code if provided, otherwise the + * "al" condition is assumed by default. + * + * str1w ptr reg abort + * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + * str1b ptr reg cond abort + * + * Same as their ldr* counterparts, but data is stored to 'ptr' location + * rather than being loaded. + * + * enter reg1 reg2 + * + * Preserve the provided registers on the stack plus any additional + * data as needed by the implementation including this code. Called + * upon code entry. + * + * exit reg1 reg2 + * + * Restore registers with the values previously saved with the + * 'preserv' macro. Called upon code termination. 
+ * + * LDR1W_SHIFT + * STR1W_SHIFT + * + * Correction to be applied to the "ip" register when branching into + * the ldr1w or str1w instructions (some of these macros may expand to + * than one 32bit instruction in Thumb-2) + */ + + + enter r4, lr + + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb r3, ip, #32 ) + CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, r3 ) @ C gets set + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #0] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +3: PLD( pld [r1, #124] ) +4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + subs r2, r2, #32 + str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 +#if LDR1W_SHIFT > 0 + lsl ip, ip, #LDR1W_SHIFT +#endif + addne pc, pc, ip @ C is always clear here + b 7f +6: + .rept (1 << LDR1W_SHIFT) + W(nop) + .endr + ldr1w r1, r3, abort=20f + ldr1w r1, r4, abort=20f + ldr1w r1, r5, abort=20f + ldr1w r1, r6, abort=20f + ldr1w r1, r7, abort=20f + ldr1w r1, r8, abort=20f + ldr1w r1, lr, abort=20f + +#if LDR1W_SHIFT < STR1W_SHIFT + lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT +#elif LDR1W_SHIFT > STR1W_SHIFT + lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT +#endif + add pc, pc, ip + nop + .rept (1 << STR1W_SHIFT) + W(nop) + .endr + str1w r0, r3, abort=20f + str1w r0, r4, abort=20f + str1w r0, r5, abort=20f + str1w r0, r6, abort=20f + str1w r0, r7, abort=20f + str1w r0, r8, abort=20f + str1w r0, lr, abort=20f + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldr1b r1, r3, ne, abort=21f + ldr1b r1, r4, cs, abort=21f + ldr1b r1, ip, cs, abort=21f + str1b r0, r3, ne, abort=21f + str1b r0, r4, cs, abort=21f + str1b r0, ip, cs, abort=21f + + exit r4, pc + +9: rsb ip, ip, #4 + cmp ip, #2 + ldr1b r1, r3, gt, abort=21f + ldr1b r1, r4, ge, abort=21f + ldr1b r1, lr, abort=21f + str1b r0, r3, gt, abort=21f + str1b r0, r4, ge, abort=21f + subs r2, r2, ip + str1b r0, lr, abort=21f + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr1w r1, lr, abort=21f + beq 17f + bgt 18f + + + .macro forward_copy_shift pull push + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( rsb ip, ip, #32 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + + PLD( pld [r1, #0] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #28] ) + PLD( blt 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) + +12: PLD( pld [r1, #124] ) +13: ldr4w r1, r4, r5, r6, r7, abort=19f + mov r3, lr, pull #\pull + subs r2, r2, #32 + ldr4w r1, r8, r9, ip, lr, abort=19f + orr r3, r3, r4, push #\push + mov r4, r4, pull #\pull + orr r4, r4, r5, push #\push + mov r5, r5, pull #\pull + orr r5, r5, r6, push #\push + mov r6, r6, pull #\pull + orr r6, r6, r7, push #\push + mov r7, r7, pull #\pull + orr r7, r7, r8, push #\push + mov r8, r8, pull #\pull + orr r8, r8, r9, push #\push + mov r9, r9, pull #\pull + orr r9, r9, ip, push #\push + mov ip, ip, pull #\pull + orr ip, ip, lr, push #\push + str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov r3, lr, pull #\pull + ldr1w r1, lr, abort=21f + 
subs ip, ip, #4 + orr r3, r3, lr, push #\push + str1w r0, r3, abort=21f + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: sub r1, r1, #(\push / 8) + b 8b + + .endm + + + forward_copy_shift pull=8 push=24 + +17: forward_copy_shift pull=16 push=16 + +18: forward_copy_shift pull=24 push=8 + + +/* + * Abort preamble and completion macros. + * If a fixup handler is required then those macros must surround it. + * It is assumed that the fixup code will handle the private part of + * the exit macro. + */ + + .macro copy_abort_preamble +19: ldmfd sp!, {r5 - r9} + b 21f +20: ldmfd sp!, {r5 - r8} +21: + .endm + + .macro copy_abort_end + ldmfd sp!, {r4, pc} + .endm + diff --git a/xen/arch/arm/arm32/lib/div64.S b/xen/arch/arm/arm32/lib/div64.S new file mode 100644 index 0000000000..83a5f22a84 --- /dev/null +++ b/xen/arch/arm/arm32/lib/div64.S @@ -0,0 +1,211 @@ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include "assembler.h" + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +ENTRY(__do_div64) +UNWIND(.fnstart) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subcss xh, xh, yl + movnes ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + movlo pc, lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + mov pc, lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + moveq pc, lr + + @ We still have remainer bits in the low part. Bring them up. 
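
The register contract documented at the top of __do_div64 (xh:xl dividend in,
r4 divisor preserved, yh:yl quotient and xh remainder out) amounts to this
small C model, a sketch for illustration only; do_div64_model is a
hypothetical name, not code from the patch:

    #include <stdint.h>

    /* What __do_div64 computes, ignoring its non-standard register
     * calling convention. A zero divisor is handled separately by the
     * Ldiv0_64 path in the assembly rather than here. */
    static void do_div64_model(uint64_t dividend, uint32_t divisor,
                               uint64_t *quotient, uint32_t *remainder)
    {
        *quotient  = dividend / divisor;              /* yh:yl */
        *remainder = (uint32_t)(dividend % divisor);  /* xh    */
    }
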
+ +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + mov pc, lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + mov pc, lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + moveq pc, lr +UNWIND(.fnend) + +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 + + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 + +UNWIND(.fnend) +ENDPROC(__do_div64) diff --git a/xen/arch/arm/arm32/lib/findbit.S b/xen/arch/arm/arm32/lib/findbit.S new file mode 100644 index 0000000000..2fbcc82995 --- /dev/null +++ b/xen/arch/arm/arm32/lib/findbit.S @@ -0,0 +1,198 @@ +/* + * linux/arch/arm/lib/findbit.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16th March 2001 - John Ripley + * Fixed so that "size" is an exclusive not an inclusive quantity. + * All users of these functions expect exclusive sizes, and may + * also call with zero size. + * Reworked by rmk. + */ + +#include + +#include "assembler.h" + .text + +/* + * Purpose : Find a 'zero' bit + * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); + */ +ENTRY(_find_first_zero_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? 
+ blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_zero_bit_le) + +/* + * Purpose : Find next 'zero' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_zero_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_le) + +/* + * Purpose : Find a 'one' bit + * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); + */ +ENTRY(_find_first_bit_le) + teq r1, #0 + beq 3f + mov r2, #0 +1: + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_bit_le) + +/* + * Purpose : Find next 'one' bit + * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) + */ +ENTRY(_find_next_bit_le) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + ARM( ldrb r3, [r0, r2, lsr #3] ) + THUMB( lsr r3, r2, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_le) + +#ifdef __ARMEB__ + +ENTRY(_find_first_zero_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eors r3, r3, #0xff @ invert bits + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_zero_bit_be) + +ENTRY(_find_next_zero_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + eor r3, r3, #0xff @ now looking for a 1 bit + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_zero_bit_be) + +ENTRY(_find_first_bit_be) + teq r1, #0 + beq 3f + mov r2, #0 +1: eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 + bne .L_found @ any now set - found zero bit + add r2, r2, #8 @ next bit pointer +2: cmp r2, r1 @ any more? + blo 1b +3: mov r0, r1 @ no free bits + mov pc, lr +ENDPROC(_find_first_bit_be) + +ENTRY(_find_next_bit_be) + teq r1, #0 + beq 3b + ands ip, r2, #7 + beq 1b @ If new byte, goto old routine + eor r3, r2, #0x18 @ big endian byte ordering + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + movs r3, r3, lsr ip @ shift off unused bits + bne .L_found + orr r2, r2, #7 @ if zero, then no bits here + add r2, r2, #1 @ align bit pointer + b 2b @ loop for next bit +ENDPROC(_find_next_bit_be) + +#endif + +/* + * One or more bits in the LSB of r3 are assumed to be set. 
+ */ +.L_found: +#if __LINUX_ARM_ARCH__ >= 5 + rsb r0, r3, #0 + and r3, r3, r0 + clz r3, r3 + rsb r3, r3, #31 + add r0, r2, r3 +#else + tst r3, #0x0f + addeq r2, r2, #4 + movne r3, r3, lsl #4 + tst r3, #0x30 + addeq r2, r2, #2 + movne r3, r3, lsl #2 + tst r3, #0x40 + addeq r2, r2, #1 + mov r0, r2 +#endif + cmp r1, r0 @ Clamp to maxbit + movlo r0, r1 + mov pc, lr + diff --git a/xen/arch/arm/arm32/lib/lib1funcs.S b/xen/arch/arm/arm32/lib/lib1funcs.S new file mode 100644 index 0000000000..95ee3121d9 --- /dev/null +++ b/xen/arch/arm/arm32/lib/lib1funcs.S @@ -0,0 +1,389 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + + +#include +#include "assembler.h" + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. 
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
+
+ mov \result, #0
+
+#endif
+
+ @ Division loop
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ rsb \order, \order, #31
+
+#else
+
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
+
+#else
+
+ mov \order, #0
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
+
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif
+
+ @ Perform all needed subtractions to keep only the remainder.
+ @ Do comparisons in batches of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subges \order, \order, #4
+ bge 1b
+
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
+
+ @ Either 1, 2 or 3 comparisons/subtractions are left.
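ARM_MOD_BODY, by contrast, keeps only the remainder: it counts how far the divisor was shifted up ("order") and then performs order+1 conditional subtractions, four per trip around the batch loop, leaving the tail below to mop up the last one to three. A hedged C sketch of that structure (soft_umod is an illustrative name, not part of the patch; as with the assembly, a non-zero divisor is assumed since callers branch to Ldiv0 first):

    unsigned int soft_umod(unsigned int dividend, unsigned int divisor)
    {
        int order = 0;

        /* Align the divisor with the dividend's top set bit, counting shifts. */
        while (divisor < dividend && !(divisor & 0x80000000u)) {
            divisor <<= 1;
            order++;
        }
        /* order+1 conditional subtractions; the assembly batches these 4 at a time. */
        while (order-- >= 0) {
            if (dividend >= divisor)
                dividend -= divisor;
            divisor >>= 1;
        }
        return dividend;
    }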
+2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) + + subs r2, r1, #1 + moveq pc, lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + mov pc, lr + +11: moveq r0, #1 + movne r0, #0 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) +UNWIND(.fnstart) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + movls pc, lr + + ARM_MOD_BODY r0, r1, r2, r3 + + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__umodsi3) + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) +UNWIND(.fnstart) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) + +ENTRY(__aeabi_uldivmod) +UNWIND(.fnstart) +UNWIND(.save {lr} ) + sub sp, sp, #8 + stmfd sp!, {sp, lr} + bl __qdivrem + ldr lr, [sp, #4] + add sp, sp, #8 + ldmfd sp!, {r2, r3} + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uldivmod) + +ENTRY(__aeabi_ldivmod) +UNWIND(.fnstart) +UNWIND(.save {lr} ) + sub sp, sp, #16 + stmfd sp!, {sp, lr} + bl __ldivmod_helper + ldr lr, [sp, #4] + add sp, sp, #16 + ldmfd sp!, {r2, r3} + mov pc, lr + +UNWIND(.fnend) +ENDPROC(__aeabi_ldivmod) +#endif + +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. 
+ ldr pc, [sp], #8 +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/xen/arch/arm/arm32/lib/lshrdi3.S b/xen/arch/arm/arm32/lib/lshrdi3.S new file mode 100644 index 0000000000..3e8887ef4e --- /dev/null +++ b/xen/arch/arm/arm32/lib/lshrdi3.S @@ -0,0 +1,54 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + + +#include +#include "assembler.h" + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +ENTRY(__lshrdi3) +ENTRY(__aeabi_llsr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) + mov ah, ah, lsr r2 + mov pc, lr + +ENDPROC(__lshrdi3) +ENDPROC(__aeabi_llsr) diff --git a/xen/arch/arm/arm32/lib/memcpy.S b/xen/arch/arm/arm32/lib/memcpy.S new file mode 100644 index 0000000000..d1ab9fbb55 --- /dev/null +++ b/xen/arch/arm/arm32/lib/memcpy.S @@ -0,0 +1,63 @@ +/* + * linux/arch/arm/lib/memcpy.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include "assembler.h" + +#define LDR1W_SHIFT 0 +#define STR1W_SHIFT 0 + + .macro ldr1w ptr reg abort + W(ldr) \reg, [\ptr], #4 + .endm + + .macro ldr4w ptr reg1 reg2 reg3 reg4 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} + .endm + + .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro ldr1b ptr reg cond=al abort + ldr\cond\()b \reg, [\ptr], #1 + .endm + + .macro str1w ptr reg abort + W(str) \reg, [\ptr], #4 + .endm + + .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort + stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} + .endm + + .macro str1b ptr reg cond=al abort + str\cond\()b \reg, [\ptr], #1 + .endm + + .macro enter reg1 reg2 + stmdb sp!, {r0, \reg1, \reg2} + .endm + + .macro exit reg1 reg2 + ldmfd sp!, {r0, \reg1, \reg2} + .endm + + .text + +/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ + +ENTRY(memcpy) + +#include "copy_template.S" + +ENDPROC(memcpy) diff --git a/xen/arch/arm/arm32/lib/memmove.S b/xen/arch/arm/arm32/lib/memmove.S new file mode 100644 index 0000000000..4e142b8aeb --- /dev/null +++ b/xen/arch/arm/arm32/lib/memmove.S @@ -0,0 +1,200 @@ +/* + * linux/arch/arm/lib/memmove.S + * + * Author: Nicolas Pitre + * Created: Sep 28, 2005 + * Copyright: (C) MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#include "assembler.h" + + .text + +/* + * Prototype: void *memmove(void *dest, const void *src, size_t n); + * + * Note: + * + * If the memory regions don't overlap, we simply branch to memcpy which is + * normally a bit faster. Otherwise the copy is done going downwards. This + * is a transposition of the code from copy_template.S but with the copy + * occurring in the opposite direction. + */ + +ENTRY(memmove) + + subs ip, r0, r1 + cmphi r2, ip + bls memcpy + + stmfd sp!, {r0, r4, lr} + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + ands ip, r1, #3 + bne 10f + +1: subs r2, r2, #(28) + stmfd sp!, {r5 - r8} + blt 5f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( bcs 2f ) + CALGN( adr r4, 6f ) + CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) + CALGN( add pc, r4, ip ) + + PLD( pld [r1, #-4] ) +2: PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + bge 3b + PLD( cmn r2, #96 ) + PLD( bge 4b ) + +5: ands ip, r2, #28 + rsb ip, ip, #32 + addne pc, pc, ip @ C is always clear here + b 7f +6: W(nop) + W(ldr) r3, [r1, #-4]! + W(ldr) r4, [r1, #-4]! + W(ldr) r5, [r1, #-4]! + W(ldr) r6, [r1, #-4]! + W(ldr) r7, [r1, #-4]! + W(ldr) r8, [r1, #-4]! + W(ldr) lr, [r1, #-4]! + + add pc, pc, ip + nop + W(nop) + W(str) r3, [r0, #-4]! + W(str) r4, [r0, #-4]! + W(str) r5, [r0, #-4]! + W(str) r6, [r0, #-4]! + W(str) r7, [r0, #-4]! + W(str) r8, [r0, #-4]! + W(str) lr, [r0, #-4]! + + CALGN( bcs 2b ) + +7: ldmfd sp!, {r5 - r8} + +8: movs r2, r2, lsl #31 + ldrneb r3, [r1, #-1]! + ldrcsb r4, [r1, #-1]! + ldrcsb ip, [r1, #-1] + strneb r3, [r0, #-1]! + strcsb r4, [r0, #-1]! 
+ strcsb ip, [r0, #-1] + ldmfd sp!, {r0, r4, pc} + +9: cmp ip, #2 + ldrgtb r3, [r1, #-1]! + ldrgeb r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strgtb r3, [r0, #-1]! + strgeb r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! + blt 8b + ands ip, r1, #3 + beq 1b + +10: bic r1, r1, #3 + cmp ip, #2 + ldr r3, [r1, #0] + beq 17f + blt 18f + + + .macro backward_copy_shift push pull + + subs r2, r2, #28 + blt 14f + + CALGN( ands ip, r0, #31 ) + CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( subcc r2, r2, ip ) + CALGN( bcc 15f ) + +11: stmfd sp!, {r5 - r9} + + PLD( pld [r1, #-4] ) + PLD( subs r2, r2, #96 ) + PLD( pld [r1, #-32] ) + PLD( blt 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) + +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r7, r8, r9, ip} + mov lr, r3, push #\push + subs r2, r2, #32 + ldmdb r1!, {r3, r4, r5, r6} + orr lr, lr, ip, pull #\pull + mov ip, ip, push #\push + orr ip, ip, r9, pull #\pull + mov r9, r9, push #\push + orr r9, r9, r8, pull #\pull + mov r8, r8, push #\push + orr r8, r8, r7, pull #\pull + mov r7, r7, push #\push + orr r7, r7, r6, pull #\pull + mov r6, r6, push #\push + orr r6, r6, r5, pull #\pull + mov r5, r5, push #\push + orr r5, r5, r4, pull #\pull + mov r4, r4, push #\push + orr r4, r4, r3, pull #\pull + stmdb r0!, {r4 - r9, ip, lr} + bge 12b + PLD( cmn r2, #96 ) + PLD( bge 13b ) + + ldmfd sp!, {r5 - r9} + +14: ands ip, r2, #28 + beq 16f + +15: mov lr, r3, push #\push + ldr r3, [r1, #-4]! + subs ip, ip, #4 + orr lr, lr, r3, pull #\pull + str lr, [r0, #-4]! + bgt 15b + CALGN( cmp r2, #0 ) + CALGN( bge 11b ) + +16: add r1, r1, #(\pull / 8) + b 8b + + .endm + + + backward_copy_shift push=8 pull=24 + +17: backward_copy_shift push=16 pull=16 + +18: backward_copy_shift push=24 pull=8 + +ENDPROC(memmove) diff --git a/xen/arch/arm/arm32/lib/memset.S b/xen/arch/arm/arm32/lib/memset.S new file mode 100644 index 0000000000..d2937a3e0e --- /dev/null +++ b/xen/arch/arm/arm32/lib/memset.S @@ -0,0 +1,129 @@ +/* + * linux/arch/arm/lib/memset.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ASM optimised string functions + */ + +#include + +#include "assembler.h" + + .text + .align 5 + .word 0 + +1: subs r2, r2, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [r0], #1 @ 1 + strleb r1, [r0], #1 @ 1 + strb r1, [r0], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memset again. + */ + +ENTRY(memset) + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * we know that the pointer in r0 is aligned to a word boundary. + */ + orr r1, r1, r1, lsl #8 + orr r1, r1, r1, lsl #16 + mov r3, r1 + cmp r2, #16 + blt 4f + +#if ! CALGN(1)+0 + +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! + mov ip, r1 + mov lr, r1 + +2: subs r2, r2, #64 + stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + stmgeia r0!, {r1, r3, ip, lr} + bgt 2b + ldmeqfd sp!, {pc} @ Now <64 bytes to go. 
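The entry sequence of memset above uses the standard word-splat idiom: the two orr instructions replicate the fill byte into all four byte lanes of r1 so that the stm loops can store 16 or 64 bytes per iteration instead of one. A rough C equivalent, for orientation only (sketch_memset is an illustrative name of ours; the real routine's alignment fix-up and stm batching are simplified here):

    #include <stddef.h>
    #include <stdint.h>

    void *sketch_memset(void *dest, int c, size_t n)
    {
        uint8_t *d = dest;
        uint32_t w = (uint8_t)c;

        w |= w << 8;                       /* orr r1, r1, r1, lsl #8  */
        w |= w << 16;                      /* orr r1, r1, r1, lsl #16 */

        while (((uintptr_t)d & 3) && n) {  /* align to a word boundary */
            *d++ = (uint8_t)c;
            n--;
        }
        while (n >= 4) {                   /* word stores; the asm groups these with stm */
            *(uint32_t *)d = w;
            d += 4;
            n -= 4;
        }
        while (n--)                        /* trailing bytes */
            *d++ = (uint8_t)c;
        return dest;
    }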
+/* + * No need to correct the count; we're only testing bits from now on + */ + tst r2, #32 + stmneia r0!, {r1, r3, ip, lr} + stmneia r0!, {r1, r3, ip, lr} + tst r2, #16 + stmneia r0!, {r1, r3, ip, lr} + ldr lr, [sp], #4 + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r7, lr} + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov ip, r1 + mov lr, r1 + + cmp r2, #96 + tstgt r0, #31 + ble 3f + + and ip, r0, #31 + rsb ip, ip, #32 + sub r2, r2, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + tst ip, #(1 << 30) + mov ip, r1 + strne r1, [r0], #4 + +3: subs r2, r2, #64 + stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia r0!, {r1, r3-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r2, #32 + stmneia r0!, {r1, r3-r7, ip, lr} + tst r2, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + +4: tst r2, #8 + stmneia r0!, {r1, r3} + tst r2, #4 + strne r1, [r0], #4 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r2, #2 + strneb r1, [r0], #1 + strneb r1, [r0], #1 + tst r2, #1 + strneb r1, [r0], #1 + mov pc, lr +ENDPROC(memset) diff --git a/xen/arch/arm/arm32/lib/memzero.S b/xen/arch/arm/arm32/lib/memzero.S new file mode 100644 index 0000000000..ce25acaad1 --- /dev/null +++ b/xen/arch/arm/arm32/lib/memzero.S @@ -0,0 +1,127 @@ +/* + * linux/arch/arm/lib/memzero.S + * + * Copyright (C) 1995-2000 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#include "assembler.h" + + .text + .align 5 + .word 0 +/* + * Align the pointer in r0. r3 contains the number of bytes that we are + * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we + * don't bother; we use byte stores instead. + */ +1: subs r1, r1, #4 @ 1 do we have enough + blt 5f @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r2, [r0], #1 @ 1 + strleb r2, [r0], #1 @ 1 + strb r2, [r0], #1 @ 1 + add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) +/* + * The pointer is now aligned and the length is adjusted. Try doing the + * memzero again. + */ + +ENTRY(__memzero) + mov r2, #0 @ 1 + ands r3, r0, #3 @ 1 unaligned? + bne 1b @ 1 +/* + * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. + */ + cmp r1, #16 @ 1 we can skip this chunk if we + blt 4f @ 1 have < 16 bytes + +#if ! CALGN(1)+0 + +/* + * We need an extra register for this loop - save the return address and + * use the LR + */ + str lr, [sp, #-4]! @ 1 + mov ip, r2 @ 1 + mov lr, r2 @ 1 + +3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + stmgeia r0!, {r2, r3, ip, lr} @ 4 + bgt 3b @ 1 + ldmeqfd sp!, {pc} @ 1/2 quick exit +/* + * No need to correct the count; we're only testing bits from now on + */ + tst r1, #32 @ 1 + stmneia r0!, {r2, r3, ip, lr} @ 4 + stmneia r0!, {r2, r3, ip, lr} @ 4 + tst r1, #16 @ 1 16 bytes or more? + stmneia r0!, {r2, r3, ip, lr} @ 4 + ldr lr, [sp], #4 @ 1 + +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. 
+ */ + + stmfd sp!, {r4-r7, lr} + mov r4, r2 + mov r5, r2 + mov r6, r2 + mov r7, r2 + mov ip, r2 + mov lr, r2 + + cmp r1, #96 + andgts ip, r0, #31 + ble 3f + + rsb ip, ip, #32 + sub r1, r1, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + movs ip, ip, lsl #2 + strcs r2, [r0], #4 + +3: subs r1, r1, #64 + stmgeia r0!, {r2-r7, ip, lr} + stmgeia r0!, {r2-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r1, #32 + stmneia r0!, {r2-r7, ip, lr} + tst r1, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + +4: tst r1, #8 @ 1 8 bytes or more? + stmneia r0!, {r2, r3} @ 2 + tst r1, #4 @ 1 4 bytes or more? + strne r2, [r0], #4 @ 1 +/* + * When we get here, we've got less than 4 bytes to zero. We + * may have an unaligned pointer as well. + */ +5: tst r1, #2 @ 1 2 bytes or more? + strneb r2, [r0], #1 @ 1 + strneb r2, [r0], #1 @ 1 + tst r1, #1 @ 1 a byte left over + strneb r2, [r0], #1 @ 1 + mov pc, lr @ 1 +ENDPROC(__memzero) diff --git a/xen/arch/arm/arm32/lib/setbit.S b/xen/arch/arm/arm32/lib/setbit.S new file mode 100644 index 0000000000..c828851553 --- /dev/null +++ b/xen/arch/arm/arm32/lib/setbit.S @@ -0,0 +1,18 @@ +/* + * linux/arch/arm/lib/setbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include "assembler.h" +#include "bitops.h" + .text + +ENTRY(_set_bit) + bitop orr +ENDPROC(_set_bit) diff --git a/xen/arch/arm/arm32/lib/testchangebit.S b/xen/arch/arm/arm32/lib/testchangebit.S new file mode 100644 index 0000000000..a7f527cd98 --- /dev/null +++ b/xen/arch/arm/arm32/lib/testchangebit.S @@ -0,0 +1,18 @@ +/* + * linux/arch/arm/lib/testchangebit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include "assembler.h" +#include "bitops.h" + .text + +ENTRY(_test_and_change_bit) + testop eor, str +ENDPROC(_test_and_change_bit) diff --git a/xen/arch/arm/arm32/lib/testclearbit.S b/xen/arch/arm/arm32/lib/testclearbit.S new file mode 100644 index 0000000000..8f39c72936 --- /dev/null +++ b/xen/arch/arm/arm32/lib/testclearbit.S @@ -0,0 +1,18 @@ +/* + * linux/arch/arm/lib/testclearbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include + +#include "assembler.h" +#include "bitops.h" + .text + +ENTRY(_test_and_clear_bit) + testop bicne, strne +ENDPROC(_test_and_clear_bit) diff --git a/xen/arch/arm/arm32/lib/testsetbit.S b/xen/arch/arm/arm32/lib/testsetbit.S new file mode 100644 index 0000000000..1b8d273100 --- /dev/null +++ b/xen/arch/arm/arm32/lib/testsetbit.S @@ -0,0 +1,18 @@ +/* + * linux/arch/arm/lib/testsetbit.S + * + * Copyright (C) 1995-1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */
+#include
+
+#include "assembler.h"
+#include "bitops.h"
+ .text
+
+ENTRY(_test_and_set_bit)
+ testop orreq, streq
+ENDPROC(_test_and_set_bit)
diff --git a/xen/arch/arm/arm32/mode_switch.S b/xen/arch/arm/arm32/mode_switch.S
new file mode 100644
index 0000000000..411eb924cb
--- /dev/null
+++ b/xen/arch/arm/arm32/mode_switch.S
@@ -0,0 +1,121 @@
+/*
+ * xen/arch/arm/mode_switch.S
+ *
+ * Start-of-day code to take a CPU from Secure mode to Hyp mode.
+ *
+ * Tim Deegan
+ * Copyright (c) 2011-2012 Citrix Systems.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+
+/* XXX: Versatile Express specific code */
+/* wake up secondary cpus */
+.globl kick_cpus
+kick_cpus:
+ /* write start paddr to v2m sysreg FLAGSSET register */
+ ldr r0, =(V2M_SYS_MMIO_BASE) /* base V2M sysreg MMIO address */
+ dsb
+ mov r2, #0xffffffff
+ str r2, [r0, #(V2M_SYS_FLAGSCLR)]
+ dsb
+ ldr r2, =start
+ add r2, r2, r10
+ str r2, [r0, #(V2M_SYS_FLAGSSET)]
+ dsb
+ /* send an interrupt */
+ ldr r0, =(GIC_BASE_ADDRESS + GIC_DR_OFFSET) /* base GICD MMIO address */
+ mov r2, #0x1
+ str r2, [r0, #(GICD_CTLR * 4)] /* enable distributor */
+ mov r2, #0xfe0000
+ str r2, [r0, #(GICD_SGIR * 4)] /* send IPI to everybody */
+ dsb
+ str r2, [r0, #(GICD_CTLR * 4)] /* disable distributor */
+ mov pc, lr
+
+
+/* Bring up a CPU into Hyp mode. Clobbers r0-r3.
+ *
+ * Expects r12 == CPU number
+ *
+ * This code is specific to the VE model, and not intended to be used
+ * on production systems. As such it's a bit hackier than the main
+ * boot code in head.S. In future it will be replaced by better
+ * integration with the bootloader/firmware so that Xen always starts
+ * in Hyp mode. */
+
+.globl enter_hyp_mode
+enter_hyp_mode:
+ mov r3, lr /* Put return address in non-banked reg */
+ cpsid aif, #0x16 /* Enter Monitor mode */
+ mrc CP32(r0, SCR)
+ orr r0, r0, #0x100 /* Set HCE */
+ orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */
+ bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */
+ mcr CP32(r0, SCR)
+ /* Ugly: the system timer's frequency register is only
+ * programmable in Secure state. Since we don't know where its
+ * memory-mapped control registers live, we can't find out the
+ * right frequency. Use the VE model's default frequency here. */
+ ldr r0, =0x5f5e100 /* 100 MHz */
+ mcr CP32(r0, CNTFRQ)
+ ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */
+ mcr CP32(r0, NSACR)
+ mov r0, #GIC_BASE_ADDRESS
+ add r0, r0, #GIC_DR_OFFSET
+ /* Disable the GIC distributor, on the boot CPU only */
+ mov r1, #0
+ teq r12, #0 /* Is this the boot CPU? */
+ streq r1, [r0]
+ /* Continuing ugliness: Set up the GIC so NS state owns interrupts.
+ * The first 32 interrupts (SGIs & PPIs) must be configured on all
+ * CPUs while the remainder are SPIs and only need to be done once, on
+ * the boot CPU. */
+ add r0, r0, #0x80 /* GICD_IGROUP0 */
+ mov r2, #0xffffffff /* All interrupts to group 1 */
+ teq r12, #0 /* Boot CPU?
*/ + str r2, [r0] /* Interrupts 0-31 (SGI & PPI) */ + streq r2, [r0, #4] /* Interrupts 32-63 (SPI) */ + streq r2, [r0, #8] /* Interrupts 64-95 (SPI) */ + /* Disable the GIC CPU interface on all processors */ + mov r0, #GIC_BASE_ADDRESS + add r0, r0, #GIC_CR_OFFSET + mov r1, #0 + str r1, [r0] + /* Must drop priority mask below 0x80 before entering NS state */ + ldr r1, =0xff + str r1, [r0, #0x4] /* -> GICC_PMR */ + /* Reset a few config registers */ + mov r0, #0 + mcr CP32(r0, FCSEIDR) + mcr CP32(r0, CONTEXTIDR) + /* Allow non-secure access to coprocessors, FIQs, VFP and NEON */ + ldr r1, =0x3fff /* 14 CP bits set, all others clear */ + mcr CP32(r1, NSACR) + + mrs r0, cpsr /* Copy the CPSR */ + add r0, r0, #0x4 /* 0x16 (Monitor) -> 0x1a (Hyp) */ + msr spsr_cxsf, r0 /* into the SPSR */ + movs pc, r3 /* Exception-return into Hyp mode */ + +/* + * Local variables: + * mode: ASM + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/arm/arm32/proc-ca15.S b/xen/arch/arm/arm32/proc-ca15.S new file mode 100644 index 0000000000..dcdd42e09e --- /dev/null +++ b/xen/arch/arm/arm32/proc-ca15.S @@ -0,0 +1,35 @@ +/* + * xen/arch/arm/proc-ca15.S + * + * Cortex A15 specific initializations + * + * Copyright (c) 2011 Citrix Systems. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +.globl cortex_a15_init +cortex_a15_init: + /* Set up the SMP bit in ACTLR */ + mrc CP32(r0, ACTLR) + orr r0, r0, #(ACTLR_CA15_SMP) /* enable SMP bit */ + mcr CP32(r0, ACTLR) + mov pc, lr + +/* + * Local variables: + * mode: ASM + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/arm/asm-offsets.c b/xen/arch/arm/asm-offsets.c deleted file mode 100644 index cc1a72a659..0000000000 --- a/xen/arch/arm/asm-offsets.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - */ -#define COMPILE_OFFSETS - -#include -#include -#include -#include -#include - -#define DEFINE(_sym, _val) \ - __asm__ __volatile__ ( "\n->" #_sym " %0 " #_val : : "i" (_val) ) -#define BLANK() \ - __asm__ __volatile__ ( "\n->" : : ) -#define OFFSET(_sym, _str, _mem) \ - DEFINE(_sym, offsetof(_str, _mem)); - -/* base-2 logarithm */ -#define __L2(_x) (((_x) & 0x00000002) ? 1 : 0) -#define __L4(_x) (((_x) & 0x0000000c) ? ( 2 + __L2( (_x)>> 2)) : __L2( _x)) -#define __L8(_x) (((_x) & 0x000000f0) ? ( 4 + __L4( (_x)>> 4)) : __L4( _x)) -#define __L16(_x) (((_x) & 0x0000ff00) ? ( 8 + __L8( (_x)>> 8)) : __L8( _x)) -#define LOG_2(_x) (((_x) & 0xffff0000) ? 
(16 + __L16((_x)>>16)) : __L16(_x)) - -void __dummy__(void) -{ - OFFSET(UREGS_sp, struct cpu_user_regs, sp); - OFFSET(UREGS_lr, struct cpu_user_regs, lr); - OFFSET(UREGS_pc, struct cpu_user_regs, pc); - OFFSET(UREGS_cpsr, struct cpu_user_regs, cpsr); - - OFFSET(UREGS_LR_usr, struct cpu_user_regs, lr_usr); - OFFSET(UREGS_SP_usr, struct cpu_user_regs, sp_usr); - - OFFSET(UREGS_SP_svc, struct cpu_user_regs, sp_svc); - OFFSET(UREGS_LR_svc, struct cpu_user_regs, lr_svc); - OFFSET(UREGS_SPSR_svc, struct cpu_user_regs, spsr_svc); - - OFFSET(UREGS_SP_abt, struct cpu_user_regs, sp_abt); - OFFSET(UREGS_LR_abt, struct cpu_user_regs, lr_abt); - OFFSET(UREGS_SPSR_abt, struct cpu_user_regs, spsr_abt); - - OFFSET(UREGS_SP_und, struct cpu_user_regs, sp_und); - OFFSET(UREGS_LR_und, struct cpu_user_regs, lr_und); - OFFSET(UREGS_SPSR_und, struct cpu_user_regs, spsr_und); - - OFFSET(UREGS_SP_irq, struct cpu_user_regs, sp_irq); - OFFSET(UREGS_LR_irq, struct cpu_user_regs, lr_irq); - OFFSET(UREGS_SPSR_irq, struct cpu_user_regs, spsr_irq); - - OFFSET(UREGS_SP_fiq, struct cpu_user_regs, sp_fiq); - OFFSET(UREGS_LR_fiq, struct cpu_user_regs, lr_fiq); - OFFSET(UREGS_SPSR_fiq, struct cpu_user_regs, spsr_fiq); - - OFFSET(UREGS_R8_fiq, struct cpu_user_regs, r8_fiq); - OFFSET(UREGS_R9_fiq, struct cpu_user_regs, r9_fiq); - OFFSET(UREGS_R10_fiq, struct cpu_user_regs, r10_fiq); - OFFSET(UREGS_R11_fiq, struct cpu_user_regs, r11_fiq); - OFFSET(UREGS_R12_fiq, struct cpu_user_regs, r12_fiq); - - OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, cpsr); - DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs)); - BLANK(); - - DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); - - OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context); -} - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index 7bbad45553..2ad5c9063e 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -12,7 +12,7 @@ #include #include -#include "gic.h" +#include #include "vtimer.h" #include "vpl011.h" diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c index 432fdc01c3..1e9776d597 100644 --- a/xen/arch/arm/domain_build.c +++ b/xen/arch/arm/domain_build.c @@ -11,7 +11,7 @@ #include #include -#include "gic.h" +#include #include "kernel.h" static unsigned int __initdata opt_dom0_max_vcpus; diff --git a/xen/arch/arm/entry.S b/xen/arch/arm/entry.S deleted file mode 100644 index 36114273d7..0000000000 --- a/xen/arch/arm/entry.S +++ /dev/null @@ -1,141 +0,0 @@ -#include -#include -#include - -#define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg] -#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11 - -#define SAVE_BANKED(mode) \ - SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode) - -#define RESTORE_BANKED(mode) \ - RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode) - -#define SAVE_ALL \ - sub sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */ \ - push {r0-r12}; /* Save R0-R12 */ \ - \ - mrs r11, ELR_hyp; /* ELR_hyp is return address. 
*/\ - str r11, [sp, #UREGS_pc]; \ - \ - str lr, [sp, #UREGS_lr]; \ - \ - add r11, sp, #UREGS_kernel_sizeof+4; \ - str r11, [sp, #UREGS_sp]; \ - \ - mrs r11, SPSR_hyp; \ - str r11, [sp, #UREGS_cpsr]; \ - and r11, #PSR_MODE_MASK; \ - cmp r11, #PSR_MODE_HYP; \ - blne save_guest_regs - -save_guest_regs: - ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */ - str r11, [sp, #UREGS_sp] - SAVE_ONE_BANKED(SP_usr) - /* LR_usr is the same physical register as lr and is saved in SAVE_ALL */ - SAVE_BANKED(svc) - SAVE_BANKED(abt) - SAVE_BANKED(und) - SAVE_BANKED(irq) - SAVE_BANKED(fiq) - SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq) - SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq); - mov pc, lr - -#define DEFINE_TRAP_ENTRY(trap) \ - ALIGN; \ -trap_##trap: \ - SAVE_ALL; \ - cpsie i; /* local_irq_enable */ \ - adr lr, return_from_trap; \ - mov r0, sp; \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap - -#define DEFINE_TRAP_ENTRY_NOIRQ(trap) \ - ALIGN; \ -trap_##trap: \ - SAVE_ALL; \ - adr lr, return_from_trap; \ - mov r0, sp; \ - mov r11, sp; \ - bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \ - b do_trap_##trap - -.globl hyp_traps_vector - .align 5 -hyp_traps_vector: - .word 0 /* 0x00 - Reset */ - b trap_undefined_instruction /* 0x04 - Undefined Instruction */ - b trap_supervisor_call /* 0x08 - Supervisor Call */ - b trap_prefetch_abort /* 0x0c - Prefetch Abort */ - b trap_data_abort /* 0x10 - Data Abort */ - b trap_hypervisor /* 0x14 - Hypervisor */ - b trap_irq /* 0x18 - IRQ */ - b trap_fiq /* 0x1c - FIQ */ - -DEFINE_TRAP_ENTRY(undefined_instruction) -DEFINE_TRAP_ENTRY(supervisor_call) -DEFINE_TRAP_ENTRY(prefetch_abort) -DEFINE_TRAP_ENTRY(data_abort) -DEFINE_TRAP_ENTRY(hypervisor) -DEFINE_TRAP_ENTRY_NOIRQ(irq) -DEFINE_TRAP_ENTRY_NOIRQ(fiq) - -return_from_trap: - mov sp, r11 -ENTRY(return_to_new_vcpu) - ldr r11, [sp, #UREGS_cpsr] - and r11, #PSR_MODE_MASK - cmp r11, #PSR_MODE_HYP - beq return_to_hypervisor - /* Fall thru */ -ENTRY(return_to_guest) - mov r11, sp - bic sp, #7 /* Align the stack pointer */ - bl leave_hypervisor_tail /* Disables interrupts on return */ - mov sp, r11 - RESTORE_ONE_BANKED(SP_usr) - /* LR_usr is the same physical register as lr and is restored below */ - RESTORE_BANKED(svc) - RESTORE_BANKED(abt) - RESTORE_BANKED(und) - RESTORE_BANKED(irq) - RESTORE_BANKED(fiq) - RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq) - RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq); - /* Fall thru */ -ENTRY(return_to_hypervisor) - cpsid i - ldr lr, [sp, #UREGS_lr] - ldr r11, [sp, #UREGS_pc] - msr ELR_hyp, r11 - ldr r11, [sp, #UREGS_cpsr] - msr SPSR_hyp, r11 - pop {r0-r12} - add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */ - eret - -/* - * struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next) - * - * r0 - prev - * r1 - next - * - * Returns prev in r0 - */ -ENTRY(__context_switch) - add ip, r0, #VCPU_arch_saved_context - stmia ip!, {r4 - sl, fp, sp, lr} /* Save register state */ - - add r4, r1, #VCPU_arch_saved_context - ldmia r4, {r4 - sl, fp, sp, pc} /* Load registers and return */ - -/* - * Local variables: - * mode: ASM - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c index 7686bde7cf..b77761e245 100644 --- a/xen/arch/arm/gic.c +++ b/xen/arch/arm/gic.c @@ -30,7 +30,7 @@ #include #include -#include "gic.h" +#include /* Access to the GIC Distributor 
registers through the fixmap */ #define GICD ((volatile uint32_t *) FIXMAP_ADDR(FIXMAP_GICD)) diff --git a/xen/arch/arm/gic.h b/xen/arch/arm/gic.h deleted file mode 100644 index 1bf1b02913..0000000000 --- a/xen/arch/arm/gic.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * xen/arch/arm/gic.h - * - * ARM Generic Interrupt Controller support - * - * Tim Deegan - * Copyright (c) 2011 Citrix Systems. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __ARCH_ARM_GIC_H__ -#define __ARCH_ARM_GIC_H__ - -#define GICD_CTLR (0x000/4) -#define GICD_TYPER (0x004/4) -#define GICD_IIDR (0x008/4) -#define GICD_IGROUPR (0x080/4) -#define GICD_IGROUPRN (0x0FC/4) -#define GICD_ISENABLER (0x100/4) -#define GICD_ISENABLERN (0x17C/4) -#define GICD_ICENABLER (0x180/4) -#define GICD_ICENABLERN (0x1fC/4) -#define GICD_ISPENDR (0x200/4) -#define GICD_ISPENDRN (0x27C/4) -#define GICD_ICPENDR (0x280/4) -#define GICD_ICPENDRN (0x2FC/4) -#define GICD_ISACTIVER (0x300/4) -#define GICD_ISACTIVERN (0x37C/4) -#define GICD_ICACTIVER (0x380/4) -#define GICD_ICACTIVERN (0x3FC/4) -#define GICD_IPRIORITYR (0x400/4) -#define GICD_IPRIORITYRN (0x7F8/4) -#define GICD_ITARGETSR (0x800/4) -#define GICD_ITARGETSRN (0xBF8/4) -#define GICD_ICFGR (0xC00/4) -#define GICD_ICFGRN (0xCFC/4) -#define GICD_NSACR (0xE00/4) -#define GICD_NSACRN (0xEFC/4) -#define GICD_ICPIDR2 (0xFE8/4) -#define GICD_SGIR (0xF00/4) -#define GICD_CPENDSGIR (0xF10/4) -#define GICD_CPENDSGIRN (0xF1C/4) -#define GICD_SPENDSGIR (0xF20/4) -#define GICD_SPENDSGIRN (0xF2C/4) -#define GICD_ICPIDR2 (0xFE8/4) - -#define GICC_CTLR (0x0000/4) -#define GICC_PMR (0x0004/4) -#define GICC_BPR (0x0008/4) -#define GICC_IAR (0x000C/4) -#define GICC_EOIR (0x0010/4) -#define GICC_RPR (0x0014/4) -#define GICC_HPPIR (0x0018/4) -#define GICC_APR (0x00D0/4) -#define GICC_NSAPR (0x00E0/4) -#define GICC_DIR (0x1000/4) - -#define GICH_HCR (0x00/4) -#define GICH_VTR (0x04/4) -#define GICH_VMCR (0x08/4) -#define GICH_MISR (0x10/4) -#define GICH_EISR0 (0x20/4) -#define GICH_EISR1 (0x24/4) -#define GICH_ELSR0 (0x30/4) -#define GICH_ELSR1 (0x34/4) -#define GICH_APR (0xF0/4) -#define GICH_LR (0x100/4) - -/* Register bits */ -#define GICD_CTL_ENABLE 0x1 - -#define GICD_TYPE_LINES 0x01f -#define GICD_TYPE_CPUS 0x0e0 -#define GICD_TYPE_SEC 0x400 - -#define GICC_CTL_ENABLE 0x1 -#define GICC_CTL_EOI (0x1 << 9) - -#define GICC_IA_IRQ 0x03ff -#define GICC_IA_CPU 0x1c00 - -#define GICH_HCR_EN (1 << 0) -#define GICH_HCR_UIE (1 << 1) -#define GICH_HCR_LRENPIE (1 << 2) -#define GICH_HCR_NPIE (1 << 3) -#define GICH_HCR_VGRP0EIE (1 << 4) -#define GICH_HCR_VGRP0DIE (1 << 5) -#define GICH_HCR_VGRP1EIE (1 << 6) -#define GICH_HCR_VGRP1DIE (1 << 7) - -#define GICH_MISR_EOI (1 << 0) -#define GICH_MISR_U (1 << 1) -#define GICH_MISR_LRENP (1 << 2) -#define GICH_MISR_NP (1 << 3) -#define GICH_MISR_VGRP0E (1 << 4) -#define GICH_MISR_VGRP0D (1 << 5) -#define GICH_MISR_VGRP1E (1 << 6) -#define GICH_MISR_VGRP1D (1 << 7) - -#define GICH_LR_VIRTUAL_MASK 0x3ff -#define GICH_LR_VIRTUAL_SHIFT 0 -#define GICH_LR_PHYSICAL_MASK 0x3ff -#define GICH_LR_PHYSICAL_SHIFT 10 
-#define GICH_LR_STATE_MASK 0x3 -#define GICH_LR_STATE_SHIFT 28 -#define GICH_LR_PRIORITY_SHIFT 23 -#define GICH_LR_MAINTENANCE_IRQ (1<<19) -#define GICH_LR_PENDING (1<<28) -#define GICH_LR_ACTIVE (1<<29) -#define GICH_LR_GRP1 (1<<30) -#define GICH_LR_HW (1<<31) -#define GICH_LR_CPUID_SHIFT 9 -#define GICH_VTR_NRLRGS 0x3f - -/* XXX: write this into the DT */ -#define VGIC_IRQ_EVTCHN_CALLBACK 31 - -#ifndef __ASSEMBLY__ -extern int domain_vgic_init(struct domain *d); -extern void domain_vgic_free(struct domain *d); - -extern int vcpu_vgic_init(struct vcpu *v); - -extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq,int virtual); -extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq); - -extern void gic_route_ppis(void); -extern void gic_route_spis(void); - -extern void gic_inject(void); - -extern void __cpuinit init_maintenance_interrupt(void); -extern void gic_set_guest_irq(struct vcpu *v, unsigned int irq, - unsigned int state, unsigned int priority); -extern int gic_route_irq_to_guest(struct domain *d, unsigned int irq, - const char * devname); - -/* Accept an interrupt from the GIC and dispatch its handler */ -extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq); -/* Bring up the interrupt controller, and report # cpus attached */ -extern void gic_init(void); -/* Bring up a secondary CPU's per-CPU GIC interface */ -extern void gic_init_secondary_cpu(void); -/* Take down a CPU's per-CPU GIC interface */ -extern void gic_disable_cpu(void); -/* setup the gic virtual interface for a guest */ -extern int gicv_setup(struct domain *d); - -/* Context switch */ -extern void gic_save_state(struct vcpu *v); -extern void gic_restore_state(struct vcpu *v); - -#endif /* __ASSEMBLY__ */ -#endif - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S deleted file mode 100644 index 93f4edba53..0000000000 --- a/xen/arch/arm/head.S +++ /dev/null @@ -1,415 +0,0 @@ -/* - * xen/arch/arm/head.S - * - * Start-of-day code for an ARMv7-A with virt extensions. - * - * Tim Deegan - * Copyright (c) 2011 Citrix Systems. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include - -#define ZIMAGE_MAGIC_NUMBER 0x016f2818 - -#define PT_PT 0xe7f /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=1, P=1 */ -#define PT_MEM 0xe7d /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=0, P=1 */ -#define PT_DEV 0xe71 /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=0, P=1 */ -#define PT_DEV_L3 0xe73 /* lev3: nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=1, P=1 */ - -#define PT_UPPER(x) (PT_##x & 0xf00) -#define PT_LOWER(x) (PT_##x & 0x0ff) - -/* Macro to print a string to the UART, if there is one. - * Clobbers r0-r3. */ -#ifdef EARLY_UART_ADDRESS -#define PRINT(_s) \ - adr r0, 98f ; \ - bl puts ; \ - b 99f ; \ -98: .asciz _s ; \ - .align 2 ; \ -99: -#else -#define PRINT(s) -#endif - - .arm - - /* This must be the very first address in the loaded image. 
- * It should be linked at XEN_VIRT_START, and loaded at any - * 2MB-aligned address. All of text+data+bss must fit in 2MB, - * or the initial pagetable code below will need adjustment. */ - .global start -start: - - /* zImage magic header, see: - * http://www.simtec.co.uk/products/SWLINUX/files/booting_article.html#d0e309 - */ - .rept 8 - mov r0, r0 - .endr - b past_zImage - - .word ZIMAGE_MAGIC_NUMBER /* Magic numbers to help the loader */ - .word 0x00000000 /* absolute load/run zImage address or - * 0 for PiC */ - .word (_end - start) /* zImage end address */ - -past_zImage: - cpsid aif /* Disable all interrupts */ - - /* Save the bootloader arguments in less-clobberable registers */ - mov r7, r1 /* r7 := ARM-linux machine type */ - mov r8, r2 /* r8 := ATAG base address */ - - /* Find out where we are */ - ldr r0, =start - adr r9, start /* r9 := paddr (start) */ - sub r10, r9, r0 /* r10 := phys-offset */ - - /* Using the DTB in the .dtb section? */ -#ifdef CONFIG_DTB_FILE - ldr r8, =_sdtb - add r8, r10 /* r8 := paddr(DTB) */ -#endif - - /* Are we the boot CPU? */ - mov r12, #0 /* r12 := CPU ID */ - mrc CP32(r0, MPIDR) - tst r0, #(1<<31) /* Multiprocessor extension supported? */ - beq boot_cpu - tst r0, #(1<<30) /* Uniprocessor system? */ - bne boot_cpu - bics r12, r0, #(0xff << 24) /* Mask out flags to get CPU ID */ - beq boot_cpu /* If we're CPU 0, boot now */ - - /* Non-boot CPUs wait here to be woken up one at a time. */ -1: dsb - ldr r0, =smp_up_cpu /* VA of gate */ - add r0, r0, r10 /* PA of gate */ - ldr r1, [r0] /* Which CPU is being booted? */ - teq r1, r12 /* Is it us? */ - wfene - bne 1b - -boot_cpu: -#ifdef EARLY_UART_ADDRESS - ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */ - teq r12, #0 /* CPU 0 sets up the UART too */ - bleq init_uart - PRINT("- CPU ") - mov r0, r12 - bl putn - PRINT(" booting -\r\n") -#endif - - /* Wake up secondary cpus */ - teq r12, #0 - bleq kick_cpus - - /* Check that this CPU has Hyp mode */ - mrc CP32(r0, ID_PFR1) - and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */ - teq r0, #0x1000 /* Must == 0x1 or may be incompatible */ - beq 1f - PRINT("- CPU doesn't support the virtualization extensions -\r\n") - b fail -1: - /* Check if we're already in it */ - mrs r0, cpsr - and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */ - teq r0, #0x1a /* Hyp Mode? */ - bne 1f - PRINT("- Started in Hyp mode -\r\n") - b hyp -1: - /* Otherwise, it must have been Secure Supervisor mode */ - mrc CP32(r0, SCR) - tst r0, #0x1 /* Not-Secure bit set? */ - beq 1f - PRINT("- CPU is not in Hyp mode or Secure state -\r\n") - b fail -1: - /* OK, we're in Secure state. */ - PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n") - ldr r0, =enter_hyp_mode /* VA of function */ - adr lr, hyp /* Set return address for call */ - add pc, r0, r10 /* Call PA of function */ - -hyp: - - /* Zero BSS On the boot CPU to avoid nasty surprises */ - teq r12, #0 - bne skip_bss - - PRINT("- Zero BSS -\r\n") - ldr r0, =__bss_start /* Load start & end of bss */ - ldr r1, =__bss_end - add r0, r0, r10 /* Apply physical offset */ - add r1, r1, r10 - - mov r2, #0 -1: str r2, [r0], #4 - cmp r0, r1 - blo 1b - -skip_bss: - - PRINT("- Setting up control registers -\r\n") - - /* Read CPU ID */ - mrc CP32(r0, MIDR) - ldr r1, =(MIDR_MASK) - and r0, r0, r1 - /* Is this a Cortex A15? 
*/ - ldr r1, =(CORTEX_A15_ID) - teq r0, r1 - bleq cortex_a15_init - - /* Set up memory attribute type tables */ - ldr r0, =MAIR0VAL - ldr r1, =MAIR1VAL - mcr CP32(r0, MAIR0) - mcr CP32(r1, MAIR1) - mcr CP32(r0, HMAIR0) - mcr CP32(r1, HMAIR1) - - /* Set up the HTCR: - * PT walks use Outer-Shareable accesses, - * PT walks are write-back, no-write-allocate in both cache levels, - * Full 32-bit address space goes through this table. */ - ldr r0, =0x80002500 - mcr CP32(r0, HTCR) - - /* Set up the HSCTLR: - * Exceptions in LE ARM, - * Low-latency IRQs disabled, - * Write-implies-XN disabled (for now), - * D-cache disabled (for now), - * I-cache enabled, - * Alignment checking enabled, - * MMU translation disabled (for now). */ - ldr r0, =(HSCTLR_BASE|SCTLR_A) - mcr CP32(r0, HSCTLR) - - /* Write Xen's PT's paddr into the HTTBR */ - ldr r4, =xen_pgtable - add r4, r4, r10 /* r4 := paddr (xen_pagetable) */ - mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */ - mcrr CP64(r4, r5, HTTBR) - - /* Non-boot CPUs don't need to rebuild the pagetable */ - teq r12, #0 - bne pt_ready - - /* console fixmap */ -#ifdef EARLY_UART_ADDRESS - ldr r1, =xen_fixmap - add r1, r1, r10 /* r1 := paddr (xen_fixmap) */ - mov r3, #0 - lsr r2, r11, #12 - lsl r2, r2, #12 /* 4K aligned paddr of UART */ - orr r2, r2, #PT_UPPER(DEV_L3) - orr r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */ - strd r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */ -#endif - - /* Build the baseline idle pagetable's first-level entries */ - ldr r1, =xen_second - add r1, r1, r10 /* r1 := paddr (xen_second) */ - mov r3, #0x0 - orr r2, r1, #PT_UPPER(PT) /* r2:r3 := table map of xen_second */ - orr r2, r2, #PT_LOWER(PT) /* (+ rights for linear PT) */ - strd r2, r3, [r4, #0] /* Map it in slot 0 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */ - add r2, r2, #0x1000 - strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */ - - /* Now set up the second-level entries */ - orr r2, r9, #PT_UPPER(MEM) - orr r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB normal map of Xen */ - mov r4, r9, lsr #18 /* Slot for paddr(start) */ - strd r2, r3, [r1, r4] /* Map Xen there */ - ldr r4, =start - lsr r4, #18 /* Slot for vaddr(start) */ - strd r2, r3, [r1, r4] /* Map Xen there too */ - - /* xen_fixmap pagetable */ - ldr r2, =xen_fixmap - add r2, r2, r10 /* r2 := paddr (xen_fixmap) */ - orr r2, r2, #PT_UPPER(PT) - orr r2, r2, #PT_LOWER(PT) /* r2:r3 := table map of xen_fixmap */ - add r4, r4, #8 - strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ - - mov r3, #0x0 - lsr r2, r8, #21 - lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */ - orr r2, r2, #PT_UPPER(MEM) - orr r2, r2, #PT_LOWER(MEM) /* r2:r3 := 2MB RAM incl. DTB */ - add r4, r4, #8 - strd r2, r3, [r1, r4] /* Map it in the early boot slot */ - -pt_ready: - PRINT("- Turning on paging -\r\n") - - ldr r1, =paging /* Explicit vaddr, not RIP-relative */ - mrc CP32(r0, HSCTLR) - orr r0, r0, #(SCTLR_M|SCTLR_C) /* Enable MMU and D-cache */ - dsb /* Flush PTE writes and finish reads */ - mcr CP32(r0, HSCTLR) /* now paging is enabled */ - isb /* Now, flush the icache */ - mov pc, r1 /* Get a proper vaddr into PC */ -paging: - - -#ifdef EARLY_UART_ADDRESS - /* Use a virtual address to access the UART. 
*/ - ldr r11, =FIXMAP_ADDR(FIXMAP_CONSOLE) -#endif - - PRINT("- Ready -\r\n") - - /* The boot CPU should go straight into C now */ - teq r12, #0 - beq launch - - /* Non-boot CPUs need to move on to the relocated pagetables */ - mov r0, #0 - ldr r4, =boot_httbr /* VA of HTTBR value stashed by CPU 0 */ - add r4, r4, r10 /* PA of it */ - ldrd r4, r5, [r4] /* Actual value */ - dsb - mcrr CP64(r4, r5, HTTBR) - dsb - isb - mcr CP32(r0, TLBIALLH) /* Flush hypervisor TLB */ - mcr CP32(r0, ICIALLU) /* Flush I-cache */ - mcr CP32(r0, BPIALL) /* Flush branch predictor */ - dsb /* Ensure completion of TLB+BP flush */ - isb - - /* Non-boot CPUs report that they've got this far */ - ldr r0, =ready_cpus -1: ldrex r1, [r0] /* { read # of ready CPUs } */ - add r1, r1, #1 /* Atomically { ++ } */ - strex r2, r1, [r0] /* { writeback } */ - teq r2, #0 - bne 1b - dsb - mcr CP32(r0, DCCMVAC) /* flush D-Cache */ - dsb - - /* Here, the non-boot CPUs must wait again -- they're now running on - * the boot CPU's pagetables so it's safe for the boot CPU to - * overwrite the non-relocated copy of Xen. Once it's done that, - * and brought up the memory allocator, non-boot CPUs can get their - * own stacks and enter C. */ -1: wfe - dsb - ldr r0, =smp_up_cpu - ldr r1, [r0] /* Which CPU is being booted? */ - teq r1, r12 /* Is it us? */ - bne 1b - -launch: - ldr r0, =init_stack /* Find the boot-time stack */ - ldr sp, [r0] - add sp, #STACK_SIZE /* (which grows down from the top). */ - sub sp, #CPUINFO_sizeof /* Make room for CPU save record */ - mov r0, r10 /* Marshal args: - phys_offset */ - mov r1, r7 /* - machine type */ - mov r2, r8 /* - ATAG address */ - movs r3, r12 /* - CPU ID */ - beq start_xen /* and disappear into the land of C */ - b start_secondary /* (to the appropriate entry point) */ - -/* Fail-stop - * r0: string explaining why */ -fail: PRINT("- Boot failed -\r\n") -1: wfe - b 1b - -#ifdef EARLY_UART_ADDRESS - -/* Bring up the UART. Specific to the PL011 UART. - * Clobbers r0-r2 */ -init_uart: - mov r1, #0x0 - str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */ - mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */ - str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */ - mov r1, #0x60 /* 8n1 */ - str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */ - ldr r1, =0x00000301 /* RXE | TXE | UARTEN */ - str r1, [r11, #0x30] /* -> UARTCR (Control Register) */ - adr r0, 1f - b puts -1: .asciz "- UART enabled -\r\n" - .align 4 - -/* Print early debug messages. Specific to the PL011 UART. - * r0: Nul-terminated string to print. - * Clobbers r0-r2 */ -puts: - ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ - tst r2, #0x8 /* Check BUSY bit */ - bne puts /* Wait for the UART to be ready */ - ldrb r2, [r0], #1 /* Load next char */ - teq r2, #0 /* Exit on nul */ - moveq pc, lr - str r2, [r11] /* -> UARTDR (Data Register) */ - b puts - -/* Print a 32-bit number in hex. Specific to the PL011 UART. - * r0: Number to print. 
- * clobbers r0-r3 */ -putn: - adr r1, hex - mov r3, #8 -1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */ - tst r2, #0x8 /* Check BUSY bit */ - bne 1b /* Wait for the UART to be ready */ - and r2, r0, #0xf0000000 /* Mask off the top nybble */ - ldrb r2, [r1, r2, lsr #28] /* Convert to a char */ - str r2, [r11] /* -> UARTDR (Data Register) */ - lsl r0, #4 /* Roll it through one nybble at a time */ - subs r3, r3, #1 - bne 1b - mov pc, lr - -hex: .ascii "0123456789abcdef" - .align 2 - -#else /* EARLY_UART_ADDRESS */ - -init_uart: -.global early_puts -early_puts: -puts: -putn: mov pc, lr - -#endif /* EARLY_UART_ADDRESS */ - -/* - * Local variables: - * mode: ASM - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c index a50281b04b..587a725dc2 100644 --- a/xen/arch/arm/irq.c +++ b/xen/arch/arm/irq.c @@ -25,7 +25,7 @@ #include #include -#include "gic.h" +#include static void enable_none(struct irq_desc *irq) { } static unsigned int startup_none(struct irq_desc *irq) { return 0; } diff --git a/xen/arch/arm/lib/Makefile b/xen/arch/arm/lib/Makefile deleted file mode 100644 index 4cf41f41e1..0000000000 --- a/xen/arch/arm/lib/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -obj-y += memcpy.o memmove.o memset.o memzero.o -obj-y += findbit.o setbit.o -obj-y += setbit.o clearbit.o changebit.o -obj-y += testsetbit.o testclearbit.o testchangebit.o -obj-y += lib1funcs.o lshrdi3.o div64.o diff --git a/xen/arch/arm/lib/assembler.h b/xen/arch/arm/lib/assembler.h deleted file mode 100644 index f8d4b3aa04..0000000000 --- a/xen/arch/arm/lib/assembler.h +++ /dev/null @@ -1,325 +0,0 @@ -/* From Linux arch/arm/include/asm/assembler.h */ -/* - * arch/arm/include/asm/assembler.h - * - * Copyright (C) 1996-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This file contains arm architecture specific defines - * for the different processors. - * - * Do not include any C declarations in this file - it is included by - * assembler source. - */ -#ifndef __ASM_ASSEMBLER_H__ -#define __ASM_ASSEMBLER_H__ - -#ifndef __ASSEMBLY__ -#error "Only include this from assembly code" -#endif - -// No Thumb, hence: -#define W(instr) instr -#define ARM(instr...) instr -#define THUMB(instr...) - -#ifdef CONFIG_ARM_UNWIND -#define UNWIND(code...) code -#else -#define UNWIND(code...) -#endif - -/* - * Endian independent macros for shifting bytes within registers. - */ -#ifndef __ARMEB__ -#define pull lsr -#define push lsl -#define get_byte_0 lsl #0 -#define get_byte_1 lsr #8 -#define get_byte_2 lsr #16 -#define get_byte_3 lsr #24 -#define put_byte_0 lsl #0 -#define put_byte_1 lsl #8 -#define put_byte_2 lsl #16 -#define put_byte_3 lsl #24 -#else -#define pull lsl -#define push lsr -#define get_byte_0 lsr #24 -#define get_byte_1 lsr #16 -#define get_byte_2 lsr #8 -#define get_byte_3 lsl #0 -#define put_byte_0 lsl #24 -#define put_byte_1 lsl #16 -#define put_byte_2 lsl #8 -#define put_byte_3 lsl #0 -#endif - -/* - * Data preload for architectures that support it - */ -#if __LINUX_ARM_ARCH__ >= 5 -#define PLD(code...) code -#else -#define PLD(code...) -#endif - -/* - * This can be used to enable code to cacheline align the destination - * pointer when bulk writing to memory. 
Experiments on StrongARM and - * XScale didn't show this a worthwhile thing to do when the cache is not - * set to write-allocate (this would need further testing on XScale when WA - * is used). - * - * On Feroceon there is much to gain however, regardless of cache mode. - */ -#ifdef CONFIG_CPU_FEROCEON -#define CALGN(code...) code -#else -#define CALGN(code...) -#endif - -/* - * Enable and disable interrupts - */ -#if __LINUX_ARM_ARCH__ >= 6 - .macro disable_irq_notrace - cpsid i - .endm - - .macro enable_irq_notrace - cpsie i - .endm -#else - .macro disable_irq_notrace - msr cpsr_c, #PSR_I_BIT | SVC_MODE - .endm - - .macro enable_irq_notrace - msr cpsr_c, #SVC_MODE - .endm -#endif - - .macro asm_trace_hardirqs_off -#if defined(CONFIG_TRACE_IRQFLAGS) - stmdb sp!, {r0-r3, ip, lr} - bl trace_hardirqs_off - ldmia sp!, {r0-r3, ip, lr} -#endif - .endm - - .macro asm_trace_hardirqs_on_cond, cond -#if defined(CONFIG_TRACE_IRQFLAGS) - /* - * actually the registers should be pushed and pop'd conditionally, but - * after bl the flags are certainly clobbered - */ - stmdb sp!, {r0-r3, ip, lr} - bl\cond trace_hardirqs_on - ldmia sp!, {r0-r3, ip, lr} -#endif - .endm - - .macro asm_trace_hardirqs_on - asm_trace_hardirqs_on_cond al - .endm - - .macro disable_irq - disable_irq_notrace - asm_trace_hardirqs_off - .endm - - .macro enable_irq - asm_trace_hardirqs_on - enable_irq_notrace - .endm -/* - * Save the current IRQ state and disable IRQs. Note that this macro - * assumes FIQs are enabled, and that the processor is in SVC mode. - */ - .macro save_and_disable_irqs, oldcpsr - mrs \oldcpsr, cpsr - disable_irq - .endm - -/* - * Restore interrupt state previously stored in a register. We don't - * guarantee that this will preserve the flags. - */ - .macro restore_irqs_notrace, oldcpsr - msr cpsr_c, \oldcpsr - .endm - - .macro restore_irqs, oldcpsr - tst \oldcpsr, #PSR_I_BIT - asm_trace_hardirqs_on_cond eq - restore_irqs_notrace \oldcpsr - .endm - -#define USER(x...) \ -9999: x; \ - .pushsection __ex_table,"a"; \ - .align 3; \ - .long 9999b,9001f; \ - .popsection - -#ifdef CONFIG_SMP -#define ALT_SMP(instr...) \ -9998: instr -/* - * Note: if you get assembler errors from ALT_UP() when building with - * CONFIG_THUMB2_KERNEL, you almost certainly need to use - * ALT_SMP( W(instr) ... ) - */ -#define ALT_UP(instr...) \ - .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ -9997: instr ;\ - .if . - 9997b != 4 ;\ - .error "ALT_UP() content must assemble to exactly 4 bytes";\ - .endif ;\ - .popsection -#define ALT_UP_B(label) \ - .equ up_b_offset, label - 9998b ;\ - .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ - W(b) . + up_b_offset ;\ - .popsection -#else -#define ALT_SMP(instr...) -#define ALT_UP(instr...) 
instr -#define ALT_UP_B(label) b label -#endif - -/* - * Instruction barrier - */ - .macro instr_sync -#if __LINUX_ARM_ARCH__ >= 7 - isb -#elif __LINUX_ARM_ARCH__ == 6 - mcr p15, 0, r0, c7, c5, 4 -#endif - .endm - -/* - * SMP data memory barrier - */ - .macro smp_dmb mode -#ifdef CONFIG_SMP -#if __LINUX_ARM_ARCH__ >= 7 - .ifeqs "\mode","arm" - ALT_SMP(dmb) - .else - ALT_SMP(W(dmb)) - .endif -#elif __LINUX_ARM_ARCH__ == 6 - ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb -#else -#error Incompatible SMP platform -#endif - .ifeqs "\mode","arm" - ALT_UP(nop) - .else - ALT_UP(W(nop)) - .endif -#endif - .endm - -#ifdef CONFIG_THUMB2_KERNEL - .macro setmode, mode, reg - mov \reg, #\mode - msr cpsr_c, \reg - .endm -#else - .macro setmode, mode, reg - msr cpsr_c, #\mode - .endm -#endif - -/* - * STRT/LDRT access macros with ARM and Thumb-2 variants - */ -#ifdef CONFIG_THUMB2_KERNEL - - .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T() -9999: - .if \inc == 1 - \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] - .elseif \inc == 4 - \instr\cond\()\t\().w \reg, [\ptr, #\off] - .else - .error "Unsupported inc macro argument" - .endif - - .pushsection __ex_table,"a" - .align 3 - .long 9999b, \abort - .popsection - .endm - - .macro usracc, instr, reg, ptr, inc, cond, rept, abort - @ explicit IT instruction needed because of the label - @ introduced by the USER macro - .ifnc \cond,al - .if \rept == 1 - itt \cond - .elseif \rept == 2 - ittt \cond - .else - .error "Unsupported rept macro argument" - .endif - .endif - - @ Slightly optimised to avoid incrementing the pointer twice - usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort - .if \rept == 2 - usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort - .endif - - add\cond \ptr, #\rept * \inc - .endm - -#else /* !CONFIG_THUMB2_KERNEL */ - - .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=T() - .rept \rept -9999: - .if \inc == 1 - \instr\cond\()b\()\t \reg, [\ptr], #\inc - .elseif \inc == 4 - \instr\cond\()\t \reg, [\ptr], #\inc - .else - .error "Unsupported inc macro argument" - .endif - - .pushsection __ex_table,"a" - .align 3 - .long 9999b, \abort - .popsection - .endr - .endm - -#endif /* CONFIG_THUMB2_KERNEL */ - - .macro strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f - usracc str, \reg, \ptr, \inc, \cond, \rept, \abort - .endm - - .macro ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f - usracc ldr, \reg, \ptr, \inc, \cond, \rept, \abort - .endm - -/* Utility macro for declaring string literals */ - .macro string name:req, string - .type \name , #object -\name: - .asciz "\string" - .size \name , . 
- \name - .endm - -#endif /* __ASM_ASSEMBLER_H__ */ diff --git a/xen/arch/arm/lib/bitops.h b/xen/arch/arm/lib/bitops.h deleted file mode 100644 index 689f2e8665..0000000000 --- a/xen/arch/arm/lib/bitops.h +++ /dev/null @@ -1,87 +0,0 @@ -#include - -#if __LINUX_ARM_ARCH__ >= 6 - .macro bitop, instr - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - mov r2, #1 - and r3, r0, #31 @ Get bit offset - mov r0, r0, lsr #5 - add r1, r1, r0, lsl #2 @ Get word offset - mov r3, r2, lsl r3 -1: ldrex r2, [r1] - \instr r2, r2, r3 - strex r0, r2, [r1] - cmp r0, #0 - bne 1b - bx lr - .endm - - .macro testop, instr, store - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - mov r2, #1 - and r3, r0, #31 @ Get bit offset - mov r0, r0, lsr #5 - add r1, r1, r0, lsl #2 @ Get word offset - mov r3, r2, lsl r3 @ create mask - smp_dmb -1: ldrex r2, [r1] - ands r0, r2, r3 @ save old value of bit - \instr r2, r2, r3 @ toggle bit - strex ip, r2, [r1] - cmp ip, #0 - bne 1b - smp_dmb - cmp r0, #0 - movne r0, #1 -2: bx lr - .endm -#else - .macro bitop, name, instr -ENTRY( \name ) -UNWIND( .fnstart ) - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - and r2, r0, #31 - mov r0, r0, lsr #5 - mov r3, #1 - mov r3, r3, lsl r2 - save_and_disable_irqs ip - ldr r2, [r1, r0, lsl #2] - \instr r2, r2, r3 - str r2, [r1, r0, lsl #2] - restore_irqs ip - mov pc, lr -UNWIND( .fnend ) -ENDPROC(\name ) - .endm - -/** - * testop - implement a test_and_xxx_bit operation. - * @instr: operational instruction - * @store: store instruction - * - * Note: we can trivially conditionalise the store instruction - * to avoid dirtying the data cache. - */ - .macro testop, name, instr, store -ENTRY( \name ) -UNWIND( .fnstart ) - ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned - and r3, r0, #31 - mov r0, r0, lsr #5 - save_and_disable_irqs ip - ldr r2, [r1, r0, lsl #2]! - mov r0, #1 - tst r2, r0, lsl r3 - \instr r2, r2, r0, lsl r3 - \store r2, [r1] - moveq r0, #0 - restore_irqs ip - mov pc, lr -UNWIND( .fnend ) -ENDPROC(\name ) - .endm -#endif diff --git a/xen/arch/arm/lib/changebit.S b/xen/arch/arm/lib/changebit.S deleted file mode 100644 index 62954bcd07..0000000000 --- a/xen/arch/arm/lib/changebit.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * linux/arch/arm/lib/changebit.S - * - * Copyright (C) 1995-1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include - -#include "assembler.h" -#include "bitops.h" - .text - -ENTRY(_change_bit) - bitop eor -ENDPROC(_change_bit) diff --git a/xen/arch/arm/lib/clearbit.S b/xen/arch/arm/lib/clearbit.S deleted file mode 100644 index 42ce41656d..0000000000 --- a/xen/arch/arm/lib/clearbit.S +++ /dev/null @@ -1,19 +0,0 @@ -/* - * linux/arch/arm/lib/clearbit.S - * - * Copyright (C) 1995-1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
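As an aside, the ldrex/strex loop in the bitop/testop macros above implements an atomic read-modify-write on one word of the bitmap, bracketed by smp_dmb barriers. In portable C the same effect can be sketched with compiler atomics (a hypothetical helper, not how Xen spells it; the real macro is stamped out once per operation, orr/bic/eor):

    #include <stdint.h>
    #include <stdbool.h>

    /* Sketch of what the testop loop computes for the "set" case. */
    static bool test_and_set_bit_sketch(unsigned nr, volatile uint32_t *addr)
    {
        volatile uint32_t *word = addr + (nr >> 5);  /* word offset, as in the asm */
        uint32_t mask = 1u << (nr & 31);             /* bit offset within the word */
        uint32_t old = __atomic_fetch_or((uint32_t *)word, mask,
                                         __ATOMIC_SEQ_CST);
        return (old & mask) != 0;                    /* old value of the bit */
    }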
- */ - -#include - -#include "assembler.h" -#include "bitops.h" - .text - -ENTRY(_clear_bit) - bitop bic -ENDPROC(_clear_bit) diff --git a/xen/arch/arm/lib/copy_template.S b/xen/arch/arm/lib/copy_template.S deleted file mode 100644 index 805e3f8fb0..0000000000 --- a/xen/arch/arm/lib/copy_template.S +++ /dev/null @@ -1,267 +0,0 @@ -/* - * linux/arch/arm/lib/copy_template.s - * - * Code template for optimized memory copy functions - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * Theory of operation - * ------------------- - * - * This file provides the core code for a forward memory copy used in - * the implementation of memcopy(), copy_to_user() and copy_from_user(). - * - * The including file must define the following accessor macros - * according to the need of the given function: - * - * ldr1w ptr reg abort - * - * This loads one word from 'ptr', stores it in 'reg' and increments - * 'ptr' to the next word. The 'abort' argument is used for fixup tables. - * - * ldr4w ptr reg1 reg2 reg3 reg4 abort - * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * - * This loads four or eight words starting from 'ptr', stores them - * in provided registers and increments 'ptr' past those words. - * The'abort' argument is used for fixup tables. - * - * ldr1b ptr reg cond abort - * - * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. - * It also must apply the condition code if provided, otherwise the - * "al" condition is assumed by default. - * - * str1w ptr reg abort - * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * str1b ptr reg cond abort - * - * Same as their ldr* counterparts, but data is stored to 'ptr' location - * rather than being loaded. - * - * enter reg1 reg2 - * - * Preserve the provided registers on the stack plus any additional - * data as needed by the implementation including this code. Called - * upon code entry. - * - * exit reg1 reg2 - * - * Restore registers with the values previously saved with the - * 'preserv' macro. Called upon code termination. 
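The accessor-macro contract described above lets one template serve several copy functions. Stripped of the PLD/CALGN tuning and the unaligned-source paths, the control flow the template implements looks roughly like this C sketch (illustrative only):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: align the destination, stream 32-byte blocks, mop up the
     * tail.  The asm additionally fixes up a misaligned *source* with
     * the pull/push shifts and hooks every access for fixup tables. */
    static void *copy_template_shape(void *dst, const void *src, size_t n)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        while (n && ((uintptr_t)d & 3)) {     /* label 9: byte-align dest */
            *d++ = *s++;
            n--;
        }
        while (n >= 32) {                     /* labels 3/4: ldr8w + str8w */
            uint32_t w[8];
            __builtin_memcpy(w, s, 32);       /* stands in for ldr8w */
            __builtin_memcpy(d, w, 32);       /* stands in for str8w */
            s += 32;
            d += 32;
            n -= 32;
        }
        while (n--)                           /* label 8: trailing bytes */
            *d++ = *s++;
        return dst;
    }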
- * - * LDR1W_SHIFT - * STR1W_SHIFT - * - * Correction to be applied to the "ip" register when branching into - * the ldr1w or str1w instructions (some of these macros may expand to - * than one 32bit instruction in Thumb-2) - */ - - - enter r4, lr - - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -3: PLD( pld [r1, #124] ) -4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - subs r2, r2, #32 - str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 -#if LDR1W_SHIFT > 0 - lsl ip, ip, #LDR1W_SHIFT -#endif - addne pc, pc, ip @ C is always clear here - b 7f -6: - .rept (1 << LDR1W_SHIFT) - W(nop) - .endr - ldr1w r1, r3, abort=20f - ldr1w r1, r4, abort=20f - ldr1w r1, r5, abort=20f - ldr1w r1, r6, abort=20f - ldr1w r1, r7, abort=20f - ldr1w r1, r8, abort=20f - ldr1w r1, lr, abort=20f - -#if LDR1W_SHIFT < STR1W_SHIFT - lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT -#elif LDR1W_SHIFT > STR1W_SHIFT - lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT -#endif - add pc, pc, ip - nop - .rept (1 << STR1W_SHIFT) - W(nop) - .endr - str1w r0, r3, abort=20f - str1w r0, r4, abort=20f - str1w r0, r5, abort=20f - str1w r0, r6, abort=20f - str1w r0, r7, abort=20f - str1w r0, r8, abort=20f - str1w r0, lr, abort=20f - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - -8: movs r2, r2, lsl #31 - ldr1b r1, r3, ne, abort=21f - ldr1b r1, r4, cs, abort=21f - ldr1b r1, ip, cs, abort=21f - str1b r0, r3, ne, abort=21f - str1b r0, r4, cs, abort=21f - str1b r0, ip, cs, abort=21f - - exit r4, pc - -9: rsb ip, ip, #4 - cmp ip, #2 - ldr1b r1, r3, gt, abort=21f - ldr1b r1, r4, ge, abort=21f - ldr1b r1, lr, abort=21f - str1b r0, r3, gt, abort=21f - str1b r0, r4, ge, abort=21f - subs r2, r2, ip - str1b r0, lr, abort=21f - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr1w r1, lr, abort=21f - beq 17f - bgt 18f - - - .macro forward_copy_shift pull push - - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -12: PLD( pld [r1, #124] ) -13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, pull #\pull - subs r2, r2, #32 - ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, push #\push - mov r4, r4, pull #\pull - orr r4, r4, r5, push #\push - mov r5, r5, pull #\pull - orr r5, r5, r6, push #\push - mov r6, r6, pull #\pull - orr r6, r6, r7, push #\push - mov r7, r7, pull #\pull - orr r7, r7, r8, push #\push - mov r8, r8, pull #\pull - orr r8, r8, r9, push #\push - mov r9, r9, pull #\pull - orr r9, r9, ip, push #\push - mov ip, ip, pull #\pull - orr ip, ip, lr, push #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - -14: ands ip, r2, #28 - beq 16f - -15: mov r3, lr, pull #\pull - ldr1w r1, lr, abort=21f - 
subs ip, ip, #4 - orr r3, r3, lr, push #\push - str1w r0, r3, abort=21f - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: sub r1, r1, #(\push / 8) - b 8b - - .endm - - - forward_copy_shift pull=8 push=24 - -17: forward_copy_shift pull=16 push=16 - -18: forward_copy_shift pull=24 push=8 - - -/* - * Abort preamble and completion macros. - * If a fixup handler is required then those macros must surround it. - * It is assumed that the fixup code will handle the private part of - * the exit macro. - */ - - .macro copy_abort_preamble -19: ldmfd sp!, {r5 - r9} - b 21f -20: ldmfd sp!, {r5 - r8} -21: - .endm - - .macro copy_abort_end - ldmfd sp!, {r4, pc} - .endm - diff --git a/xen/arch/arm/lib/div64.S b/xen/arch/arm/lib/div64.S deleted file mode 100644 index 83a5f22a84..0000000000 --- a/xen/arch/arm/lib/div64.S +++ /dev/null @@ -1,211 +0,0 @@ -/* - * linux/arch/arm/lib/div64.S - * - * Optimized computation of 64-bit dividend / 32-bit divisor - * - * Author: Nicolas Pitre - * Created: Oct 5, 2003 - * Copyright: Monta Vista Software, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include "assembler.h" - -#ifdef __ARMEB__ -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#else -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#endif - -/* - * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. - * - * Note: Calling convention is totally non standard for optimal code. - * This is meant to be used by do_div() from include/asm/div64.h only. - * - * Input parameters: - * xh-xl = dividend (clobbered) - * r4 = divisor (preserved) - * - * Output values: - * yh-yl = result - * xh = remainder - * - * Clobbered regs: xl, ip - */ - -ENTRY(__do_div64) -UNWIND(.fnstart) - - @ Test for easy paths first. - subs ip, r4, #1 - bls 9f @ divisor is 0 or 1 - tst ip, r4 - beq 8f @ divisor is power of 2 - - @ See if we need to handle upper 32-bit result. - cmp xh, r4 - mov yh, #0 - blo 3f - - @ Align divisor with upper part of dividend. - @ The aligned divisor is stored in yl preserving the original. - @ The bit position is stored in ip. - -#if __LINUX_ARM_ARCH__ >= 5 - - clz yl, r4 - clz ip, xh - sub yl, yl, ip - mov ip, #1 - mov ip, ip, lsl yl - mov yl, r4, lsl yl - -#else - - mov yl, r4 - mov ip, #1 -1: cmp yl, #0x80000000 - cmpcc yl, xh - movcc yl, yl, lsl #1 - movcc ip, ip, lsl #1 - bcc 1b - -#endif - - @ The division loop for needed upper bit positions. - @ Break out early if dividend reaches 0. -2: cmp xh, yl - orrcs yh, yh, ip - subcss xh, xh, yl - movnes ip, ip, lsr #1 - mov yl, yl, lsr #1 - bne 2b - - @ See if we need to handle lower 32-bit result. -3: cmp xh, #0 - mov yl, #0 - cmpeq xl, r4 - movlo xh, xl - movlo pc, lr - - @ The division loop for lower bit positions. - @ Here we shift remainer bits leftwards rather than moving the - @ divisor for comparisons, considering the carry-out bit as well. - mov ip, #0x80000000 -4: movs xl, xl, lsl #1 - adcs xh, xh, xh - beq 6f - cmpcc xh, r4 -5: orrcs yl, yl, ip - subcs xh, xh, r4 - movs ip, ip, lsr #1 - bne 4b - mov pc, lr - - @ The top part of remainder became zero. If carry is set - @ (the 33th bit) this is a false positive so resume the loop. - @ Otherwise, if lower part is also null then we are done. -6: bcs 5b - cmp xl, #0 - moveq pc, lr - - @ We still have remainer bits in the low part. Bring them up. 
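The register-level calling convention documented above (xh-xl dividend in, yh-yl quotient and xh remainder out) corresponds to the following C model of the routine: align the divisor under the dividend, then produce one quotient bit per step. A sketch of the algorithm only; the divisor == 0 case is handled separately in the asm via Ldiv0_64/__div0:

    #include <stdint.h>

    static uint64_t do_div64_sketch(uint64_t dividend, uint32_t divisor,
                                    uint32_t *remainder)
    {
        uint64_t quotient = 0;
        uint64_t d = divisor;
        int shift = 0;

        /* Align divisor with the top of the dividend (clz on ARMv5+). */
        while (d <= (dividend >> 1) && shift < 63) {
            d <<= 1;
            shift++;
        }
        /* One quotient bit per position, subtracting where it fits. */
        while (shift >= 0) {
            if (dividend >= d) {
                dividend -= d;
                quotient |= 1ull << shift;
            }
            d >>= 1;
            shift--;
        }
        *remainder = (uint32_t)dividend;
        return quotient;
    }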
- -#if __LINUX_ARM_ARCH__ >= 5 - - clz xh, xl @ we know xh is zero here so... - add xh, xh, #1 - mov xl, xl, lsl xh - mov ip, ip, lsr xh - -#else - -7: movs xl, xl, lsl #1 - mov ip, ip, lsr #1 - bcc 7b - -#endif - - @ Current remainder is now 1. It is worthless to compare with - @ divisor at this point since divisor can not be smaller than 3 here. - @ If possible, branch for another shift in the division loop. - @ If no bit position left then we are done. - movs ip, ip, lsr #1 - mov xh, #1 - bne 4b - mov pc, lr - -8: @ Division by a power of 2: determine what that divisor order is - @ then simply shift values around - -#if __LINUX_ARM_ARCH__ >= 5 - - clz ip, r4 - rsb ip, ip, #31 - -#else - - mov yl, r4 - cmp r4, #(1 << 16) - mov ip, #0 - movhs yl, yl, lsr #16 - movhs ip, #16 - - cmp yl, #(1 << 8) - movhs yl, yl, lsr #8 - addhs ip, ip, #8 - - cmp yl, #(1 << 4) - movhs yl, yl, lsr #4 - addhs ip, ip, #4 - - cmp yl, #(1 << 2) - addhi ip, ip, #3 - addls ip, ip, yl, lsr #1 - -#endif - - mov yh, xh, lsr ip - mov yl, xl, lsr ip - rsb ip, ip, #32 - ARM( orr yl, yl, xh, lsl ip ) - THUMB( lsl xh, xh, ip ) - THUMB( orr yl, yl, xh ) - mov xh, xl, lsl ip - mov xh, xh, lsr ip - mov pc, lr - - @ eq -> division by 1: obvious enough... -9: moveq yl, xl - moveq yh, xh - moveq xh, #0 - moveq pc, lr -UNWIND(.fnend) - -UNWIND(.fnstart) -UNWIND(.pad #4) -UNWIND(.save {lr}) -Ldiv0_64: - @ Division by 0: - str lr, [sp, #-8]! - bl __div0 - - @ as wrong as it could be... - mov yl, #0 - mov yh, #0 - mov xh, #0 - ldr pc, [sp], #8 - -UNWIND(.fnend) -ENDPROC(__do_div64) diff --git a/xen/arch/arm/lib/findbit.S b/xen/arch/arm/lib/findbit.S deleted file mode 100644 index 2fbcc82995..0000000000 --- a/xen/arch/arm/lib/findbit.S +++ /dev/null @@ -1,198 +0,0 @@ -/* - * linux/arch/arm/lib/findbit.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * 16th March 2001 - John Ripley - * Fixed so that "size" is an exclusive not an inclusive quantity. - * All users of these functions expect exclusive sizes, and may - * also call with zero size. - * Reworked by rmk. - */ - -#include - -#include "assembler.h" - .text - -/* - * Purpose : Find a 'zero' bit - * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); - */ -ENTRY(_find_first_zero_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 -1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? 
- blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr -ENDPROC(_find_first_zero_bit_le) - -/* - * Purpose : Find next 'zero' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_zero_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_le) - -/* - * Purpose : Find a 'one' bit - * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); - */ -ENTRY(_find_first_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 -1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr -ENDPROC(_find_first_bit_le) - -/* - * Purpose : Find next 'one' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_le) - -#ifdef __ARMEB__ - -ENTRY(_find_first_zero_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr -ENDPROC(_find_first_zero_bit_be) - -ENTRY(_find_next_zero_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_be) - -ENTRY(_find_first_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - mov pc, lr -ENDPROC(_find_first_bit_be) - -ENTRY(_find_next_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_be) - -#endif - -/* - * One or more bits in the LSB of r3 are assumed to be set. 
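All the find_* entry points above share one shape: scan a byte at a time for a byte of interest, then resolve the exact bit in .L_found. A C sketch for the first-zero-bit case, using a ctz builtin where the ARMv5+ asm uses its negate/and/clz trick, and remembering that `size` is exclusive per the 2001 fix noted in the header (names illustrative):

    #include <stdint.h>

    static unsigned find_first_zero_bit_sketch(const uint8_t *addr,
                                               unsigned maxbit)
    {
        for (unsigned bit = 0; bit < maxbit; bit += 8) {
            uint8_t b = (uint8_t)~addr[bit >> 3];  /* invert: hunt a set bit */
            if (b) {
                unsigned r = bit + (unsigned)__builtin_ctz(b);
                return r < maxbit ? r : maxbit;    /* clamp, like .L_found */
            }
        }
        return maxbit;                             /* no zero bit found */
    }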
- */ -.L_found: -#if __LINUX_ARM_ARCH__ >= 5 - rsb r0, r3, #0 - and r3, r3, r0 - clz r3, r3 - rsb r3, r3, #31 - add r0, r2, r3 -#else - tst r3, #0x0f - addeq r2, r2, #4 - movne r3, r3, lsl #4 - tst r3, #0x30 - addeq r2, r2, #2 - movne r3, r3, lsl #2 - tst r3, #0x40 - addeq r2, r2, #1 - mov r0, r2 -#endif - cmp r1, r0 @ Clamp to maxbit - movlo r0, r1 - mov pc, lr - diff --git a/xen/arch/arm/lib/lib1funcs.S b/xen/arch/arm/lib/lib1funcs.S deleted file mode 100644 index 95ee3121d9..0000000000 --- a/xen/arch/arm/lib/lib1funcs.S +++ /dev/null @@ -1,389 +0,0 @@ -/* - * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines - * - * Author: Nicolas Pitre - * - contributed to gcc-3.4 on Sep 30, 2003 - * - adapted for the Linux kernel on Oct 2, 2003 - */ - -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - - -#include -#include "assembler.h" - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. 
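The two alignment loops just described, coarse 4-bit steps while it is safe and then 1-bit steps near the top of the register, can be written out in C as follows (sketch only; `aligned` and `curbit` mirror \divisor and \curbit):

    #include <stdint.h>

    static void align_divisor_sketch(uint32_t dividend, uint32_t divisor,
                                     uint32_t *aligned, uint32_t *curbit)
    {
        uint32_t d = divisor, bit = 1;

        while (d < 0x10000000u && d < dividend) {  /* coarse: 4 bits/step */
            d <<= 4;
            bit <<= 4;
        }
        while (d < 0x80000000u && d < dividend) {  /* fine: 1 bit/step */
            d <<= 1;
            bit <<= 1;
        }
        *aligned = d;
        *curbit = bit;
    }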
-1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. - subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. 
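The no-clz fallback in the ARM_DIV2_ORDER macro above is a classic bisection log2 for a power-of-two divisor; in C it reads (sketch):

    #include <stdint.h>

    static unsigned div2_order_sketch(uint32_t divisor)
    {
        unsigned order = 0;

        if (divisor >= 1u << 16) { divisor >>= 16; order += 16; }
        if (divisor >= 1u << 8)  { divisor >>= 8;  order += 8;  }
        if (divisor >= 1u << 4)  { divisor >>= 4;  order += 4;  }
        if (divisor > 4)         order += 3;            /* addhi: divisor is 8 */
        else                     order += divisor >> 1; /* addls: 1, 2 or 4 */
        return order;
    }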
-2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - -ENTRY(__udivsi3) -ENTRY(__aeabi_uidiv) -UNWIND(.fnstart) - - subs r2, r1, #1 - moveq pc, lr - bcc Ldiv0 - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f - - ARM_DIV_BODY r0, r1, r2, r3 - - mov r0, r2 - mov pc, lr - -11: moveq r0, #1 - movne r0, #0 - mov pc, lr - -12: ARM_DIV2_ORDER r1, r2 - - mov r0, r0, lsr r2 - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__udivsi3) -ENDPROC(__aeabi_uidiv) - -ENTRY(__umodsi3) -UNWIND(.fnstart) - - subs r2, r1, #1 @ compare divisor with 1 - bcc Ldiv0 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - movls pc, lr - - ARM_MOD_BODY r0, r1, r2, r3 - - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__umodsi3) - -ENTRY(__divsi3) -ENTRY(__aeabi_idiv) -UNWIND(.fnstart) - - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__divsi3) -ENDPROC(__aeabi_idiv) - -ENTRY(__modsi3) -UNWIND(.fnstart) - - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__modsi3) - -#ifdef CONFIG_AEABI - -ENTRY(__aeabi_uidivmod) -UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__aeabi_uidivmod) - -ENTRY(__aeabi_idivmod) -UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__aeabi_idivmod) - -ENTRY(__aeabi_uldivmod) -UNWIND(.fnstart) -UNWIND(.save {lr} ) - sub sp, sp, #8 - stmfd sp!, {sp, lr} - bl __qdivrem - ldr lr, [sp, #4] - add sp, sp, #8 - ldmfd sp!, {r2, r3} - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__aeabi_uldivmod) - -ENTRY(__aeabi_ldivmod) -UNWIND(.fnstart) -UNWIND(.save {lr} ) - sub sp, sp, #16 - stmfd sp!, {sp, lr} - bl __ldivmod_helper - ldr lr, [sp, #4] - add sp, sp, #16 - ldmfd sp!, {r2, r3} - mov pc, lr - -UNWIND(.fnend) -ENDPROC(__aeabi_ldivmod) -#endif - -Ldiv0: -UNWIND(.fnstart) -UNWIND(.pad #4) -UNWIND(.save {lr}) - str lr, [sp, #-8]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. 
- ldr pc, [sp], #8 -UNWIND(.fnend) -ENDPROC(Ldiv0) diff --git a/xen/arch/arm/lib/lshrdi3.S b/xen/arch/arm/lib/lshrdi3.S deleted file mode 100644 index 3e8887ef4e..0000000000 --- a/xen/arch/arm/lib/lshrdi3.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - - -#include -#include "assembler.h" - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -ENTRY(__lshrdi3) -ENTRY(__aeabi_llsr) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - ARM( orrmi al, al, ah, lsl ip ) - THUMB( lslmi r3, ah, ip ) - THUMB( orrmi al, al, r3 ) - mov ah, ah, lsr r2 - mov pc, lr - -ENDPROC(__lshrdi3) -ENDPROC(__aeabi_llsr) diff --git a/xen/arch/arm/lib/memcpy.S b/xen/arch/arm/lib/memcpy.S deleted file mode 100644 index d1ab9fbb55..0000000000 --- a/xen/arch/arm/lib/memcpy.S +++ /dev/null @@ -1,63 +0,0 @@ -/* - * linux/arch/arm/lib/memcpy.S - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
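The __lshrdi3/__aeabi_llsr routine above splits a 64-bit logical right shift into its 32-bit halves, with the movmi/movpl pair selecting between the small-shift and large-shift cases. The equivalent C (sketch; shift assumed 0-63, and the shift-by-zero case handled explicitly where the ARM register-shift semantics do it for free):

    #include <stdint.h>

    static uint64_t lshrdi3_sketch(uint32_t lo, uint32_t hi, unsigned shift)
    {
        uint32_t out_lo, out_hi;

        if (shift < 32) {                 /* movmi path: shift - 32 < 0 */
            out_lo = (lo >> shift) | (shift ? hi << (32 - shift) : 0);
            out_hi = hi >> shift;
        } else {                          /* movpl path */
            out_lo = hi >> (shift - 32);
            out_hi = 0;
        }
        return ((uint64_t)out_hi << 32) | out_lo;
    }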
- */ - -#include -#include "assembler.h" - -#define LDR1W_SHIFT 0 -#define STR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro ldr1b ptr reg cond=al abort - ldr\cond\()b \reg, [\ptr], #1 - .endm - - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro str1b ptr reg cond=al abort - str\cond\()b \reg, [\ptr], #1 - .endm - - .macro enter reg1 reg2 - stmdb sp!, {r0, \reg1, \reg2} - .endm - - .macro exit reg1 reg2 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - - .text - -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ - -ENTRY(memcpy) - -#include "copy_template.S" - -ENDPROC(memcpy) diff --git a/xen/arch/arm/lib/memmove.S b/xen/arch/arm/lib/memmove.S deleted file mode 100644 index 4e142b8aeb..0000000000 --- a/xen/arch/arm/lib/memmove.S +++ /dev/null @@ -1,200 +0,0 @@ -/* - * linux/arch/arm/lib/memmove.S - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: (C) MontaVista Software Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include - -#include "assembler.h" - - .text - -/* - * Prototype: void *memmove(void *dest, const void *src, size_t n); - * - * Note: - * - * If the memory regions don't overlap, we simply branch to memcpy which is - * normally a bit faster. Otherwise the copy is done going downwards. This - * is a transposition of the code from copy_template.S but with the copy - * occurring in the opposite direction. - */ - -ENTRY(memmove) - - subs ip, r0, r1 - cmphi r2, ip - bls memcpy - - stmfd sp!, {r0, r4, lr} - add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #-4] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, ip ) @ C is set here - CALGN( rsb ip, ip, #32 ) - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #-4] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 4f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -3: PLD( pld [r1, #-128] ) -4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: W(nop) - W(ldr) r3, [r1, #-4]! - W(ldr) r4, [r1, #-4]! - W(ldr) r5, [r1, #-4]! - W(ldr) r6, [r1, #-4]! - W(ldr) r7, [r1, #-4]! - W(ldr) r8, [r1, #-4]! - W(ldr) lr, [r1, #-4]! - - add pc, pc, ip - nop - W(nop) - W(str) r3, [r0, #-4]! - W(str) r4, [r0, #-4]! - W(str) r5, [r0, #-4]! - W(str) r6, [r0, #-4]! - W(str) r7, [r0, #-4]! - W(str) r8, [r0, #-4]! - W(str) lr, [r0, #-4]! - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - -8: movs r2, r2, lsl #31 - ldrneb r3, [r1, #-1]! - ldrcsb r4, [r1, #-1]! - ldrcsb ip, [r1, #-1] - strneb r3, [r0, #-1]! - strcsb r4, [r0, #-1]! 
- strcsb ip, [r0, #-1] - ldmfd sp!, {r0, r4, pc} - -9: cmp ip, #2 - ldrgtb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! - ldrb lr, [r1, #-1]! - strgtb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! - subs r2, r2, ip - strb lr, [r0, #-1]! - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr r3, [r1, #0] - beq 17f - blt 18f - - - .macro backward_copy_shift push pull - - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - - PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 13f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -12: PLD( pld [r1, #-128] ) -13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, push #\push - subs r2, r2, #32 - ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, pull #\pull - mov ip, ip, push #\push - orr ip, ip, r9, pull #\pull - mov r9, r9, push #\push - orr r9, r9, r8, pull #\pull - mov r8, r8, push #\push - orr r8, r8, r7, pull #\pull - mov r7, r7, push #\push - orr r7, r7, r6, pull #\pull - mov r6, r6, push #\push - orr r6, r6, r5, pull #\pull - mov r5, r5, push #\push - orr r5, r5, r4, pull #\pull - mov r4, r4, push #\push - orr r4, r4, r3, pull #\pull - stmdb r0!, {r4 - r9, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - -14: ands ip, r2, #28 - beq 16f - -15: mov lr, r3, push #\push - ldr r3, [r1, #-4]! - subs ip, ip, #4 - orr lr, lr, r3, pull #\pull - str lr, [r0, #-4]! - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: add r1, r1, #(\pull / 8) - b 8b - - .endm - - - backward_copy_shift push=8 pull=24 - -17: backward_copy_shift push=16 pull=16 - -18: backward_copy_shift push=24 pull=8 - -ENDPROC(memmove) diff --git a/xen/arch/arm/lib/memset.S b/xen/arch/arm/lib/memset.S deleted file mode 100644 index d2937a3e0e..0000000000 --- a/xen/arch/arm/lib/memset.S +++ /dev/null @@ -1,129 +0,0 @@ -/* - * linux/arch/arm/lib/memset.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ASM optimised string functions - */ - -#include - -#include "assembler.h" - - .text - .align 5 - .word 0 - -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [r0], #1 @ 1 - strleb r1, [r0], #1 @ 1 - strb r1, [r0], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memset again. - */ - -ENTRY(memset) - ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 -/* - * we know that the pointer in r0 is aligned to a word boundary. - */ - orr r1, r1, r1, lsl #8 - orr r1, r1, r1, lsl #16 - mov r3, r1 - cmp r2, #16 - blt 4f - -#if ! CALGN(1)+0 - -/* - * We need an extra register for this loop - save the return address and - * use the LR - */ - str lr, [sp, #-4]! - mov ip, r1 - mov lr, r1 - -2: subs r2, r2, #64 - stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - stmgeia r0!, {r1, r3, ip, lr} - bgt 2b - ldmeqfd sp!, {pc} @ Now <64 bytes to go. 
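The memmove shown above decides the copy direction with an unsigned compare of dst - src against the length, falling through to memcpy whenever a forward copy is safe. In C (sketch; the raw pointer subtraction mirrors the asm's unsigned compare rather than strict ISO rules):

    #include <stddef.h>

    static void *memmove_sketch(void *dst, const void *src, size_t n)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if ((size_t)(d - s) >= n) {    /* subs ip,r0,r1; cmphi r2,ip; bls memcpy */
            while (n--)
                *d++ = *s++;           /* forward copy is safe (memcpy path) */
        } else {
            d += n;                    /* add r0, r0, r2 */
            s += n;                    /* add r1, r1, r2 */
            while (n--)
                *--d = *--s;           /* copy going downwards */
        }
        return dst;
    }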
-/* - * No need to correct the count; we're only testing bits from now on - */ - tst r2, #32 - stmneia r0!, {r1, r3, ip, lr} - stmneia r0!, {r1, r3, ip, lr} - tst r2, #16 - stmneia r0!, {r1, r3, ip, lr} - ldr lr, [sp], #4 - -#else - -/* - * This version aligns the destination pointer in order to write - * whole cache lines at once. - */ - - stmfd sp!, {r4-r7, lr} - mov r4, r1 - mov r5, r1 - mov r6, r1 - mov r7, r1 - mov ip, r1 - mov lr, r1 - - cmp r2, #96 - tstgt r0, #31 - ble 3f - - and ip, r0, #31 - rsb ip, ip, #32 - sub r2, r2, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - tst ip, #(1 << 30) - mov ip, r1 - strne r1, [r0], #4 - -3: subs r2, r2, #64 - stmgeia r0!, {r1, r3-r7, ip, lr} - stmgeia r0!, {r1, r3-r7, ip, lr} - bgt 3b - ldmeqfd sp!, {r4-r7, pc} - - tst r2, #32 - stmneia r0!, {r1, r3-r7, ip, lr} - tst r2, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} - -#endif - -4: tst r2, #8 - stmneia r0!, {r1, r3} - tst r2, #4 - strne r1, [r0], #4 -/* - * When we get here, we've got less than 4 bytes to zero. We - * may have an unaligned pointer as well. - */ -5: tst r2, #2 - strneb r1, [r0], #1 - strneb r1, [r0], #1 - tst r2, #1 - strneb r1, [r0], #1 - mov pc, lr -ENDPROC(memset) diff --git a/xen/arch/arm/lib/memzero.S b/xen/arch/arm/lib/memzero.S deleted file mode 100644 index ce25acaad1..0000000000 --- a/xen/arch/arm/lib/memzero.S +++ /dev/null @@ -1,127 +0,0 @@ -/* - * linux/arch/arm/lib/memzero.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include - -#include "assembler.h" - - .text - .align 5 - .word 0 -/* - * Align the pointer in r0. r3 contains the number of bytes that we are - * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we - * don't bother; we use byte stores instead. - */ -1: subs r1, r1, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r2, [r0], #1 @ 1 - strleb r2, [r0], #1 @ 1 - strb r2, [r0], #1 @ 1 - add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memzero again. - */ - -ENTRY(__memzero) - mov r2, #0 @ 1 - ands r3, r0, #3 @ 1 unaligned? - bne 1b @ 1 -/* - * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary. - */ - cmp r1, #16 @ 1 we can skip this chunk if we - blt 4f @ 1 have < 16 bytes - -#if ! CALGN(1)+0 - -/* - * We need an extra register for this loop - save the return address and - * use the LR - */ - str lr, [sp, #-4]! @ 1 - mov ip, r2 @ 1 - mov lr, r2 @ 1 - -3: subs r1, r1, #64 @ 1 write 32 bytes out per loop - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - stmgeia r0!, {r2, r3, ip, lr} @ 4 - bgt 3b @ 1 - ldmeqfd sp!, {pc} @ 1/2 quick exit -/* - * No need to correct the count; we're only testing bits from now on - */ - tst r1, #32 @ 1 - stmneia r0!, {r2, r3, ip, lr} @ 4 - stmneia r0!, {r2, r3, ip, lr} @ 4 - tst r1, #16 @ 1 16 bytes or more? - stmneia r0!, {r2, r3, ip, lr} @ 4 - ldr lr, [sp], #4 @ 1 - -#else - -/* - * This version aligns the destination pointer in order to write - * whole cache lines at once. 
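The memset above and the __memzero variant around it share one strategy: replicate the fill byte across a whole register, align the destination, then store multiple words per iteration. A C sketch of that strategy (the asm stores 16-64 bytes per loop; a single word store is shown here):

    #include <stddef.h>
    #include <stdint.h>

    static void *memset_sketch(void *dst, int c, size_t n)
    {
        unsigned char *d = dst;
        uint32_t fill = (uint8_t)c;

        fill |= fill << 8;                  /* orr r1, r1, r1, lsl #8 */
        fill |= fill << 16;                 /* orr r1, r1, r1, lsl #16 */

        while (n && ((uintptr_t)d & 3)) {   /* label 1: word-align pointer */
            *d++ = (unsigned char)c;
            n--;
        }
        for (; n >= 4; n -= 4, d += 4)      /* word store loop */
            __builtin_memcpy(d, &fill, 4);
        while (n--)                         /* label 5: trailing bytes */
            *d++ = (unsigned char)c;
        return dst;
    }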
- */ - - stmfd sp!, {r4-r7, lr} - mov r4, r2 - mov r5, r2 - mov r6, r2 - mov r7, r2 - mov ip, r2 - mov lr, r2 - - cmp r1, #96 - andgts ip, r0, #31 - ble 3f - - rsb ip, ip, #32 - sub r1, r1, ip - movs ip, ip, lsl #(32 - 4) - stmcsia r0!, {r4, r5, r6, r7} - stmmiia r0!, {r4, r5} - movs ip, ip, lsl #2 - strcs r2, [r0], #4 - -3: subs r1, r1, #64 - stmgeia r0!, {r2-r7, ip, lr} - stmgeia r0!, {r2-r7, ip, lr} - bgt 3b - ldmeqfd sp!, {r4-r7, pc} - - tst r1, #32 - stmneia r0!, {r2-r7, ip, lr} - tst r1, #16 - stmneia r0!, {r4-r7} - ldmfd sp!, {r4-r7, lr} - -#endif - -4: tst r1, #8 @ 1 8 bytes or more? - stmneia r0!, {r2, r3} @ 2 - tst r1, #4 @ 1 4 bytes or more? - strne r2, [r0], #4 @ 1 -/* - * When we get here, we've got less than 4 bytes to zero. We - * may have an unaligned pointer as well. - */ -5: tst r1, #2 @ 1 2 bytes or more? - strneb r2, [r0], #1 @ 1 - strneb r2, [r0], #1 @ 1 - tst r1, #1 @ 1 a byte left over - strneb r2, [r0], #1 @ 1 - mov pc, lr @ 1 -ENDPROC(__memzero) diff --git a/xen/arch/arm/lib/setbit.S b/xen/arch/arm/lib/setbit.S deleted file mode 100644 index c828851553..0000000000 --- a/xen/arch/arm/lib/setbit.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * linux/arch/arm/lib/setbit.S - * - * Copyright (C) 1995-1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include - -#include "assembler.h" -#include "bitops.h" - .text - -ENTRY(_set_bit) - bitop orr -ENDPROC(_set_bit) diff --git a/xen/arch/arm/lib/testchangebit.S b/xen/arch/arm/lib/testchangebit.S deleted file mode 100644 index a7f527cd98..0000000000 --- a/xen/arch/arm/lib/testchangebit.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * linux/arch/arm/lib/testchangebit.S - * - * Copyright (C) 1995-1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include - -#include "assembler.h" -#include "bitops.h" - .text - -ENTRY(_test_and_change_bit) - testop eor, str -ENDPROC(_test_and_change_bit) diff --git a/xen/arch/arm/lib/testclearbit.S b/xen/arch/arm/lib/testclearbit.S deleted file mode 100644 index 8f39c72936..0000000000 --- a/xen/arch/arm/lib/testclearbit.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * linux/arch/arm/lib/testclearbit.S - * - * Copyright (C) 1995-1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include - -#include "assembler.h" -#include "bitops.h" - .text - -ENTRY(_test_and_clear_bit) - testop bicne, strne -ENDPROC(_test_and_clear_bit) diff --git a/xen/arch/arm/lib/testsetbit.S b/xen/arch/arm/lib/testsetbit.S deleted file mode 100644 index 1b8d273100..0000000000 --- a/xen/arch/arm/lib/testsetbit.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * linux/arch/arm/lib/testsetbit.S - * - * Copyright (C) 1995-1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
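The testop instantiations above predicate both the modify and the store on the old bit value (orreq/streq, bicne/strne), so the store is skipped when the bit already has the target value, avoiding a needless dirtying of the cache line. The pre-ARMv6, interrupts-disabled variant corresponds to this non-atomic C sketch:

    #include <stdint.h>
    #include <stdbool.h>

    static bool test_and_set_bit_uniproc(unsigned nr, uint32_t *addr)
    {
        uint32_t *word = addr + (nr >> 5);
        uint32_t mask = 1u << (nr & 31);
        bool old = (*word & mask) != 0;     /* tst: old value of the bit */

        if (!old)                           /* streq: store only if clear */
            *word |= mask;                  /* orreq */
        return old;
    }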
- */ -#include - -#include "assembler.h" -#include "bitops.h" - .text - -ENTRY(_test_and_set_bit) - testop orreq, streq -ENDPROC(_test_and_set_bit) diff --git a/xen/arch/arm/mode_switch.S b/xen/arch/arm/mode_switch.S deleted file mode 100644 index 3dba38df7d..0000000000 --- a/xen/arch/arm/mode_switch.S +++ /dev/null @@ -1,121 +0,0 @@ -/* - * xen/arch/arm/mode_switch.S - * - * Start-of day code to take a CPU from Secure mode to Hyp mode. - * - * Tim Deegan - * Copyright (c) 2011-2012 Citrix Systems. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include "gic.h" - - -/* XXX: Versatile Express specific code */ -/* wake up secondary cpus */ -.globl kick_cpus -kick_cpus: - /* write start paddr to v2m sysreg FLAGSSET register */ - ldr r0, =(V2M_SYS_MMIO_BASE) /* base V2M sysreg MMIO address */ - dsb - mov r2, #0xffffffff - str r2, [r0, #(V2M_SYS_FLAGSCLR)] - dsb - ldr r2, =start - add r2, r2, r10 - str r2, [r0, #(V2M_SYS_FLAGSSET)] - dsb - /* send an interrupt */ - ldr r0, =(GIC_BASE_ADDRESS + GIC_DR_OFFSET) /* base GICD MMIO address */ - mov r2, #0x1 - str r2, [r0, #(GICD_CTLR * 4)] /* enable distributor */ - mov r2, #0xfe0000 - str r2, [r0, #(GICD_SGIR * 4)] /* send IPI to everybody */ - dsb - str r2, [r0, #(GICD_CTLR * 4)] /* disable distributor */ - mov pc, lr - - -/* Get up a CPU into Hyp mode. Clobbers r0-r3. - * - * Expects r12 == CPU number - * - * This code is specific to the VE model, and not intended to be used - * on production systems. As such it's a bit hackier than the main - * boot code in head.S. In future it will be replaced by better - * integration with the bootloader/firmware so that Xen always starts - * in Hyp mode. */ - -.globl enter_hyp_mode -enter_hyp_mode: - mov r3, lr /* Put return address in non-banked reg */ - cpsid aif, #0x16 /* Enter Monitor mode */ - mrc CP32(r0, SCR) - orr r0, r0, #0x100 /* Set HCE */ - orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */ - bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */ - mcr CP32(r0, SCR) - /* Ugly: the system timer's frequency register is only - * programmable in Secure state. Since we don't know where its - * memory-mapped control registers live, we can't find out the - * right frequency. Use the VE model's default frequency here. */ - ldr r0, =0x5f5e100 /* 100 MHz */ - mcr CP32(r0, CNTFRQ) - ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */ - mcr CP32(r0, NSACR) - mov r0, #GIC_BASE_ADDRESS - add r0, r0, #GIC_DR_OFFSET - /* Disable the GIC distributor, on the boot CPU only */ - mov r1, #0 - teq r12, #0 /* Is this the boot CPU? */ - streq r1, [r0] - /* Continuing ugliness: Set up the GIC so NS state owns interrupts, - * The first 32 interrupts (SGIs & PPIs) must be configured on all - * CPUs while the remainder are SPIs and only need to be done one, on - * the boot CPU. */ - add r0, r0, #0x80 /* GICD_IGROUP0 */ - mov r2, #0xffffffff /* All interrupts to group 1 */ - teq r12, #0 /* Boot CPU? 
*/
- str r2, [r0] /* Interrupts 0-31 (SGI & PPI) */
- streq r2, [r0, #4] /* Interrupts 32-63 (SPI) */
- streq r2, [r0, #8] /* Interrupts 64-95 (SPI) */
- /* Disable the GIC CPU interface on all processors */
- mov r0, #GIC_BASE_ADDRESS
- add r0, r0, #GIC_CR_OFFSET
- mov r1, #0
- str r1, [r0]
- /* Must drop priority mask below 0x80 before entering NS state */
- ldr r1, =0xff
- str r1, [r0, #0x4] /* -> GICC_PMR */
- /* Reset a few config registers */
- mov r0, #0
- mcr CP32(r0, FCSEIDR)
- mcr CP32(r0, CONTEXTIDR)
- /* Allow non-secure access to coprocessors, FIQs, VFP and NEON */
- ldr r1, =0x3fff /* 14 CP bits set, all others clear */
- mcr CP32(r1, NSACR)
-
- mrs r0, cpsr /* Copy the CPSR */
- add r0, r0, #0x4 /* 0x16 (Monitor) -> 0x1a (Hyp) */
- msr spsr_cxsf, r0 /* into the SPSR */
- movs pc, r3 /* Exception-return into Hyp mode */
-
-/*
- * Local variables:
- * mode: ASM
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 7ae451596e..852f0d879e 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -4,7 +4,7 @@
 #include
 #include
 #include
-#include "gic.h"
+#include <asm/gic.h>
 
 void dump_p2m_lookup(struct domain *d, paddr_t addr)
 {
diff --git a/xen/arch/arm/proc-ca15.S b/xen/arch/arm/proc-ca15.S
deleted file mode 100644
index dcdd42e09e..0000000000
--- a/xen/arch/arm/proc-ca15.S
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * xen/arch/arm/proc-ca15.S
- *
- * Cortex A15 specific initializations
- *
- * Copyright (c) 2011 Citrix Systems.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
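The SCR fixup in enter_hyp_mode above packs several bits into two orr constants. Spelled out with bit names from the ARMv7 SCR layout (the #defines here are illustrative, not from the patch):

    #include <stdint.h>

    #define SCR_NS  (1u << 0)   /* non-secure */
    #define SCR_FW  (1u << 4)   /* CPSR.F writable in NS state */
    #define SCR_AW  (1u << 5)   /* CPSR.A writable in NS state */
    #define SCR_SCD (1u << 7)   /* SMC disabled in NS state */
    #define SCR_HCE (1u << 8)   /* HVC enabled */

    static uint32_t enter_hyp_scr(uint32_t scr)
    {
        scr |= SCR_HCE;                             /* orr r0, r0, #0x100 */
        scr |= SCR_SCD | SCR_AW | SCR_FW | SCR_NS;  /* orr r0, r0, #0xb1 */
        scr &= ~0xeu;                               /* bic: clear EA, FIQ, IRQ */
        return scr;
    }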
- */
-
-#include
-#include
-
-.globl cortex_a15_init
-cortex_a15_init:
- /* Set up the SMP bit in ACTLR */
- mrc CP32(r0, ACTLR)
- orr r0, r0, #(ACTLR_CA15_SMP) /* enable SMP bit */
- mcr CP32(r0, ACTLR)
- mov pc, lr
-
-/*
- * Local variables:
- * mode: ASM
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index bbee051c1b..b5cb912f0a 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -39,7 +39,7 @@
 #include
 #include
 #include
-#include "gic.h"
+#include <asm/gic.h>
 
 static __used void init_done(void)
 {
diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
index 351b559f48..c7a586b3b8 100644
--- a/xen/arch/arm/smpboot.c
+++ b/xen/arch/arm/smpboot.c
@@ -29,7 +29,7 @@
 #include
 #include
 #include
-#include "gic.h"
+#include <asm/gic.h>
 
 cpumask_t cpu_online_map;
 EXPORT_SYMBOL(cpu_online_map);
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index 096dc0bfb3..bddd7d4094 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -35,7 +35,7 @@
 #include
 #include "io.h"
 #include "vtimer.h"
-#include "gic.h"
+#include <asm/gic.h>
 
 /* The base of the stack must always be double-word aligned, which means
  * that both the kernel half of struct cpu_user_regs (which is pushed in
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 3f7e757a08..7d1a5adaeb 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -27,7 +27,7 @@
 #include
 
 #include "io.h"
-#include "gic.h"
+#include <asm/gic.h>
 
 #define VGIC_DISTR_BASE_ADDRESS 0x000000002c001000
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 490b021342..1c45f4a9d3 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -21,7 +21,7 @@
 #include
 #include
 #include
-#include "gic.h"
+#include <asm/gic.h>
 
 extern s_time_t ticks_to_ns(uint64_t ticks);
 extern uint64_t ns_to_ticks(s_time_t ns);
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
new file mode 100644
index 0000000000..bf30fbdd74
--- /dev/null
+++ b/xen/include/asm-arm/gic.h
@@ -0,0 +1,170 @@
+/*
+ * ARM Generic Interrupt Controller support
+ *
+ * Tim Deegan
+ * Copyright (c) 2011 Citrix Systems.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */ + +#ifndef __ASM_ARM_GIC_H__ +#define __ASM_ARM_GIC_H__ + +#define GICD_CTLR (0x000/4) +#define GICD_TYPER (0x004/4) +#define GICD_IIDR (0x008/4) +#define GICD_IGROUPR (0x080/4) +#define GICD_IGROUPRN (0x0FC/4) +#define GICD_ISENABLER (0x100/4) +#define GICD_ISENABLERN (0x17C/4) +#define GICD_ICENABLER (0x180/4) +#define GICD_ICENABLERN (0x1fC/4) +#define GICD_ISPENDR (0x200/4) +#define GICD_ISPENDRN (0x27C/4) +#define GICD_ICPENDR (0x280/4) +#define GICD_ICPENDRN (0x2FC/4) +#define GICD_ISACTIVER (0x300/4) +#define GICD_ISACTIVERN (0x37C/4) +#define GICD_ICACTIVER (0x380/4) +#define GICD_ICACTIVERN (0x3FC/4) +#define GICD_IPRIORITYR (0x400/4) +#define GICD_IPRIORITYRN (0x7F8/4) +#define GICD_ITARGETSR (0x800/4) +#define GICD_ITARGETSRN (0xBF8/4) +#define GICD_ICFGR (0xC00/4) +#define GICD_ICFGRN (0xCFC/4) +#define GICD_NSACR (0xE00/4) +#define GICD_NSACRN (0xEFC/4) +#define GICD_ICPIDR2 (0xFE8/4) +#define GICD_SGIR (0xF00/4) +#define GICD_CPENDSGIR (0xF10/4) +#define GICD_CPENDSGIRN (0xF1C/4) +#define GICD_SPENDSGIR (0xF20/4) +#define GICD_SPENDSGIRN (0xF2C/4) +#define GICD_ICPIDR2 (0xFE8/4) + +#define GICC_CTLR (0x0000/4) +#define GICC_PMR (0x0004/4) +#define GICC_BPR (0x0008/4) +#define GICC_IAR (0x000C/4) +#define GICC_EOIR (0x0010/4) +#define GICC_RPR (0x0014/4) +#define GICC_HPPIR (0x0018/4) +#define GICC_APR (0x00D0/4) +#define GICC_NSAPR (0x00E0/4) +#define GICC_DIR (0x1000/4) + +#define GICH_HCR (0x00/4) +#define GICH_VTR (0x04/4) +#define GICH_VMCR (0x08/4) +#define GICH_MISR (0x10/4) +#define GICH_EISR0 (0x20/4) +#define GICH_EISR1 (0x24/4) +#define GICH_ELSR0 (0x30/4) +#define GICH_ELSR1 (0x34/4) +#define GICH_APR (0xF0/4) +#define GICH_LR (0x100/4) + +/* Register bits */ +#define GICD_CTL_ENABLE 0x1 + +#define GICD_TYPE_LINES 0x01f +#define GICD_TYPE_CPUS 0x0e0 +#define GICD_TYPE_SEC 0x400 + +#define GICC_CTL_ENABLE 0x1 +#define GICC_CTL_EOI (0x1 << 9) + +#define GICC_IA_IRQ 0x03ff +#define GICC_IA_CPU 0x1c00 + +#define GICH_HCR_EN (1 << 0) +#define GICH_HCR_UIE (1 << 1) +#define GICH_HCR_LRENPIE (1 << 2) +#define GICH_HCR_NPIE (1 << 3) +#define GICH_HCR_VGRP0EIE (1 << 4) +#define GICH_HCR_VGRP0DIE (1 << 5) +#define GICH_HCR_VGRP1EIE (1 << 6) +#define GICH_HCR_VGRP1DIE (1 << 7) + +#define GICH_MISR_EOI (1 << 0) +#define GICH_MISR_U (1 << 1) +#define GICH_MISR_LRENP (1 << 2) +#define GICH_MISR_NP (1 << 3) +#define GICH_MISR_VGRP0E (1 << 4) +#define GICH_MISR_VGRP0D (1 << 5) +#define GICH_MISR_VGRP1E (1 << 6) +#define GICH_MISR_VGRP1D (1 << 7) + +#define GICH_LR_VIRTUAL_MASK 0x3ff +#define GICH_LR_VIRTUAL_SHIFT 0 +#define GICH_LR_PHYSICAL_MASK 0x3ff +#define GICH_LR_PHYSICAL_SHIFT 10 +#define GICH_LR_STATE_MASK 0x3 +#define GICH_LR_STATE_SHIFT 28 +#define GICH_LR_PRIORITY_SHIFT 23 +#define GICH_LR_MAINTENANCE_IRQ (1<<19) +#define GICH_LR_PENDING (1<<28) +#define GICH_LR_ACTIVE (1<<29) +#define GICH_LR_GRP1 (1<<30) +#define GICH_LR_HW (1<<31) +#define GICH_LR_CPUID_SHIFT 9 +#define GICH_VTR_NRLRGS 0x3f + +/* XXX: write this into the DT */ +#define VGIC_IRQ_EVTCHN_CALLBACK 31 + +#ifndef __ASSEMBLY__ +extern int domain_vgic_init(struct domain *d); +extern void domain_vgic_free(struct domain *d); + +extern int vcpu_vgic_init(struct vcpu *v); + +extern void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq,int virtual); +extern struct pending_irq *irq_to_pending(struct vcpu *v, unsigned int irq); + +extern void gic_route_ppis(void); +extern void gic_route_spis(void); + +extern void gic_inject(void); + +extern void __cpuinit init_maintenance_interrupt(void); +extern void 
gic_set_guest_irq(struct vcpu *v, unsigned int irq, + unsigned int state, unsigned int priority); +extern int gic_route_irq_to_guest(struct domain *d, unsigned int irq, + const char * devname); + +/* Accept an interrupt from the GIC and dispatch its handler */ +extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq); +/* Bring up the interrupt controller, and report # cpus attached */ +extern void gic_init(void); +/* Bring up a secondary CPU's per-CPU GIC interface */ +extern void gic_init_secondary_cpu(void); +/* Take down a CPU's per-CPU GIC interface */ +extern void gic_disable_cpu(void); +/* setup the gic virtual interface for a guest */ +extern int gicv_setup(struct domain *d); + +/* Context switch */ +extern void gic_save_state(struct vcpu *v); +extern void gic_restore_state(struct vcpu *v); + +#endif /* __ASSEMBLY__ */ +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ -- cgit v1.2.3
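Two conventions in the relocated header are worth noting: register offsets are pre-divided by 4 so they can index a uint32_t pointer directly, and the GICH_LR_* constants describe the packing of a GICH list register. A C sketch of both, assuming the definitions from the header above are in scope and with `gich` as a hypothetical mapped register base (not code from the patch):

    #include <stdint.h>

    /* Compose a list-register value for a pending hardware interrupt. */
    static uint32_t make_gich_lr(unsigned virq, unsigned pirq, unsigned prio)
    {
        uint32_t lr = 0;

        lr |= (virq & GICH_LR_VIRTUAL_MASK) << GICH_LR_VIRTUAL_SHIFT;
        lr |= (pirq & GICH_LR_PHYSICAL_MASK) << GICH_LR_PHYSICAL_SHIFT;
        lr |= (prio & 0x1f) << GICH_LR_PRIORITY_SHIFT; /* 5-bit priority;
                                                          no mask in the header */
        lr |= GICH_LR_PENDING;                         /* state = pending */
        lr |= (uint32_t)GICH_LR_HW;                    /* deactivate in hardware */
        return lr;
    }

    static void gich_write_lr(volatile uint32_t *gich, unsigned nr, uint32_t lr)
    {
        gich[GICH_LR + nr] = lr;    /* GICH_LR is (0x100/4): word-indexed */
    }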