Start fleshing out -sparse tree for xenlinux/ia64

author: djm@kirby.fc.hp.com <djm@kirby.fc.hp.com> 2005-09-21 09:06:30 -0600
committer: djm@kirby.fc.hp.com <djm@kirby.fc.hp.com> 2005-09-21 09:06:30 -0600
commit: fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5 (patch)
tree: f27e2f3e1d2dcb8bdd9edfc533c42647d10361f7
parent: 3c0d086543c9433d8f2fc33f1afd001fa31d3878 (diff)
download: xen-fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5.tar.gz
xen-fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5.tar.bz2
xen-fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5.zip
25 files changed, 12189 insertions, 4 deletions
diff --git a/linux-2.6-xen-sparse/arch/ia64/Kconfig b/linux-2.6-xen-sparse/arch/ia64/Kconfig
new file mode 100644
index 0000000000..313ef812bc
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig
@@ -0,0 +1,460 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "IA-64 Linux Kernel Configuration"
+
+source "init/Kconfig"
+
+menu "Processor type and features"
+
+config IA64
+	bool
+	default y
+	help
+	  The Itanium Processor Family is Intel's 64-bit successor to
+	  the 32-bit X86 line.  The IA-64 Linux project has a home
+	  page at <http://www.linuxia64.org/> and a mailing list at
+	  <linux-ia64@vger.kernel.org>.
+
+config 64BIT
+	bool
+	default y
+
+config MMU
+	bool
+	default y
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+	default y
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config TIME_INTERPOLATION
+	bool
+	default y
+
+config EFI
+	bool
+	default y
+
+config GENERIC_IOMAP
+	bool
+	default y
+
+config XEN
+	bool
+	default y
+	help
+	  Enable Xen hypervisor support.  Resulting kernel runs
+	  both as a guest OS on Xen and natively on hardware.
+
+config XEN_PHYSDEV_ACCESS
+	depends on XEN
+	bool
+	default y
+
+config XEN_BLKDEV_GRANT
+	depends on XEN
+	bool
+	default y
+
+config SCHED_NO_NO_OMIT_FRAME_POINTER
+	bool
+	default y
+
+choice
+	prompt "System type"
+	default IA64_GENERIC
+
+config IA64_GENERIC
+	bool "generic"
+	select NUMA
+	select ACPI_NUMA
+	select VIRTUAL_MEM_MAP
+	select DISCONTIGMEM
+	help
+	  This selects the system type of your hardware.  A "generic" kernel
+	  will run on any supported IA-64 system.  However, if you configure
+	  a kernel for your specific system, it will be faster and smaller.
+
+	  generic		For any supported IA-64 system
+	  DIG-compliant		For DIG ("Developer's Interface Guide") compliant systems
+	  HP-zx1/sx1000		For HP systems
+	  HP-zx1/sx1000+swiotlb	For HP systems with (broken) DMA-constrained devices.
+	  SGI-SN2		For SGI Altix systems
+	  Ski-simulator		For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
+
+	  If you don't know what to do, choose "generic".
+
+config IA64_DIG
+	bool "DIG-compliant"
+
+config IA64_HP_ZX1
+	bool "HP-zx1/sx1000"
+	help
+	  Build a kernel that runs on HP zx1 and sx1000 systems.  This adds
+	  support for the HP I/O MMU.
+
+config IA64_HP_ZX1_SWIOTLB
+	bool "HP-zx1/sx1000 with software I/O TLB"
+	help
+	  Build a kernel that runs on HP zx1 and sx1000 systems even when they
+	  have broken PCI devices which cannot DMA to full 32 bits.  Apart
+	  from support for the HP I/O MMU, this includes support for the software
+	  I/O TLB, which allows supporting the broken devices at the expense of
+	  wasting some kernel memory (about 2MB by default).
+
+config IA64_SGI_SN2
+	bool "SGI-SN2"
+	help
+	  Selecting this option will optimize the kernel for use on sn2 based
+	  systems, but the resulting kernel binary will not run on other
+	  types of ia64 systems.  If you have an SGI Altix system, it's safe
+	  to select this option.  If in doubt, select ia64 generic support
+	  instead.
+
+config IA64_HP_SIM
+	bool "Ski-simulator"
+
+endchoice
+
+choice
+	prompt "Processor type"
+	default ITANIUM
+
+config ITANIUM
+	bool "Itanium"
+	help
+	  Select your IA-64 processor type.  The default is Itanium.
+	  This choice is safe for all IA-64 systems, but may not perform
+	  optimally on systems with, say, Itanium 2 or newer processors.
+
+config MCKINLEY
+	bool "Itanium 2"
+	help
+	  Select this to configure for an Itanium 2 (McKinley) processor.
+
+endchoice
+
+choice
+	prompt "Kernel page size"
+	default IA64_PAGE_SIZE_16KB
+
+config IA64_PAGE_SIZE_4KB
+	bool "4KB"
+	help
+	  This lets you select the page size of the kernel.  For best IA-64
+	  performance, a page size of 8KB or 16KB is recommended.  For best
+	  IA-32 compatibility, a page size of 4KB should be selected (the vast
+	  majority of IA-32 binaries work perfectly fine with a larger page
+	  size).  For Itanium 2 or newer systems, a page size of 64KB can also
+	  be selected.
+
+	  4KB                For best IA-32 compatibility
+	  8KB                For best IA-64 performance
+	  16KB               For best IA-64 performance
+	  64KB               Requires Itanium 2 or newer processor.
+
+	  If you don't know what to do, choose 16KB.
+
+config IA64_PAGE_SIZE_8KB
+	bool "8KB"
+
+config IA64_PAGE_SIZE_16KB
+	bool "16KB"
+
+config IA64_PAGE_SIZE_64KB
+	depends on !ITANIUM
+	bool "64KB"
+
+endchoice
+
+config IA64_BRL_EMU
+	bool
+	depends on ITANIUM
+	default y
+
+# align cache-sensitive data to 128 bytes
+config IA64_L1_CACHE_SHIFT
+	int
+	default "7" if MCKINLEY
+	default "6" if ITANIUM
+
+# align cache-sensitive data to 64 bytes
+config NUMA
+	bool "NUMA support"
+	depends on !IA64_HP_SIM
+	default y if IA64_SGI_SN2
+	select ACPI_NUMA
+	help
+	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
+	  Access).  This option is for configuring high-end multiprocessor
+	  server systems.  If in doubt, say N.
+
+config VIRTUAL_MEM_MAP
+	bool "Virtual mem map"
+	default y if !IA64_HP_SIM
+	help
+	  Say Y to compile the kernel with support for a virtual mem map.
+	  This code also only takes effect if a memory hole of greater than
+	  1 Gb is found during boot.  You must turn this option on if you
+	  require the DISCONTIGMEM option for your machine. If you are
+	  unsure, say Y.
+
+config HOLES_IN_ZONE
+	bool
+	default y if VIRTUAL_MEM_MAP
+
+config DISCONTIGMEM
+	bool "Discontiguous memory support"
+	depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP
+	default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA
+	help
+	  Say Y to support efficient handling of discontiguous physical memory,
+	  for architectures which are either NUMA (Non-Uniform Memory Access)
+	  or have huge holes in the physical address space for other reasons.
+	  See <file:Documentation/vm/numa> for more.
+
+config IA64_CYCLONE
+	bool "Cyclone (EXA) Time Source support"
+	help
+	  Say Y here to enable support for IBM EXA Cyclone time source.
+	  If you're unsure, answer N.
+
+config IOSAPIC
+	bool
+	depends on !IA64_HP_SIM
+	default y
+
+config IA64_SGI_SN_SIM
+	bool "SGI Medusa Simulator Support"
+	depends on IA64_SGI_SN2
+	help
+	  If you are compiling a kernel that will run under SGI's IA-64
+	  simulator (Medusa) then say Y, otherwise say N.
+
+config IA64_SGI_SN_XP
+	tristate "Support communication between SGI SSIs"
+	depends on MSPEC
+	help
+	  An SGI machine can be divided into multiple Single System
+	  Images which act independently of each other and have
+	  hardware based memory protection from the others.  Enabling
+	  this feature will allow for direct communication between SSIs
+	  based on a network adapter and DMA messaging.
+
+config FORCE_MAX_ZONEORDER
+	int
+	default "18"
+
+config SMP
+	bool "Symmetric multi-processing support"
+	help
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, say N.  If you have a system with more
+	  than one CPU, say Y.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  systems, but will use only one CPU of a multiprocessor system.  If
+	  you say Y here, the kernel will run on many, but not all,
+	  single processor systems.  On a single processor system, the kernel
+	  will run faster if you say N here.
+
+	  See also the <file:Documentation/smp.txt> and the SMP-HOWTO
+	  available at <http://www.tldp.org/docs.html#howto>.
+
+	  If you don't know what to do here, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-512)"
+	range 2 512
+	depends on SMP
+	default "64"
+	help
+	  You should set this to the number of CPUs in your system, but
+	  keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
+	  only use 2 CPUs on a >2 CPU system.  Setting this to a value larger
+	  than 64 will cause the use of a CPU mask array, causing a small
+	  performance hit.
+
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+	depends on SMP && EXPERIMENTAL
+	select HOTPLUG
+	default n
+	---help---
+	  Say Y here to experiment with turning CPUs off and on.  CPUs
+	  can be controlled through /sys/devices/system/cpu/cpu#.
+	  Say N if you want to disable CPU hotplug.
+
+config SCHED_SMT
+	bool "SMT scheduler support"
+	depends on SMP
+	default off
+	help
+	  Improves the CPU scheduler's decision making when dealing with
+	  Intel IA64 chips with MultiThreading at a cost of slightly increased
+	  overhead in some places. If unsure say N here.
+
+config PREEMPT
+	bool "Preemptible Kernel"
+        help
+          This option reduces the latency of the kernel when reacting to
+          real-time or interactive events by allowing a low priority process to
+          be preempted even if it is in kernel mode executing a system call.
+          This allows applications to run more reliably even when the system is
+          under load.
+
+          Say Y here if you are building a kernel for a desktop, embedded
+          or real-time system.  Say N if you are unsure.
+
+config HAVE_DEC_LOCK
+	bool
+	depends on (SMP || PREEMPT)
+	default y
+
+config IA32_SUPPORT
+	bool "Support for Linux/x86 binaries"
+	help
+	  IA-64 processors can execute IA-32 (X86) instructions.  By
+	  saying Y here, the kernel will include IA-32 system call
+	  emulation support which makes it possible to transparently
+	  run IA-32 Linux binaries on an IA-64 Linux system.
+	  If in doubt, say Y.
+
+config COMPAT
+	bool
+	depends on IA32_SUPPORT
+	default y
+
+config IA64_MCA_RECOVERY
+	tristate "MCA recovery from errors other than TLB."
+
+config PERFMON
+	bool "Performance monitor support"
+	help
+	  Selects whether support for the IA-64 performance monitor hardware
+	  is included in the kernel.  This makes some kernel data-structures a
+	  little bigger and slows down execution a bit, but it is generally
+	  a good idea to turn this on.  If you're unsure, say Y.
+
+config IA64_PALINFO
+	tristate "/proc/pal support"
+	help
+	  If you say Y here, you are able to get PAL (Processor Abstraction
+	  Layer) information in /proc/pal.  This contains useful information
+	  about the processors in your systems, such as cache and TLB sizes
+	  and the PAL firmware version in use.
+
+	  To use this option, you have to ensure that the "/proc file system
+	  support" (CONFIG_PROC_FS) is enabled, too.
+
+config ACPI_DEALLOCATE_IRQ
+	bool
+	depends on IOSAPIC && EXPERIMENTAL
+	default y
+
+source "drivers/firmware/Kconfig"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+menu "Power management and ACPI"
+
+config PM
+	bool "Power Management support"
+	depends on !IA64_HP_SIM
+	default y
+	help
+	  "Power Management" means that parts of your computer are shut
+	  off or put into a power conserving "sleep" mode if they are not
+	  being used.  There are two competing standards for doing this: APM
+	  and ACPI.  If you want to use either one, say Y here and then also
+	  to the requisite support below.
+
+	  Power Management is most important for battery powered laptop
+	  computers; if you have a laptop, check out the Linux Laptop home
+	  page on the WWW at <http://www.linux-on-laptops.com/> and the
+	  Battery Powered Linux mini-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  Note that, even if you say N here, Linux on the x86 architecture
+	  will issue the hlt instruction if nothing is to be done, thereby
+	  sending the processor to sleep and saving power.
+
+config ACPI
+	bool
+	depends on !IA64_HP_SIM
+	default y
+
+if !IA64_HP_SIM
+
+source "drivers/acpi/Kconfig"
+
+endif
+
+endmenu
+
+if !IA64_HP_SIM
+
+menu "Bus options (PCI, PCMCIA)"
+
+config PCI
+	bool "PCI support"
+	help
+	  Find out whether you have a PCI motherboard. PCI is the name of a
+	  bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
+	  VESA. If you have PCI, say Y, otherwise N.
+
+	  The PCI-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>, contains valuable
+	  information about which PCI hardware does work under Linux and which
+	  doesn't.
+
+config PCI_DOMAINS
+	bool
+	default PCI
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+endmenu
+
+endif
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "lib/Kconfig"
+
+#
+# Use the generic interrupt handling code in kernel/irq/:
+#
+config GENERIC_HARDIRQS
+	bool
+	default y
+
+config GENERIC_IRQ_PROBE
+	bool
+	default y
+
+source "arch/ia64/hp/sim/Kconfig"
+
+source "arch/ia64/oprofile/Kconfig"
+
+source "arch/ia64/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
diff --git a/linux-2.6-xen-sparse/arch/ia64/Makefile b/linux-2.6-xen-sparse/arch/ia64/Makefile
new file mode 100644
index 0000000000..c7a4d7eec7
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/Makefile
@@ -0,0 +1,141 @@
+#
+# ia64/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+NM := $(CROSS_COMPILE)nm -B
+READELF := $(CROSS_COMPILE)readelf
+
+# following is temporary pending xen directory restructuring
+NOSTDINC_FLAGS += -Iinclude/asm-xen
+
+export AWK
+
+CHECKFLAGS	+= -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
+
+OBJCOPYFLAGS	:= --strip-all
+LDFLAGS_vmlinux	:= -static
+LDFLAGS_MODULE	+= -T $(srctree)/arch/ia64/module.lds
+AFLAGS_KERNEL	:= -mconstant-gp
+EXTRA		:=
+
+cflags-y	:= -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
+		   -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
+CFLAGS_KERNEL	:= -mconstant-gp
+
+GCC_VERSION     := $(call cc-version)
+GAS_STATUS	= $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
+CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
+
+ifeq ($(GAS_STATUS),buggy)
+$(error Sorry, you need a newer version of the assember, one that is built from	\
+	a source-tree that post-dates 18-Dec-2002.  You can find a pre-compiled	\
+	static binary of such an assembler at:					\
+										\
+		ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
+endif
+
+ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),)
+$(error Sorry, your compiler is too old.  GCC v2.96 is known to generate bad code.)
+endif
+
+ifeq ($(GCC_VERSION),0304)
+	cflags-$(CONFIG_ITANIUM)	+= -mtune=merced
+	cflags-$(CONFIG_MCKINLEY)	+= -mtune=mckinley
+endif
+
+CFLAGS += $(cflags-y)
+head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
+
+libs-y				+= arch/ia64/lib/
+core-y				+= arch/ia64/kernel/ arch/ia64/mm/
+core-$(CONFIG_IA32_SUPPORT)	+= arch/ia64/ia32/
+core-$(CONFIG_IA64_DIG) 	+= arch/ia64/dig/
+core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
+core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
+core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
+core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
+core-$(CONFIG_XEN)		+= arch/ia64/xen/ arch/ia64/hp/sim/
+
+drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
+drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
+drivers-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/
+drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
+drivers-$(CONFIG_IA64_GENERIC)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
+drivers-$(CONFIG_OPROFILE)	+= arch/ia64/oprofile/
+
+boot := arch/ia64/hp/sim/boot
+
+.PHONY: boot compressed check
+
+all: compressed unwcheck
+
+compressed: vmlinux.gz
+
+vmlinux.gz: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+unwcheck: vmlinux
+	-$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $<
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+CLEAN_FILES += include/asm-ia64/.offsets.h.stamp vmlinux.gz bootloader
+CLEAN_FILES += include/asm-xen/xen-public include/asm-ia64/xen/asm-xsi-offsets.h
+CLEAN_FILES += include/asm-xen/linux-public/xenstored.h
+CLEAN_FILES += include/asm-xen/linux-public include/asm-xen/asm-ia64/hypervisor.h
+
+MRPROPER_FILES += include/asm-ia64/offsets.h
+
+prepare: include/asm-ia64/offsets.h
+
+arch/ia64/kernel/asm-offsets.s: include/asm include/linux/version.h include/config/MARKER
+
+include/asm-ia64/offsets.h: arch/ia64/kernel/asm-offsets.s
+	$(call filechk,gen-asm-offsets)
+
+arch/ia64/kernel/asm-offsets.s: include/asm-ia64/.offsets.h.stamp
+
+XEN_PATH ?= $(srctree)/../xen-ia64-unstable.hg/
+include/asm-ia64/.offsets.h.stamp:
+	mkdir -p include/asm-ia64
+	[ -s include/asm-ia64/offsets.h ] \
+	 || echo "#define IA64_TASK_SIZE 0" > include/asm-ia64/offsets.h
+	touch $@
+	[ -e include/asm-xen/asm ] \
+	 || ln -s asm-ia64 include/asm-xen/asm
+	[ -e include/asm-xen/xen-public ] \
+	 || ln -s $(XEN_PATH)/xen/include/public \
+		include/asm-xen/xen-public
+	[ -e include/asm-ia64/xen/asm-xsi-offsets.h ] \
+	 || ln -s $(XEN_PATH)/xen/include/asm-ia64/asm-xsi-offsets.h \
+		include/asm-ia64/xen/asm-xsi-offsets.h
+	[ -e include/asm-xen/linux-public ] \
+	 || ln -s $(XEN_PATH)/linux-2.6-xen-sparse/include/asm-xen/linux-public \
+		include/asm-xen/linux-public
+	[ -e include/asm-xen/linux-public/xenstored.h ] \
+	 || ln -s $(XEN_PATH)/tools/xenstore/xenstored.h \
+		include/asm-xen/linux-public/xenstored.h
+	[ -e include/asm-xen/asm-ia64/hypervisor.h ] \
+	 || ln -s $(XEN_PATH)/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h \
+		include/asm-xen/asm-ia64/hypervisor.h
+
+
+boot:	lib/lib.a vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+install: vmlinux.gz
+	sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
+
+define archhelp
+  echo '* compressed	- Build compressed kernel image'
+  echo '  install	- Install compressed kernel image'
+  echo '  boot		- Build vmlinux and bootloader for Ski simulator'
+  echo '* unwcheck	- Check vmlinux for invalid unwind info'
+endef
diff --git a/linux-2.6-xen-sparse/arch/ia64/hp/sim/Makefile b/linux-2.6-xen-sparse/arch/ia64/hp/sim/Makefile
new file mode 100644
index 0000000000..b0916cdf9e
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/hp/sim/Makefile
@@ -0,0 +1,18 @@
+#
+# ia64/platform/hp/sim/Makefile
+#
+# Copyright (C) 2002 Hewlett-Packard Co.
+#	David Mosberger-Tang <davidm@hpl.hp.com>
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+obj-y := hpsim_irq.o hpsim_setup.o hpsim.o
+obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o
+
+obj-$(CONFIG_HP_SIMETH)	+= simeth.o
+obj-$(CONFIG_HP_SIMSERIAL) += simserial.o
+obj-$(CONFIG_HP_SIMSERIAL_CONSOLE) += hpsim_console.o
+obj-$(CONFIG_HP_SIMSCSI) += simscsi.o
+obj-$(CONFIG_XEN) += simserial.o
+obj-$(CONFIG_XEN) += hpsim_console.o
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
new file mode 100644
index 0000000000..c77200dcc0
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
@@ -0,0 +1,1593 @@
+/*
+ * ia64/kernel/entry.S
+ *
+ * Kernel entry points.
+ *
+ * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999, 2002-2003
+ *	Asit Mallick <Asit.K.Mallick@intel.com>
+ * 	Don Dugger <Don.Dugger@intel.com>
+ *	Suresh Siddha <suresh.b.siddha@intel.com>
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+/*
+ * ia64_switch_to now places correct virtual mapping in in TR2 for
+ * kernel stack. This allows us to handle interrupts without changing
+ * to physical mode.
+ *
+ * Jonathan Nicklin	<nicklin@missioncriticallinux.com>
+ * Patrick O'Rourke	<orourke@missioncriticallinux.com>
+ * 11/07/2000
+ */
+/*
+ * Global (preserved) predicate usage on syscall entry/exit path:
+ *
+ *	pKStk:		See entry.h.
+ *	pUStk:		See entry.h.
+ *	pSys:		See entry.h.
+ *	pNonSys:	!pSys
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#include "minstate.h"
+
+	/*
+	 * execve() is special because in case of success, we need to
+	 * setup a null register window frame.
+	 */
+ENTRY(ia64_execve)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,2,4,0
+	mov loc0=rp
+	.body
+	mov out0=in0			// filename
+	;;				// stop bit between alloc and call
+	mov out1=in1			// argv
+	mov out2=in2			// envp
+	add out3=16,sp			// regs
+	br.call.sptk.many rp=sys_execve
+.ret0:
+#ifdef CONFIG_IA32_SUPPORT
+	/*
+	 * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
+	 * from pt_regs.
+	 */
+	adds r16=PT(CR_IPSR)+16,sp
+	;;
+	ld8 r16=[r16]
+#endif
+	cmp4.ge p6,p7=r8,r0
+	mov ar.pfs=loc1			// restore ar.pfs
+	sxt4 r8=r8			// return 64-bit result
+	;;
+	stf.spill [sp]=f0
+(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
+	mov rp=loc0
+(p6)	mov ar.pfs=r0			// clear ar.pfs on success
+(p7)	br.ret.sptk.many rp
+
+	/*
+	 * In theory, we'd have to zap this state only to prevent leaking of
+	 * security sensitive state (e.g., if current->mm->dumpable is zero).  However,
+	 * this executes in less than 20 cycles even on Itanium, so it's not worth
+	 * optimizing for...).
+	 */
+	mov ar.unat=0; 		mov ar.lc=0
+	mov r4=0;		mov f2=f0;		mov b1=r0
+	mov r5=0;		mov f3=f0;		mov b2=r0
+	mov r6=0;		mov f4=f0;		mov b3=r0
+	mov r7=0;		mov f5=f0;		mov b4=r0
+	ldf.fill f12=[sp];	mov f13=f0;		mov b5=r0
+	ldf.fill f14=[sp];	ldf.fill f15=[sp];	mov f16=f0
+	ldf.fill f17=[sp];	ldf.fill f18=[sp];	mov f19=f0
+	ldf.fill f20=[sp];	ldf.fill f21=[sp];	mov f22=f0
+	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
+	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
+	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
+#ifdef CONFIG_IA32_SUPPORT
+	tbit.nz p6,p0=r16, IA64_PSR_IS_BIT
+	movl loc0=ia64_ret_from_ia32_execve
+	;;
+(p6)	mov rp=loc0
+#endif
+	br.ret.sptk.many rp
+END(ia64_execve)
+
+/*
+ * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
+ *	      u64 tls)
+ */
+GLOBAL_ENTRY(sys_clone2)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc r16=ar.pfs,8,2,6,0
+	DO_SAVE_SWITCH_STACK
+	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
+	mov loc0=rp
+	mov loc1=r16				// save ar.pfs across do_fork
+	.body
+	mov out1=in1
+	mov out3=in2
+	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
+	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	;;
+(p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
+	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
+	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out0=in0				// out0 = clone_flags
+	br.call.sptk.many rp=do_fork
+.ret1:	.restore sp
+	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(sys_clone2)
+
+/*
+ * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
+ *	Deprecated.  Use sys_clone2() instead.
+ */
+GLOBAL_ENTRY(sys_clone)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc r16=ar.pfs,8,2,6,0
+	DO_SAVE_SWITCH_STACK
+	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
+	mov loc0=rp
+	mov loc1=r16				// save ar.pfs across do_fork
+	.body
+	mov out1=in1
+	mov out3=16				// stacksize (compensates for 16-byte scratch area)
+	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
+	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	;;
+(p6)	st8 [r2]=in4				// store TLS in r13 (tp)
+	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
+	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out0=in0				// out0 = clone_flags
+	br.call.sptk.many rp=do_fork
+.ret2:	.restore sp
+	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(sys_clone)
+
+/*
+ * prev_task <- ia64_switch_to(struct task_struct *next)
+ *	With Ingo's new scheduler, interrupts are disabled when this routine gets
+ *	called.  The code starting at .map relies on this.  The rest of the code
+ *	doesn't care about the interrupt masking status.
+ */
+GLOBAL_ENTRY(__ia64_switch_to)
+	.prologue
+	alloc r16=ar.pfs,1,0,0,0
+	DO_SAVE_SWITCH_STACK
+	.body
+
+	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+	movl r25=init_task
+	mov r27=IA64_KR(CURRENT_STACK)
+	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
+	dep r20=0,in0,61,3		// physical address of "next"
+	;;
+	st8 [r22]=sp			// save kernel stack pointer of old task
+	shr.u r26=r20,IA64_GRANULE_SHIFT
+	cmp.eq p7,p6=r25,in0
+	;;
+	/*
+	 * If we've already mapped this task's page, we can skip doing it again.
+	 */
+(p6)	cmp.eq p7,p6=r26,r27
+(p6)	br.cond.dpnt .map
+	;;
+.done:
+(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
+	;;
+(p6)	srlz.d
+	ld8 sp=[r21]			// load kernel stack pointer of new task
+	mov IA64_KR(CURRENT)=in0	// update "current" application register
+	mov r8=r13			// return pointer to previously running task
+	mov r13=in0			// set "current" pointer
+	;;
+	DO_LOAD_SWITCH_STACK
+
+#ifdef CONFIG_SMP
+	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
+#endif
+	br.ret.sptk.many rp		// boogie on out in new context
+
+.map:
+	rsm psr.ic			// interrupts (psr.i) are already disabled here
+	movl r25=PAGE_KERNEL
+	;;
+	srlz.d
+	or r23=r25,r20			// construct PA | page properties
+	mov r25=IA64_GRANULE_SHIFT<<2
+	;;
+	mov cr.itir=r25
+	mov cr.ifa=in0			// VA of next task...
+	;;
+	mov r25=IA64_TR_CURRENT_STACK
+	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
+	;;
+	itr.d dtr[r25]=r23		// wire in new mapping...
+	br.cond.sptk .done
+END(__ia64_switch_to)
+
+/*
+ * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
+ * means that we may get an interrupt with "sp" pointing to the new kernel stack while
+ * ar.bspstore is still pointing to the old kernel backing store area.  Since ar.rsc,
+ * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
+ * problem.  Also, we don't need to specify unwind information for preserved registers
+ * that are not modified in save_switch_stack as the right unwind information is already
+ * specified at the call-site of save_switch_stack.
+ */
+
+/*
+ * save_switch_stack:
+ *	- r16 holds ar.pfs
+ *	- b7 holds address to return to
+ *	- rp (b0) holds return address to save
+ */
+GLOBAL_ENTRY(save_switch_stack)
+	.prologue
+	.altrp b7
+	flushrs			// flush dirty regs to backing store (must be first in insn group)
+	.save @priunat,r17
+	mov r17=ar.unat		// preserve caller's
+	.body
+#ifdef CONFIG_ITANIUM
+	adds r2=16+128,sp
+	adds r3=16+64,sp
+	adds r14=SW(R4)+16,sp
+	;;
+	st8.spill [r14]=r4,16		// spill r4
+	lfetch.fault.excl.nt1 [r3],128
+	;;
+	lfetch.fault.excl.nt1 [r2],128
+	lfetch.fault.excl.nt1 [r3],128
+	;;
+	lfetch.fault.excl [r2]
+	lfetch.fault.excl [r3]
+	adds r15=SW(R5)+16,sp
+#else
+	add r2=16+3*128,sp
+	add r3=16,sp
+	add r14=SW(R4)+16,sp
+	;;
+	st8.spill [r14]=r4,SW(R6)-SW(R4)	// spill r4 and prefetch offset 0x1c0
+	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x010
+	;;
+	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x090
+	lfetch.fault.excl.nt1 [r2],128	//		prefetch offset 0x190
+	;;
+	lfetch.fault.excl.nt1 [r3]	//		prefetch offset 0x110
+	lfetch.fault.excl.nt1 [r2]	//		prefetch offset 0x210
+	adds r15=SW(R5)+16,sp
+#endif
+	;;
+	st8.spill [r15]=r5,SW(R7)-SW(R5)	// spill r5
+	mov.m ar.rsc=0			// put RSE in mode: enforced lazy, little endian, pl 0
+	add r2=SW(F2)+16,sp		// r2 = &sw->f2
+	;;
+	st8.spill [r14]=r6,SW(B0)-SW(R6)	// spill r6
+	mov.m r18=ar.fpsr		// preserve fpsr
+	add r3=SW(F3)+16,sp		// r3 = &sw->f3
+	;;
+	stf.spill [r2]=f2,32
+	mov.m r19=ar.rnat
+	mov r21=b0
+
+	stf.spill [r3]=f3,32
+	st8.spill [r15]=r7,SW(B2)-SW(R7)	// spill r7
+	mov r22=b1
+	;;
+	// since we're done with the spills, read and save ar.unat:
+	mov.m r29=ar.unat
+	mov.m r20=ar.bspstore
+	mov r23=b2
+	stf.spill [r2]=f4,32
+	stf.spill [r3]=f5,32
+	mov r24=b3
+	;;
+	st8 [r14]=r21,SW(B1)-SW(B0)		// save b0
+	st8 [r15]=r23,SW(B3)-SW(B2)		// save b2
+	mov r25=b4
+	mov r26=b5
+	;;
+	st8 [r14]=r22,SW(B4)-SW(B1)		// save b1
+	st8 [r15]=r24,SW(AR_PFS)-SW(B3)		// save b3
+	mov r21=ar.lc		// I-unit
+	stf.spill [r2]=f12,32
+	stf.spill [r3]=f13,32
+	;;
+	st8 [r14]=r25,SW(B5)-SW(B4)		// save b4
+	st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS)	// save ar.pfs
+	stf.spill [r2]=f14,32
+	stf.spill [r3]=f15,32
+	;;
+	st8 [r14]=r26				// save b5
+	st8 [r15]=r21				// save ar.lc
+	stf.spill [r2]=f16,32
+	stf.spill [r3]=f17,32
+	;;
+	stf.spill [r2]=f18,32
+	stf.spill [r3]=f19,32
+	;;
+	stf.spill [r2]=f20,32
+	stf.spill [r3]=f21,32
+	;;
+	stf.spill [r2]=f22,32
+	stf.spill [r3]=f23,32
+	;;
+	stf.spill [r2]=f24,32
+	stf.spill [r3]=f25,32
+	;;
+	stf.spill [r2]=f26,32
+	stf.spill [r3]=f27,32
+	;;
+	stf.spill [r2]=f28,32
+	stf.spill [r3]=f29,32
+	;;
+	stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)
+	stf.spill [r3]=f31,SW(PR)-SW(F31)
+	add r14=SW(CALLER_UNAT)+16,sp
+	;;
+	st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT)	// save ar.unat
+	st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
+	mov r21=pr
+	;;
+	st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
+	st8 [r3]=r21				// save predicate registers
+	;;
+	st8 [r2]=r20				// save ar.bspstore
+	st8 [r14]=r18				// save fpsr
+	mov ar.rsc=3		// put RSE back into eager mode, pl 0
+	br.cond.sptk.many b7
+END(save_switch_stack)
+
+/*
+ * load_switch_stack:
+ *	- "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
+ *	- b7 holds address to return to
+ *	- must not touch r8-r11
+ */
+GLOBAL_ENTRY(load_switch_stack)
+	.prologue
+	.altrp b7
+
+	.body
+	lfetch.fault.nt1 [sp]
+	adds r2=SW(AR_BSPSTORE)+16,sp
+	adds r3=SW(AR_UNAT)+16,sp
+	mov ar.rsc=0						// put RSE into enforced lazy mode
+	adds r14=SW(CALLER_UNAT)+16,sp
+	adds r15=SW(AR_FPSR)+16,sp
+	;;
+	ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE))	// bspstore
+	ld8 r29=[r3],(SW(B1)-SW(AR_UNAT))	// unat
+	;;
+	ld8 r21=[r2],16		// restore b0
+	ld8 r22=[r3],16		// restore b1
+	;;
+	ld8 r23=[r2],16		// restore b2
+	ld8 r24=[r3],16		// restore b3
+	;;
+	ld8 r25=[r2],16		// restore b4
+	ld8 r26=[r3],16		// restore b5
+	;;
+	ld8 r16=[r2],(SW(PR)-SW(AR_PFS))	// ar.pfs
+	ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC))	// ar.lc
+	;;
+	ld8 r28=[r2]		// restore pr
+	ld8 r30=[r3]		// restore rnat
+	;;
+	ld8 r18=[r14],16	// restore caller's unat
+	ld8 r19=[r15],24	// restore fpsr
+	;;
+	ldf.fill f2=[r14],32
+	ldf.fill f3=[r15],32
+	;;
+	ldf.fill f4=[r14],32
+	ldf.fill f5=[r15],32
+	;;
+	ldf.fill f12=[r14],32
+	ldf.fill f13=[r15],32
+	;;
+	ldf.fill f14=[r14],32
+	ldf.fill f15=[r15],32
+	;;
+	ldf.fill f16=[r14],32
+	ldf.fill f17=[r15],32
+	;;
+	ldf.fill f18=[r14],32
+	ldf.fill f19=[r15],32
+	mov b0=r21
+	;;
+	ldf.fill f20=[r14],32
+	ldf.fill f21=[r15],32
+	mov b1=r22
+	;;
+	ldf.fill f22=[r14],32
+	ldf.fill f23=[r15],32
+	mov b2=r23
+	;;
+	mov ar.bspstore=r27
+	mov ar.unat=r29		// establish unat holding the NaT bits for r4-r7
+	mov b3=r24
+	;;
+	ldf.fill f24=[r14],32
+	ldf.fill f25=[r15],32
+	mov b4=r25
+	;;
+	ldf.fill f26=[r14],32
+	ldf.fill f27=[r15],32
+	mov b5=r26
+	;;
+	ldf.fill f28=[r14],32
+	ldf.fill f29=[r15],32
+	mov ar.pfs=r16
+	;;
+	ldf.fill f30=[r14],32
+	ldf.fill f31=[r15],24
+	mov ar.lc=r17
+	;;
+	ld8.fill r4=[r14],16
+	ld8.fill r5=[r15],16
+	mov pr=r28,-1
+	;;
+	ld8.fill r6=[r14],16
+	ld8.fill r7=[r15],16
+
+	mov ar.unat=r18				// restore caller's unat
+	mov ar.rnat=r30				// must restore after bspstore but before rsc!
+	mov ar.fpsr=r19				// restore fpsr
+	mov ar.rsc=3				// put RSE back into eager mode, pl 0
+	br.cond.sptk.many b7
+END(load_switch_stack)
+
+GLOBAL_ENTRY(__ia64_syscall)
+	.regstk 6,0,0,0
+	mov r15=in5				// put syscall number in place
+	break __BREAK_SYSCALL
+	movl r2=errno
+	cmp.eq p6,p7=-1,r10
+	;;
+(p6)	st4 [r2]=r8
+(p6)	mov r8=-1
+	br.ret.sptk.many rp
+END(__ia64_syscall)
+
+GLOBAL_ENTRY(execve)
+	mov r15=__NR_execve			// put syscall number in place
+	break __BREAK_SYSCALL
+	br.ret.sptk.many rp
+END(execve)
+
+GLOBAL_ENTRY(clone)
+	mov r15=__NR_clone			// put syscall number in place
+	break __BREAK_SYSCALL
+	br.ret.sptk.many rp
+END(clone)
+
+	/*
+	 * Invoke a system call, but do some tracing before and after the call.
+	 * We MUST preserve the current register frame throughout this routine
+	 * because some system calls (such as ia64_execve) directly
+	 * manipulate ar.pfs.
+	 */
+GLOBAL_ENTRY(__ia64_trace_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * We need to preserve the scratch registers f6-f11 in case the system
+	 * call is sigreturn.
+	 */
+	adds r16=PT(F6)+16,sp
+	adds r17=PT(F7)+16,sp
+	;;
+ 	stf.spill [r16]=f6,32
+ 	stf.spill [r17]=f7,32
+	;;
+ 	stf.spill [r16]=f8,32
+ 	stf.spill [r17]=f9,32
+	;;
+ 	stf.spill [r16]=f10
+ 	stf.spill [r17]=f11
+	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
+	adds r16=PT(F6)+16,sp
+	adds r17=PT(F7)+16,sp
+	;;
+	ldf.fill f6=[r16],32
+	ldf.fill f7=[r17],32
+	;;
+	ldf.fill f8=[r16],32
+	ldf.fill f9=[r17],32
+	;;
+	ldf.fill f10=[r16]
+	ldf.fill f11=[r17]
+	// the syscall number may have changed, so re-load it and re-calculate the
+	// syscall entry-point:
+	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
+	;;
+	ld8 r15=[r15]
+	mov r3=NR_syscalls - 1
+	;;
+	adds r15=-1024,r15
+	movl r16=sys_call_table
+	;;
+	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
+	cmp.leu p6,p7=r15,r3
+	;;
+(p6)	ld8 r20=[r20]				// load address of syscall entry point
+(p7)	movl r20=sys_ni_syscall
+	;;
+	mov b6=r20
+	br.call.sptk.many rp=b6			// do the syscall
+.strace_check_retval:
+	cmp.lt p6,p0=r8,r0			// syscall failed?
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	mov r10=0
+(p6)	br.cond.sptk strace_error		// syscall failed ->
+	;;					// avoid RAW on r10
+.strace_save_retval:
+.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
+.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
+	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
+.ret3:	br.cond.sptk .work_pending_syscall_end
+
+strace_error:
+	ld8 r3=[r2]				// load pt_regs.r8
+	sub r9=0,r8				// negate return value to get errno value
+	;;
+	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
+	adds r3=16,r2				// r3=&pt_regs.r10
+	;;
+(p6)	mov r10=-1
+(p6)	mov r8=r9
+	br.cond.sptk .strace_save_retval
+END(__ia64_trace_syscall)
+
+	/*
+	 * When traced and returning from sigreturn, we invoke syscall_trace but then
+	 * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
+	 */
+GLOBAL_ENTRY(ia64_strace_leave_kernel)
+	PT_REGS_UNWIND_INFO(0)
+{	/*
+	 * Some versions of gas generate bad unwind info if the first instruction of a
+	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
+	 */
+	nop.m 0
+	nop.i 0
+	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
+}
+.ret4:	br.cond.sptk ia64_leave_kernel
+END(ia64_strace_leave_kernel)
+
+GLOBAL_ENTRY(ia64_ret_from_clone)
+	PT_REGS_UNWIND_INFO(0)
+{	/*
+	 * Some versions of gas generate bad unwind info if the first instruction of a
+	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
+	 */
+	nop.m 0
+	nop.i 0
+	/*
+	 * We need to call schedule_tail() to complete the scheduling process.
+	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
+	 * address of the previously executing task.
+	 */
+	br.call.sptk.many rp=ia64_invoke_schedule_tail
+}
+.ret8:
+	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+	ld4 r2=[r2]
+	;;
+	mov r8=0
+	and r2=_TIF_SYSCALL_TRACEAUDIT,r2
+	;;
+	cmp.ne p6,p0=r2,r0
+(p6)	br.cond.spnt .strace_check_retval
+	;;					// added stop bits to prevent r8 dependency
+END(ia64_ret_from_clone)
+	// fall through
+GLOBAL_ENTRY(ia64_ret_from_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	cmp.ge p6,p7=r8,r0			// syscall executed successfully?
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	mov r10=r0				// clear error indication in r10
+(p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
+	;;
+	// don't fall through, ia64_leave_syscall may be #define'd
+	br.cond.sptk.few ia64_leave_syscall
+	;;
+END(ia64_ret_from_syscall)
+/*
+ * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
+ *	need to switch to bank 0 and doesn't restore the scratch registers.
+ *	To avoid leaking kernel bits, the scratch registers are set to
+ *	the following known-to-be-safe values:
+ *
+ *		  r1: restored (global pointer)
+ *		  r2: cleared
+ *		  r3: 1 (when returning to user-level)
+ *	      r8-r11: restored (syscall return value(s))
+ *		 r12: restored (user-level stack pointer)
+ *		 r13: restored (user-level thread pointer)
+ *		 r14: cleared
+ *		 r15: restored (syscall #)
+ *	     r16-r17: cleared
+ *		 r18: user-level b6
+ *		 r19: cleared
+ *		 r20: user-level ar.fpsr
+ *		 r21: user-level b0
+ *		 r22: cleared
+ *		 r23: user-level ar.bspstore
+ *		 r24: user-level ar.rnat
+ *		 r25: user-level ar.unat
+ *		 r26: user-level ar.pfs
+ *		 r27: user-level ar.rsc
+ *		 r28: user-level ip
+ *		 r29: user-level psr
+ *		 r30: user-level cfm
+ *		 r31: user-level pr
+ *	      f6-f11: cleared
+ *		  pr: restored (user-level pr)
+ *		  b0: restored (user-level rp)
+ *	          b6: restored
+ *		  b7: cleared
+ *	     ar.unat: restored (user-level ar.unat)
+ *	      ar.pfs: restored (user-level ar.pfs)
+ *	      ar.rsc: restored (user-level ar.rsc)
+ *	     ar.rnat: restored (user-level ar.rnat)
+ *	 ar.bspstore: restored (user-level ar.bspstore)
+ *	     ar.fpsr: restored (user-level ar.fpsr)
+ *	      ar.ccv: cleared
+ *	      ar.csd: cleared
+ *	      ar.ssd: cleared
+ */
+GLOBAL_ENTRY(__ia64_leave_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
+	 * user- or fsys-mode, hence we disable interrupts early on.
+	 *
+	 * p6 controls whether current_thread_info()->flags needs to be check for
+	 * extra work.  We always check for extra work when returning to user-level.
+	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * is 0.  After extra work processing has been completed, execution
+	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * needs to be redone.
+	 */
+#ifdef CONFIG_PREEMPT
+	rsm psr.i				// disable interrupts
+	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
+(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+	.pred.rel.mutex pUStk,pKStk
+(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
+(pUStk)	mov r21=0			// r21 <- 0
+	;;
+	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
+#else /* !CONFIG_PREEMPT */
+(pUStk)	rsm psr.i
+	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
+(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
+#endif
+.work_processed_syscall:
+	adds r2=PT(LOADRS)+16,r12
+	adds r3=PT(AR_BSPSTORE)+16,r12
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	mov b7=r0		// clear b7
+	;;
+	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
+	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
+(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
+	;;
+	mov r16=ar.bsp				// M2  get existing backing store pointer
+(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
+(p6)	br.cond.spnt .work_pending_syscall
+	;;
+	// start restoring the state saved on the kernel stack (struct pt_regs):
+	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
+	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
+	mov f6=f0		// clear f6
+	;;
+	invala			// M0|1 invalidate ALAT
+	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
+	mov f9=f0		// clear f9
+
+	ld8 r29=[r2],16		// load cr.ipsr
+	ld8 r28=[r3],16			// load cr.iip
+	mov f8=f0		// clear f8
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+	mov f10=f0		// clear f10
+	;;
+	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
+	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
+	mov f11=f0		// clear f11
+	;;
+	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
+	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
+	ld8.fill r1=[r3],16	// load r1
+(pUStk) mov r17=1
+	;;
+	srlz.d			// M0  ensure interruption collection is off
+	ld8.fill r13=[r3],16
+	mov f7=f0		// clear f7
+	;;
+	ld8.fill r12=[r2]	// restore r12 (sp)
+	mov.m ar.ssd=r0		// M2 clear ar.ssd
+	mov r22=r0		// clear r22
+
+	ld8.fill r15=[r3]	// restore r15
+(pUStk) st1 [r14]=r17
+	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
+	;;
+(pUStk)	ld4 r17=[r3]		// r17 = cpu_data->phys_stacked_size_p8
+	mov.m ar.csd=r0		// M2 clear ar.csd
+	mov b6=r18		// I0  restore b6
+	;;
+	mov r14=r0		// clear r14
+	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
+(pKStk) br.cond.dpnt.many skip_rbs_switch
+
+	mov.m ar.ccv=r0		// clear ar.ccv
+(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
+	br.cond.sptk.many rbs_switch
+END(__ia64_leave_syscall)
+
+#ifdef CONFIG_IA32_SUPPORT
+GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
+	PT_REGS_UNWIND_INFO(0)
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	;;
+	.mem.offset 0,0
+	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
+	.mem.offset 8,0
+	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
+	;;
+	// don't fall through, ia64_leave_kernel may be #define'd
+	br.cond.sptk.few ia64_leave_kernel
+	;;
+END(ia64_ret_from_ia32_execve)
+#endif /* CONFIG_IA32_SUPPORT */
+GLOBAL_ENTRY(__ia64_leave_kernel)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
+	 * user- or fsys-mode, hence we disable interrupts early on.
+	 *
+	 * p6 controls whether current_thread_info()->flags needs to be check for
+	 * extra work.  We always check for extra work when returning to user-level.
+	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * is 0.  After extra work processing has been completed, execution
+	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * needs to be redone.
+	 */
+#ifdef CONFIG_PREEMPT
+	rsm psr.i				// disable interrupts
+	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
+(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+	.pred.rel.mutex pUStk,pKStk
+(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
+(pUStk)	mov r21=0			// r21 <- 0
+	;;
+	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
+#else
+(pUStk)	rsm psr.i
+	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
+(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
+#endif
+.work_processed_kernel:
+	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
+	adds r21=PT(PR)+16,r12
+	;;
+
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8		// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
+	adds r30=PT(AR_CCV)+16,r12
+(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]		// load ar.ccv
+(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
+	;;
+	ld8 r29=[r2],16		// load b7
+	ld8 r30=[r3],16		// load ar.csd
+(p6)	br.cond.spnt .work_pending
+	;;
+	ld8 r31=[r2],16		// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r15
+	;;
+	ldf.fill f11=[r2]
+	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
+	;;
+(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+	nop.i 0
+	nop.i 0
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r21=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+(pUStk)	mov r17=1
+	;;
+	ld8.fill r3=[r16]
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
+	;;
+	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
+(pKStk)	br.cond.dpnt skip_rbs_switch
+
+	/*
+	 * Restore user backing store.
+	 *
+	 * NOTE: alloc, loadrs, and cover can't be predicated.
+	 */
+(pNonSys) br.cond.dpnt dont_preserve_current_frame
+
+rbs_switch:
+	cover				// add current frame into dirty partition and set cr.ifs
+	;;
+	mov r19=ar.bsp			// get new backing store pointer
+	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
+	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
+	;;
+	sub r19=r19,r16			// calculate total byte size of dirty partition
+	add r18=64,r18			// don't force in0-in7 into memory...
+	;;
+	shl r19=r19,16			// shift size of dirty partition into loadrs position
+	;;
+dont_preserve_current_frame:
+	/*
+	 * To prevent leaking bits between the kernel and user-space,
+	 * we must clear the stacked registers in the "invalid" partition here.
+	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
+	 * 5 registers/cycle on McKinley).
+	 */
+#	define pRecurse	p6
+#	define pReturn	p7
+#ifdef CONFIG_ITANIUM
+#	define Nregs	10
+#else
+#	define Nregs	14
+#endif
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
+	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
+	;;
+	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
+	shladd in0=loc1,3,r17
+	mov in1=0
+	;;
+	TEXT_ALIGN(32)
+rse_clear_invalid:
+#ifdef CONFIG_ITANIUM
+	// cycle 0
+ { .mii
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+}{ .mfb
+	add out1=1,in1			// increment recursion count
+	nop.f 0
+	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
+	;;
+}{ .mfi	// cycle 1
+	mov loc1=0
+	nop.f 0
+	mov loc2=0
+}{ .mib
+	mov loc3=0
+	mov loc4=0
+(pRecurse) br.call.sptk.many b0=rse_clear_invalid
+
+}{ .mfi	// cycle 2
+	mov loc5=0
+	nop.f 0
+	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
+}{ .mib
+	mov loc6=0
+	mov loc7=0
+(pReturn) br.ret.sptk.many b0
+}
+#else /* !CONFIG_ITANIUM */
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+	add out1=1,in1			// increment recursion count
+	mov loc1=0
+	mov loc2=0
+	;;
+	mov loc3=0
+	mov loc4=0
+	mov loc5=0
+	mov loc6=0
+	mov loc7=0
+(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+	;;
+	mov loc8=0
+	mov loc9=0
+	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
+	mov loc10=0
+	mov loc11=0
+(pReturn) br.ret.sptk.many b0
+#endif /* !CONFIG_ITANIUM */
+#	undef pRecurse
+#	undef pReturn
+	;;
+	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
+	;;
+	loadrs
+	;;
+skip_rbs_switch:
+	mov ar.unat=r25		// M2
+(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
+(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
+	;;
+(pUStk)	mov ar.bspstore=r23	// M2
+(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
+(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
+	;;
+	mov cr.ipsr=r29		// M2
+	mov ar.pfs=r26		// I0
+(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
+
+(p9)	mov cr.ifs=r30		// M2
+	mov b0=r21		// I0
+(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
+
+	mov ar.fpsr=r20		// M2
+	mov cr.iip=r28		// M2
+	nop 0
+	;;
+(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
+	nop 0
+(pLvSys)mov r2=r0
+
+	mov ar.rsc=r27		// M2
+	mov pr=r31,-1		// I0
+	rfi			// B
+
+	/*
+	 * On entry:
+	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
+	 *	r31 = current->thread_info->flags
+	 * On exit:
+	 *	p6 = TRUE if work-pending-check needs to be redone
+	 */
+.work_pending_syscall:
+	add r2=-8,r2
+	add r3=-8,r3
+	;;
+	st8 [r2]=r8
+	st8 [r3]=r10
+.work_pending:
+	tbit.nz p6,p0=r31,TIF_SIGDELAYED		// signal delayed from  MCA/INIT/NMI/PMI context?
+(p6)	br.cond.sptk.few .sigdelayed
+	;;
+	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
+(p6)	br.cond.sptk.few .notify
+#ifdef CONFIG_PREEMPT
+(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
+	;;
+(pKStk) st4 [r20]=r21
+	ssm psr.i		// enable interrupts
+#endif
+	br.call.spnt.many rp=schedule
+.ret9:	cmp.eq p6,p0=r0,r0				// p6 <- 1
+	rsm psr.i		// disable interrupts
+	;;
+#ifdef CONFIG_PREEMPT
+(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
+#endif
+(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel	// re-check
+
+.notify:
+(pUStk)	br.call.spnt.many rp=notify_resume_user
+.ret10:	cmp.ne p6,p0=r0,r0				// p6 <- 0
+(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel	// don't re-check
+
+// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
+// it could not be delivered.  Deliver it now.  The signal might be for us and
+// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
+// signal.
+
+.sigdelayed:
+	br.call.sptk.many rp=do_sigdelayed
+	cmp.eq p6,p0=r0,r0				// p6 <- 1, always re-check
+(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel	// re-check
+
+.work_pending_syscall_end:
+	adds r2=PT(R8)+16,r12
+	adds r3=PT(R10)+16,r12
+	;;
+	ld8 r8=[r2]
+	ld8 r10=[r3]
+	br.cond.sptk.many .work_processed_syscall	// re-check
+
+END(__ia64_leave_kernel)
+
+ENTRY(handle_syscall_error)
+	/*
+	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
+	 * lead us to mistake a negative return value as a failed syscall.  Those syscall
+	 * must deposit a non-zero value in pt_regs.r8 to indicate an error.  If
+	 * pt_regs.r8 is zero, we assume that the call completed successfully.
+	 */
+	PT_REGS_UNWIND_INFO(0)
+	ld8 r3=[r2]		// load pt_regs.r8
+	;;
+	cmp.eq p6,p7=r3,r0	// is pt_regs.r8==0?
+	;;
+(p7)	mov r10=-1
+(p7)	sub r8=0,r8		// negate return value to get errno
+	br.cond.sptk ia64_leave_syscall
+END(handle_syscall_error)
+
+	/*
+	 * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
+	 * in case a system call gets restarted.
+	 */
+GLOBAL_ENTRY(ia64_invoke_schedule_tail)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,2,1,0
+	mov loc0=rp
+	mov out0=r8				// Address of previous task
+	;;
+	br.call.sptk.many rp=schedule_tail
+.ret11:	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(ia64_invoke_schedule_tail)
+
+	/*
+	 * Setup stack and call do_notify_resume_user().  Note that pSys and pNonSys need to
+	 * be set up by the caller.  We declare 8 input registers so the system call
+	 * args get preserved, in case we need to restart a system call.
+	 */
+GLOBAL_ENTRY(notify_resume_user)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
+	mov r9=ar.unat
+	mov loc0=rp				// save return address
+	mov out0=0				// there is no "oldset"
+	adds out1=8,sp				// out1=&sigscratch->ar_pfs
+(pSys)	mov out2=1				// out2==1 => we're in a syscall
+	;;
+(pNonSys) mov out2=0				// out2==0 => not a syscall
+	.fframe 16
+	.spillsp ar.unat, 16
+	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
+	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
+	.body
+	br.call.sptk.many rp=do_notify_resume_user
+.ret15:	.restore sp
+	adds sp=16,sp				// pop scratch stack space
+	;;
+	ld8 r9=[sp]				// load new unat from sigscratch->scratch_unat
+	mov rp=loc0
+	;;
+	mov ar.unat=r9
+	mov ar.pfs=loc1
+	br.ret.sptk.many rp
+END(notify_resume_user)
+
+GLOBAL_ENTRY(sys_rt_sigsuspend)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
+	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
+	mov r9=ar.unat
+	mov loc0=rp				// save return address
+	mov out0=in0				// mask
+	mov out1=in1				// sigsetsize
+	adds out2=8,sp				// out2=&sigscratch->ar_pfs
+	;;
+	.fframe 16
+	.spillsp ar.unat, 16
+	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
+	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
+	.body
+	br.call.sptk.many rp=ia64_rt_sigsuspend
+.ret17:	.restore sp
+	adds sp=16,sp				// pop scratch stack space
+	;;
+	ld8 r9=[sp]				// load new unat from sw->caller_unat
+	mov rp=loc0
+	;;
+	mov ar.unat=r9
+	mov ar.pfs=loc1
+	br.ret.sptk.many rp
+END(sys_rt_sigsuspend)
+
+ENTRY(sys_rt_sigreturn)
+	PT_REGS_UNWIND_INFO(0)
+	/*
+	 * Allocate 8 input registers since ptrace() may clobber them
+	 */
+	alloc r2=ar.pfs,8,0,1,0
+	.prologue
+	PT_REGS_SAVES(16)
+	adds sp=-16,sp
+	.body
+	cmp.eq pNonSys,pSys=r0,r0		// sigreturn isn't a normal syscall...
+	;;
+	/*
+	 * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
+	 * syscall-entry path does not save them we save them here instead.  Note: we
+	 * don't need to save any other registers that are not saved by the stream-lined
+	 * syscall path, because restore_sigcontext() restores them.
+	 */
+	adds r16=PT(F6)+32,sp
+	adds r17=PT(F7)+32,sp
+	;;
+ 	stf.spill [r16]=f6,32
+ 	stf.spill [r17]=f7,32
+	;;
+ 	stf.spill [r16]=f8,32
+ 	stf.spill [r17]=f9,32
+	;;
+ 	stf.spill [r16]=f10
+ 	stf.spill [r17]=f11
+	adds out0=16,sp				// out0 = &sigscratch
+	br.call.sptk.many rp=ia64_rt_sigreturn
+.ret19:	.restore sp 0
+	adds sp=16,sp
+	;;
+	ld8 r9=[sp]				// load new ar.unat
+	mov.sptk b7=r8,__ia64_leave_kernel
+	;;
+	mov ar.unat=r9
+	br.many b7
+END(sys_rt_sigreturn)
+
+GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
+	.prologue
+	/*
+	 * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
+	 */
+	mov r16=r0
+	DO_SAVE_SWITCH_STACK
+	br.call.sptk.many rp=ia64_handle_unaligned	// stack frame setup in ivt
+.ret21:	.body
+	DO_LOAD_SWITCH_STACK
+	br.cond.sptk.many rp				// goes to ia64_leave_kernel
+END(ia64_prepare_handle_unaligned)
+
+	//
+	// unw_init_running(void (*callback)(info, arg), void *arg)
+	//
+#	define EXTRA_FRAME_SIZE	((UNW_FRAME_INFO_SIZE+15)&~15)
+
+GLOBAL_ENTRY(unw_init_running)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	alloc loc1=ar.pfs,2,3,3,0
+	;;
+	ld8 loc2=[in0],8
+	mov loc0=rp
+	mov r16=loc1
+	DO_SAVE_SWITCH_STACK
+	.body
+
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
+	.fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
+	SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
+	adds sp=-EXTRA_FRAME_SIZE,sp
+	.body
+	;;
+	adds out0=16,sp				// &info
+	mov out1=r13				// current
+	adds out2=16+EXTRA_FRAME_SIZE,sp	// &switch_stack
+	br.call.sptk.many rp=unw_init_frame_info
+1:	adds out0=16,sp				// &info
+	mov b6=loc2
+	mov loc2=gp				// save gp across indirect function call
+	;;
+	ld8 gp=[in0]
+	mov out1=in1				// arg
+	br.call.sptk.many rp=b6			// invoke the callback function
+1:	mov gp=loc2				// restore gp
+
+	// For now, we don't allow changing registers from within
+	// unw_init_running; if we ever want to allow that, we'd
+	// have to do a load_switch_stack here:
+	.restore sp
+	adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp
+
+	mov ar.pfs=loc1
+	mov rp=loc0
+	br.ret.sptk.many rp
+END(unw_init_running)
+
+	.rodata
+	.align 8
+	.globl sys_call_table
+sys_call_table:
+	data8 sys_ni_syscall		//  This must be sys_ni_syscall!  See ivt.S.
+	data8 sys_exit				// 1025
+	data8 sys_read
+	data8 sys_write
+	data8 sys_open
+	data8 sys_close
+	data8 sys_creat				// 1030
+	data8 sys_link
+	data8 sys_unlink
+	data8 ia64_execve
+	data8 sys_chdir
+	data8 sys_fchdir			// 1035
+	data8 sys_utimes
+	data8 sys_mknod
+	data8 sys_chmod
+	data8 sys_chown
+	data8 sys_lseek				// 1040
+	data8 sys_getpid
+	data8 sys_getppid
+	data8 sys_mount
+	data8 sys_umount
+	data8 sys_setuid			// 1045
+	data8 sys_getuid
+	data8 sys_geteuid
+	data8 sys_ptrace
+	data8 sys_access
+	data8 sys_sync				// 1050
+	data8 sys_fsync
+	data8 sys_fdatasync
+	data8 sys_kill
+	data8 sys_rename
+	data8 sys_mkdir				// 1055
+	data8 sys_rmdir
+	data8 sys_dup
+	data8 sys_pipe
+	data8 sys_times
+	data8 ia64_brk				// 1060
+	data8 sys_setgid
+	data8 sys_getgid
+	data8 sys_getegid
+	data8 sys_acct
+	data8 sys_ioctl				// 1065
+	data8 sys_fcntl
+	data8 sys_umask
+	data8 sys_chroot
+	data8 sys_ustat
+	data8 sys_dup2				// 1070
+	data8 sys_setreuid
+	data8 sys_setregid
+	data8 sys_getresuid
+	data8 sys_setresuid
+	data8 sys_getresgid			// 1075
+	data8 sys_setresgid
+	data8 sys_getgroups
+	data8 sys_setgroups
+	data8 sys_getpgid
+	data8 sys_setpgid			// 1080
+	data8 sys_setsid
+	data8 sys_getsid
+	data8 sys_sethostname
+	data8 sys_setrlimit
+	data8 sys_getrlimit			// 1085
+	data8 sys_getrusage
+	data8 sys_gettimeofday
+	data8 sys_settimeofday
+	data8 sys_select
+	data8 sys_poll				// 1090
+	data8 sys_symlink
+	data8 sys_readlink
+	data8 sys_uselib
+	data8 sys_swapon
+	data8 sys_swapoff			// 1095
+	data8 sys_reboot
+	data8 sys_truncate
+	data8 sys_ftruncate
+	data8 sys_fchmod
+	data8 sys_fchown			// 1100
+	data8 ia64_getpriority
+	data8 sys_setpriority
+	data8 sys_statfs
+	data8 sys_fstatfs
+	data8 sys_gettid			// 1105
+	data8 sys_semget
+	data8 sys_semop
+	data8 sys_semctl
+	data8 sys_msgget
+	data8 sys_msgsnd			// 1110
+	data8 sys_msgrcv
+	data8 sys_msgctl
+	data8 sys_shmget
+	data8 sys_shmat
+	data8 sys_shmdt				// 1115
+	data8 sys_shmctl
+	data8 sys_syslog
+	data8 sys_setitimer
+	data8 sys_getitimer
+	data8 sys_ni_syscall			// 1120		/* was: ia64_oldstat */
+	data8 sys_ni_syscall					/* was: ia64_oldlstat */
+	data8 sys_ni_syscall					/* was: ia64_oldfstat */
+	data8 sys_vhangup
+	data8 sys_lchown
+	data8 sys_remap_file_pages		// 1125
+	data8 sys_wait4
+	data8 sys_sysinfo
+	data8 sys_clone
+	data8 sys_setdomainname
+	data8 sys_newuname			// 1130
+	data8 sys_adjtimex
+	data8 sys_ni_syscall					/* was: ia64_create_module */
+	data8 sys_init_module
+	data8 sys_delete_module
+	data8 sys_ni_syscall			// 1135		/* was: sys_get_kernel_syms */
+	data8 sys_ni_syscall					/* was: sys_query_module */
+	data8 sys_quotactl
+	data8 sys_bdflush
+	data8 sys_sysfs
+	data8 sys_personality			// 1140
+	data8 sys_ni_syscall		// sys_afs_syscall
+	data8 sys_setfsuid
+	data8 sys_setfsgid
+	data8 sys_getdents
+	data8 sys_flock				// 1145
+	data8 sys_readv
+	data8 sys_writev
+	data8 sys_pread64
+	data8 sys_pwrite64
+	data8 sys_sysctl			// 1150
+	data8 sys_mmap
+	data8 sys_munmap
+	data8 sys_mlock
+	data8 sys_mlockall
+	data8 sys_mprotect			// 1155
+	data8 ia64_mremap
+	data8 sys_msync
+	data8 sys_munlock
+	data8 sys_munlockall
+	data8 sys_sched_getparam		// 1160
+	data8 sys_sched_setparam
+	data8 sys_sched_getscheduler
+	data8 sys_sched_setscheduler
+	data8 sys_sched_yield
+	data8 sys_sched_get_priority_max	// 1165
+	data8 sys_sched_get_priority_min
+	data8 sys_sched_rr_get_interval
+	data8 sys_nanosleep
+	data8 sys_nfsservctl
+	data8 sys_prctl				// 1170
+	data8 sys_getpagesize
+	data8 sys_mmap2
+	data8 sys_pciconfig_read
+	data8 sys_pciconfig_write
+	data8 sys_perfmonctl			// 1175
+	data8 sys_sigaltstack
+	data8 sys_rt_sigaction
+	data8 sys_rt_sigpending
+	data8 sys_rt_sigprocmask
+	data8 sys_rt_sigqueueinfo		// 1180
+	data8 sys_rt_sigreturn
+	data8 sys_rt_sigsuspend
+	data8 sys_rt_sigtimedwait
+	data8 sys_getcwd
+	data8 sys_capget			// 1185
+	data8 sys_capset
+	data8 sys_sendfile64
+	data8 sys_ni_syscall		// sys_getpmsg (STREAMS)
+	data8 sys_ni_syscall		// sys_putpmsg (STREAMS)
+	data8 sys_socket			// 1190
+	data8 sys_bind
+	data8 sys_connect
+	data8 sys_listen
+	data8 sys_accept
+	data8 sys_getsockname			// 1195
+	data8 sys_getpeername
+	data8 sys_socketpair
+	data8 sys_send
+	data8 sys_sendto
+	data8 sys_recv				// 1200
+	data8 sys_recvfrom
+	data8 sys_shutdown
+	data8 sys_setsockopt
+	data8 sys_getsockopt
+	data8 sys_sendmsg			// 1205
+	data8 sys_recvmsg
+	data8 sys_pivot_root
+	data8 sys_mincore
+	data8 sys_madvise
+	data8 sys_newstat			// 1210
+	data8 sys_newlstat
+	data8 sys_newfstat
+	data8 sys_clone2
+	data8 sys_getdents64
+	data8 sys_getunwind			// 1215
+	data8 sys_readahead
+	data8 sys_setxattr
+	data8 sys_lsetxattr
+	data8 sys_fsetxattr
+	data8 sys_getxattr			// 1220
+	data8 sys_lgetxattr
+	data8 sys_fgetxattr
+	data8 sys_listxattr
+	data8 sys_llistxattr
+	data8 sys_flistxattr			// 1225
+	data8 sys_removexattr
+	data8 sys_lremovexattr
+	data8 sys_fremovexattr
+	data8 sys_tkill
+	data8 sys_futex				// 1230
+	data8 sys_sched_setaffinity
+	data8 sys_sched_getaffinity
+	data8 sys_set_tid_address
+	data8 sys_fadvise64_64
+	data8 sys_tgkill 			// 1235
+	data8 sys_exit_group
+	data8 sys_lookup_dcookie
+	data8 sys_io_setup
+	data8 sys_io_destroy
+	data8 sys_io_getevents			// 1240
+	data8 sys_io_submit
+	data8 sys_io_cancel
+	data8 sys_epoll_create
+	data8 sys_epoll_ctl
+	data8 sys_epoll_wait			// 1245
+	data8 sys_restart_syscall
+	data8 sys_semtimedop
+	data8 sys_timer_create
+	data8 sys_timer_settime
+	data8 sys_timer_gettime			// 1250
+	data8 sys_timer_getoverrun
+	data8 sys_timer_delete
+	data8 sys_clock_settime
+	data8 sys_clock_gettime
+	data8 sys_clock_getres			// 1255
+	data8 sys_clock_nanosleep
+	data8 sys_fstatfs64
+	data8 sys_statfs64
+	data8 sys_mbind
+	data8 sys_get_mempolicy			// 1260
+	data8 sys_set_mempolicy
+	data8 sys_mq_open
+	data8 sys_mq_unlink
+	data8 sys_mq_timedsend
+	data8 sys_mq_timedreceive		// 1265
+	data8 sys_mq_notify
+	data8 sys_mq_getsetattr
+	data8 sys_ni_syscall			// reserved for kexec_load
+	data8 sys_ni_syscall			// reserved for vserver
+	data8 sys_waitid			// 1270
+	data8 sys_add_key
+	data8 sys_request_key
+	data8 sys_keyctl
+	data8 sys_ni_syscall
+	data8 sys_ni_syscall			// 1275
+	data8 sys_ni_syscall
+	data8 sys_ni_syscall
+	data8 sys_ni_syscall
+	data8 sys_ni_syscall
+
+	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/head.S b/linux-2.6-xen-sparse/arch/ia64/kernel/head.S
new file mode 100644
index 0000000000..222a68d1cf
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/head.S
@@ -0,0 +1,1224 @@
+/*
+ * Here is where the ball gets rolling as far as the kernel is concerned.
+ * When control is transferred to _start, the bootload has already
+ * loaded us to the correct address.  All that's left to do here is
+ * to set up the kernel's global pointer and jump to the kernel
+ * entry point.
+ *
+ * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
+ * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
+ *   -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
+ * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
+ *   Support for CPU Hotplug
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/fpu.h>
+#include <asm/kregs.h>
+#include <asm/mmu_context.h>
+#include <asm/offsets.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/mca_asm.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define SAL_PSR_BITS_TO_SET				\
+	(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL)
+
+#define SAVE_FROM_REG(src, ptr, dest)	\
+	mov dest=src;;						\
+	st8 [ptr]=dest,0x08
+
+#define RESTORE_REG(reg, ptr, _tmp)		\
+	ld8 _tmp=[ptr],0x08;;				\
+	mov reg=_tmp
+
+#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\
+	mov ar.lc=IA64_NUM_DBG_REGS-1;; 			\
+	mov _idx=0;; 								\
+1: 												\
+	SAVE_FROM_REG(_breg[_idx], ptr, _dest);;	\
+	add _idx=1,_idx;;							\
+	br.cloop.sptk.many 1b
+
+#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\
+	mov ar.lc=IA64_NUM_DBG_REGS-1;;			\
+	mov _idx=0;;							\
+_lbl:  RESTORE_REG(_breg[_idx], ptr, _tmp);;	\
+	add _idx=1, _idx;;						\
+	br.cloop.sptk.many _lbl
+
+#define SAVE_ONE_RR(num, _reg, _tmp) \
+	movl _tmp=(num<<61);;	\
+	mov _reg=rr[_tmp]
+
+#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
+	SAVE_ONE_RR(0,_r0, _tmp);; \
+	SAVE_ONE_RR(1,_r1, _tmp);; \
+	SAVE_ONE_RR(2,_r2, _tmp);; \
+	SAVE_ONE_RR(3,_r3, _tmp);; \
+	SAVE_ONE_RR(4,_r4, _tmp);; \
+	SAVE_ONE_RR(5,_r5, _tmp);; \
+	SAVE_ONE_RR(6,_r6, _tmp);; \
+	SAVE_ONE_RR(7,_r7, _tmp);;
+
+#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
+	st8 [ptr]=_r0, 8;; \
+	st8 [ptr]=_r1, 8;; \
+	st8 [ptr]=_r2, 8;; \
+	st8 [ptr]=_r3, 8;; \
+	st8 [ptr]=_r4, 8;; \
+	st8 [ptr]=_r5, 8;; \
+	st8 [ptr]=_r6, 8;; \
+	st8 [ptr]=_r7, 8;;
+
+#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \
+	mov		ar.lc=0x08-1;;						\
+	movl	_idx1=0x00;;						\
+RestRR:											\
+	dep.z	_idx2=_idx1,61,3;;					\
+	ld8		_tmp=[ptr],8;;						\
+	mov		rr[_idx2]=_tmp;;					\
+	srlz.d;;									\
+	add		_idx1=1,_idx1;;						\
+	br.cloop.sptk.few	RestRR
+
+#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \
+	movl reg1=sal_state_for_booting_cpu;;	\
+	ld8 reg2=[reg1];;
+
+/*
+ * Adjust region registers saved before starting to save
+ * break regs and rest of the states that need to be preserved.
+ */
+#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred)  \
+	SAVE_FROM_REG(b0,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b1,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b2,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b3,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b4,_reg1,_reg2);;						\
+	SAVE_FROM_REG(b5,_reg1,_reg2);;						\
+	st8 [_reg1]=r1,0x08;;								\
+	st8 [_reg1]=r12,0x08;;								\
+	st8 [_reg1]=r13,0x08;;								\
+	SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.pfs,_reg1,_reg2);;					\
+	SAVE_FROM_REG(ar.rnat,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.unat,_reg1,_reg2);;				\
+	SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);;			\
+	SAVE_FROM_REG(cr.dcr,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.iva,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.pta,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.itv,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.pmv,_reg1,_reg2);;					\
+	SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);;				\
+	SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);;				\
+	SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);;				\
+	st8 [_reg1]=r4,0x08;;								\
+	st8 [_reg1]=r5,0x08;;								\
+	st8 [_reg1]=r6,0x08;;								\
+	st8 [_reg1]=r7,0x08;;								\
+	st8 [_reg1]=_pred,0x08;;							\
+	SAVE_FROM_REG(ar.lc, _reg1, _reg2);;				\
+	stf.spill.nta [_reg1]=f2,16;;						\
+	stf.spill.nta [_reg1]=f3,16;;						\
+	stf.spill.nta [_reg1]=f4,16;;						\
+	stf.spill.nta [_reg1]=f5,16;;						\
+	stf.spill.nta [_reg1]=f16,16;;						\
+	stf.spill.nta [_reg1]=f17,16;;						\
+	stf.spill.nta [_reg1]=f18,16;;						\
+	stf.spill.nta [_reg1]=f19,16;;						\
+	stf.spill.nta [_reg1]=f20,16;;						\
+	stf.spill.nta [_reg1]=f21,16;;						\
+	stf.spill.nta [_reg1]=f22,16;;						\
+	stf.spill.nta [_reg1]=f23,16;;						\
+	stf.spill.nta [_reg1]=f24,16;;						\
+	stf.spill.nta [_reg1]=f25,16;;						\
+	stf.spill.nta [_reg1]=f26,16;;						\
+	stf.spill.nta [_reg1]=f27,16;;						\
+	stf.spill.nta [_reg1]=f28,16;;						\
+	stf.spill.nta [_reg1]=f29,16;;						\
+	stf.spill.nta [_reg1]=f30,16;;						\
+	stf.spill.nta [_reg1]=f31,16;;
+
+#else
+#define SET_AREA_FOR_BOOTING_CPU(a1, a2)
+#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3)
+#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
+#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
+#endif
+
+#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \
+	movl _tmp1=(num << 61);;	\
+	mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \
+	mov rr[_tmp1]=_tmp2
+
+	.section __special_page_section,"ax"
+
+	.global empty_zero_page
+empty_zero_page:
+	.skip PAGE_SIZE
+
+	.global swapper_pg_dir
+swapper_pg_dir:
+	.skip PAGE_SIZE
+
+	.rodata
+halt_msg:
+	stringz "Halting kernel\n"
+
+	.text
+
+	.global start_ap
+
+	/*
+	 * Start the kernel.  When the bootloader passes control to _start(), r28
+	 * points to the address of the boot parameter area.  Execution reaches
+	 * here in physical mode.
+	 */
+GLOBAL_ENTRY(_start)
+start_ap:
+	.prologue
+	.save rp, r0		// terminate unwind chain with a NULL rp
+	.body
+
+	rsm psr.i | psr.ic
+	;;
+	srlz.i
+	;;
+	/*
+	 * Save the region registers, predicate before they get clobbered
+	 */
+	SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15);
+	mov r25=pr;;
+
+	/*
+	 * Initialize kernel region registers:
+	 *	rr[0]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[1]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[2]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[3]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[4]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[5]: VHPT enabled, page size = PAGE_SHIFT
+	 *	rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+	 *	rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+	 * We initialize all of them to prevent inadvertently assuming
+	 * something about the state of address translation early in boot.
+	 */
+	SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);;
+	SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);;
+	SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);;
+	/*
+	 * Now pin mappings into the TLB for kernel text and data
+	 */
+	mov r18=KERNEL_TR_PAGE_SHIFT<<2
+	movl r17=KERNEL_START
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r17
+	mov r16=IA64_TR_KERNEL
+	mov r3=ip
+	movl r18=PAGE_KERNEL
+	;;
+	dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+	;;
+	or r18=r2,r18
+	;;
+	srlz.i
+	;;
+	itr.i itr[r16]=r18
+	;;
+	itr.d dtr[r16]=r18
+	;;
+	srlz.i
+
+	/*
+	 * Switch into virtual mode:
+	 */
+	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
+		  |IA64_PSR_DI)
+	;;
+	mov cr.ipsr=r16
+	movl r17=1f
+	;;
+	mov cr.iip=r17
+	mov cr.ifs=r0
+	;;
+	rfi
+	;;
+1:	// now we are in virtual mode
+
+	SET_AREA_FOR_BOOTING_CPU(r2, r16);
+
+	STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15);
+	SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25)
+	;;
+
+	// set IVT entry point---can't access I/O ports without it
+	movl r3=ia64_ivt
+	;;
+	mov cr.iva=r3
+	movl r2=FPSR_DEFAULT
+	;;
+	srlz.i
+	movl gp=__gp
+
+	mov ar.fpsr=r2
+	;;
+
+#define isAP	p2	// are we an Application Processor?
+#define isBP	p3	// are we the Bootstrap Processor?
+
+#ifdef CONFIG_SMP
+	/*
+	 * Find the init_task for the currently booting CPU.  At poweron, and in
+	 * UP mode, task_for_booting_cpu is NULL.
+	 */
+	movl r3=task_for_booting_cpu
+ 	;;
+	ld8 r3=[r3]
+	movl r2=init_task
+	;;
+	cmp.eq isBP,isAP=r3,r0
+	;;
+(isAP)	mov r2=r3
+#else
+	movl r2=init_task
+	cmp.eq isBP,isAP=r0,r0
+#endif
+	;;
+	tpa r3=r2		// r3 == phys addr of task struct
+	mov r16=-1
+(isBP)	br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
+
+	// load mapping for stack (virtaddr in r2, physaddr in r3)
+	rsm psr.ic
+	movl r17=PAGE_KERNEL
+	;;
+	srlz.d
+	dep r18=0,r3,0,12
+	;;
+	or r18=r17,r18
+	dep r2=-1,r3,61,3	// IMVA of task
+	;;
+	mov r17=rr[r2]
+	shr.u r16=r3,IA64_GRANULE_SHIFT
+	;;
+	dep r17=0,r17,8,24
+	;;
+	mov cr.itir=r17
+	mov cr.ifa=r2
+
+	mov r19=IA64_TR_CURRENT_STACK
+	;;
+	itr.d dtr[r19]=r18
+	;;
+	ssm psr.ic
+	srlz.d
+  	;;
+
+.load_current:
+	// load the "current" pointer (r13) and ar.k6 with the current task
+	mov IA64_KR(CURRENT)=r2		// virtual address
+	mov IA64_KR(CURRENT_STACK)=r16
+	mov r13=r2
+	/*
+	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel
+	 * threads don't store interesting values in that structure, but the space
+	 * still needs to be there because time-critical stuff such as the context
+	 * switching can be implemented more efficiently (for example, __switch_to()
+	 * always sets the psr.dfh bit of the task it is switching to).
+	 */
+
+	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
+	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
+	mov ar.rsc=0		// place RSE in enforced lazy mode
+	;;
+	loadrs			// clear the dirty partition
+	;;
+	mov ar.bspstore=r2	// establish the new RSE stack
+	;;
+	mov ar.rsc=0x3		// place RSE in eager mode
+
+(isBP)	dep r28=-1,r28,61,3	// make address virtual
+(isBP)	movl r2=ia64_boot_param
+	;;
+(isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader
+
+#ifdef CONFIG_SMP
+(isAP)	br.call.sptk.many rp=start_secondary
+.ret0:
+(isAP)	br.cond.sptk self
+#endif
+
+	// This is executed by the bootstrap processor (bsp) only:
+
+#ifdef CONFIG_XEN
+	br.call.sptk.many rp=early_xen_setup
+	;;
+#endif
+#ifdef CONFIG_IA64_FW_EMU
+	// initialize PAL & SAL emulator:
+	br.call.sptk.many rp=sys_fw_init
+.ret1:
+#endif
+	br.call.sptk.many rp=start_kernel
+.ret2:	addl r3=@ltoff(halt_msg),gp
+	;;
+	alloc r2=ar.pfs,8,0,2,0
+	;;
+	ld8 out0=[r3]
+	br.call.sptk.many b0=console_print
+
+self:	hint @pause
+	br.sptk.many self		// endless loop
+END(_start)
+
+GLOBAL_ENTRY(ia64_save_debug_regs)
+	alloc r16=ar.pfs,1,0,0,0
+	mov r20=ar.lc			// preserve ar.lc
+	mov ar.lc=IA64_NUM_DBG_REGS-1
+	mov r18=0
+	add r19=IA64_NUM_DBG_REGS*8,in0
+	;;
+1:	mov r16=dbr[r18]
+#ifdef CONFIG_ITANIUM
+	;;
+	srlz.d
+#endif
+	mov r17=ibr[r18]
+	add r18=1,r18
+	;;
+	st8.nta [in0]=r16,8
+	st8.nta [r19]=r17,8
+	br.cloop.sptk.many 1b
+	;;
+	mov ar.lc=r20			// restore ar.lc
+	br.ret.sptk.many rp
+END(ia64_save_debug_regs)
+
+GLOBAL_ENTRY(ia64_load_debug_regs)
+	alloc r16=ar.pfs,1,0,0,0
+	lfetch.nta [in0]
+	mov r20=ar.lc			// preserve ar.lc
+	add r19=IA64_NUM_DBG_REGS*8,in0
+	mov ar.lc=IA64_NUM_DBG_REGS-1
+	mov r18=-1
+	;;
+1:	ld8.nta r16=[in0],8
+	ld8.nta r17=[r19],8
+	add r18=1,r18
+	;;
+	mov dbr[r18]=r16
+#ifdef CONFIG_ITANIUM
+	;;
+	srlz.d				// Errata 132 (NoFix status)
+#endif
+	mov ibr[r18]=r17
+	br.cloop.sptk.many 1b
+	;;
+	mov ar.lc=r20			// restore ar.lc
+	br.ret.sptk.many rp
+END(ia64_load_debug_regs)
+
+GLOBAL_ENTRY(__ia64_save_fpu)
+	alloc r2=ar.pfs,1,4,0,0
+	adds loc0=96*16-16,in0
+	adds loc1=96*16-16-128,in0
+	;;
+	stf.spill.nta [loc0]=f127,-256
+	stf.spill.nta [loc1]=f119,-256
+	;;
+	stf.spill.nta [loc0]=f111,-256
+	stf.spill.nta [loc1]=f103,-256
+	;;
+	stf.spill.nta [loc0]=f95,-256
+	stf.spill.nta [loc1]=f87,-256
+	;;
+	stf.spill.nta [loc0]=f79,-256
+	stf.spill.nta [loc1]=f71,-256
+	;;
+	stf.spill.nta [loc0]=f63,-256
+	stf.spill.nta [loc1]=f55,-256
+	adds loc2=96*16-32,in0
+	;;
+	stf.spill.nta [loc0]=f47,-256
+	stf.spill.nta [loc1]=f39,-256
+	adds loc3=96*16-32-128,in0
+	;;
+	stf.spill.nta [loc2]=f126,-256
+	stf.spill.nta [loc3]=f118,-256
+	;;
+	stf.spill.nta [loc2]=f110,-256
+	stf.spill.nta [loc3]=f102,-256
+	;;
+	stf.spill.nta [loc2]=f94,-256
+	stf.spill.nta [loc3]=f86,-256
+	;;
+	stf.spill.nta [loc2]=f78,-256
+	stf.spill.nta [loc3]=f70,-256
+	;;
+	stf.spill.nta [loc2]=f62,-256
+	stf.spill.nta [loc3]=f54,-256
+	adds loc0=96*16-48,in0
+	;;
+	stf.spill.nta [loc2]=f46,-256
+	stf.spill.nta [loc3]=f38,-256
+	adds loc1=96*16-48-128,in0
+	;;
+	stf.spill.nta [loc0]=f125,-256
+	stf.spill.nta [loc1]=f117,-256
+	;;
+	stf.spill.nta [loc0]=f109,-256
+	stf.spill.nta [loc1]=f101,-256
+	;;
+	stf.spill.nta [loc0]=f93,-256
+	stf.spill.nta [loc1]=f85,-256
+	;;
+	stf.spill.nta [loc0]=f77,-256
+	stf.spill.nta [loc1]=f69,-256
+	;;
+	stf.spill.nta [loc0]=f61,-256
+	stf.spill.nta [loc1]=f53,-256
+	adds loc2=96*16-64,in0
+	;;
+	stf.spill.nta [loc0]=f45,-256
+	stf.spill.nta [loc1]=f37,-256
+	adds loc3=96*16-64-128,in0
+	;;
+	stf.spill.nta [loc2]=f124,-256
+	stf.spill.nta [loc3]=f116,-256
+	;;
+	stf.spill.nta [loc2]=f108,-256
+	stf.spill.nta [loc3]=f100,-256
+	;;
+	stf.spill.nta [loc2]=f92,-256
+	stf.spill.nta [loc3]=f84,-256
+	;;
+	stf.spill.nta [loc2]=f76,-256
+	stf.spill.nta [loc3]=f68,-256
+	;;
+	stf.spill.nta [loc2]=f60,-256
+	stf.spill.nta [loc3]=f52,-256
+	adds loc0=96*16-80,in0
+	;;
+	stf.spill.nta [loc2]=f44,-256
+	stf.spill.nta [loc3]=f36,-256
+	adds loc1=96*16-80-128,in0
+	;;
+	stf.spill.nta [loc0]=f123,-256
+	stf.spill.nta [loc1]=f115,-256
+	;;
+	stf.spill.nta [loc0]=f107,-256
+	stf.spill.nta [loc1]=f99,-256
+	;;
+	stf.spill.nta [loc0]=f91,-256
+	stf.spill.nta [loc1]=f83,-256
+	;;
+	stf.spill.nta [loc0]=f75,-256
+	stf.spill.nta [loc1]=f67,-256
+	;;
+	stf.spill.nta [loc0]=f59,-256
+	stf.spill.nta [loc1]=f51,-256
+	adds loc2=96*16-96,in0
+	;;
+	stf.spill.nta [loc0]=f43,-256
+	stf.spill.nta [loc1]=f35,-256
+	adds loc3=96*16-96-128,in0
+	;;
+	stf.spill.nta [loc2]=f122,-256
+	stf.spill.nta [loc3]=f114,-256
+	;;
+	stf.spill.nta [loc2]=f106,-256
+	stf.spill.nta [loc3]=f98,-256
+	;;
+	stf.spill.nta [loc2]=f90,-256
+	stf.spill.nta [loc3]=f82,-256
+	;;
+	stf.spill.nta [loc2]=f74,-256
+	stf.spill.nta [loc3]=f66,-256
+	;;
+	stf.spill.nta [loc2]=f58,-256
+	stf.spill.nta [loc3]=f50,-256
+	adds loc0=96*16-112,in0
+	;;
+	stf.spill.nta [loc2]=f42,-256
+	stf.spill.nta [loc3]=f34,-256
+	adds loc1=96*16-112-128,in0
+	;;
+	stf.spill.nta [loc0]=f121,-256
+	stf.spill.nta [loc1]=f113,-256
+	;;
+	stf.spill.nta [loc0]=f105,-256
+	stf.spill.nta [loc1]=f97,-256
+	;;
+	stf.spill.nta [loc0]=f89,-256
+	stf.spill.nta [loc1]=f81,-256
+	;;
+	stf.spill.nta [loc0]=f73,-256
+	stf.spill.nta [loc1]=f65,-256
+	;;
+	stf.spill.nta [loc0]=f57,-256
+	stf.spill.nta [loc1]=f49,-256
+	adds loc2=96*16-128,in0
+	;;
+	stf.spill.nta [loc0]=f41,-256
+	stf.spill.nta [loc1]=f33,-256
+	adds loc3=96*16-128-128,in0
+	;;
+	stf.spill.nta [loc2]=f120,-256
+	stf.spill.nta [loc3]=f112,-256
+	;;
+	stf.spill.nta [loc2]=f104,-256
+	stf.spill.nta [loc3]=f96,-256
+	;;
+	stf.spill.nta [loc2]=f88,-256
+	stf.spill.nta [loc3]=f80,-256
+	;;
+	stf.spill.nta [loc2]=f72,-256
+	stf.spill.nta [loc3]=f64,-256
+	;;
+	stf.spill.nta [loc2]=f56,-256
+	stf.spill.nta [loc3]=f48,-256
+	;;
+	stf.spill.nta [loc2]=f40
+	stf.spill.nta [loc3]=f32
+	br.ret.sptk.many rp
+END(__ia64_save_fpu)
+
+GLOBAL_ENTRY(__ia64_load_fpu)
+	alloc r2=ar.pfs,1,2,0,0
+	adds r3=128,in0
+	adds r14=256,in0
+	adds r15=384,in0
+	mov loc0=512
+	mov loc1=-1024+16
+	;;
+	ldf.fill.nta f32=[in0],loc0
+	ldf.fill.nta f40=[ r3],loc0
+	ldf.fill.nta f48=[r14],loc0
+	ldf.fill.nta f56=[r15],loc0
+	;;
+	ldf.fill.nta f64=[in0],loc0
+	ldf.fill.nta f72=[ r3],loc0
+	ldf.fill.nta f80=[r14],loc0
+	ldf.fill.nta f88=[r15],loc0
+	;;
+	ldf.fill.nta f96=[in0],loc1
+	ldf.fill.nta f104=[ r3],loc1
+	ldf.fill.nta f112=[r14],loc1
+	ldf.fill.nta f120=[r15],loc1
+	;;
+	ldf.fill.nta f33=[in0],loc0
+	ldf.fill.nta f41=[ r3],loc0
+	ldf.fill.nta f49=[r14],loc0
+	ldf.fill.nta f57=[r15],loc0
+	;;
+	ldf.fill.nta f65=[in0],loc0
+	ldf.fill.nta f73=[ r3],loc0
+	ldf.fill.nta f81=[r14],loc0
+	ldf.fill.nta f89=[r15],loc0
+	;;
+	ldf.fill.nta f97=[in0],loc1
+	ldf.fill.nta f105=[ r3],loc1
+	ldf.fill.nta f113=[r14],loc1
+	ldf.fill.nta f121=[r15],loc1
+	;;
+	ldf.fill.nta f34=[in0],loc0
+	ldf.fill.nta f42=[ r3],loc0
+	ldf.fill.nta f50=[r14],loc0
+	ldf.fill.nta f58=[r15],loc0
+	;;
+	ldf.fill.nta f66=[in0],loc0
+	ldf.fill.nta f74=[ r3],loc0
+	ldf.fill.nta f82=[r14],loc0
+	ldf.fill.nta f90=[r15],loc0
+	;;
+	ldf.fill.nta f98=[in0],loc1
+	ldf.fill.nta f106=[ r3],loc1
+	ldf.fill.nta f114=[r14],loc1
+	ldf.fill.nta f122=[r15],loc1
+	;;
+	ldf.fill.nta f35=[in0],loc0
+	ldf.fill.nta f43=[ r3],loc0
+	ldf.fill.nta f51=[r14],loc0
+	ldf.fill.nta f59=[r15],loc0
+	;;
+	ldf.fill.nta f67=[in0],loc0
+	ldf.fill.nta f75=[ r3],loc0
+	ldf.fill.nta f83=[r14],loc0
+	ldf.fill.nta f91=[r15],loc0
+	;;
+	ldf.fill.nta f99=[in0],loc1
+	ldf.fill.nta f107=[ r3],loc1
+	ldf.fill.nta f115=[r14],loc1
+	ldf.fill.nta f123=[r15],loc1
+	;;
+	ldf.fill.nta f36=[in0],loc0
+	ldf.fill.nta f44=[ r3],loc0
+	ldf.fill.nta f52=[r14],loc0
+	ldf.fill.nta f60=[r15],loc0
+	;;
+	ldf.fill.nta f68=[in0],loc0
+	ldf.fill.nta f76=[ r3],loc0
+	ldf.fill.nta f84=[r14],loc0
+	ldf.fill.nta f92=[r15],loc0
+	;;
+	ldf.fill.nta f100=[in0],loc1
+	ldf.fill.nta f108=[ r3],loc1
+	ldf.fill.nta f116=[r14],loc1
+	ldf.fill.nta f124=[r15],loc1
+	;;
+	ldf.fill.nta f37=[in0],loc0
+	ldf.fill.nta f45=[ r3],loc0
+	ldf.fill.nta f53=[r14],loc0
+	ldf.fill.nta f61=[r15],loc0
+	;;
+	ldf.fill.nta f69=[in0],loc0
+	ldf.fill.nta f77=[ r3],loc0
+	ldf.fill.nta f85=[r14],loc0
+	ldf.fill.nta f93=[r15],loc0
+	;;
+	ldf.fill.nta f101=[in0],loc1
+	ldf.fill.nta f109=[ r3],loc1
+	ldf.fill.nta f117=[r14],loc1
+	ldf.fill.nta f125=[r15],loc1
+	;;
+	ldf.fill.nta f38 =[in0],loc0
+	ldf.fill.nta f46 =[ r3],loc0
+	ldf.fill.nta f54 =[r14],loc0
+	ldf.fill.nta f62 =[r15],loc0
+	;;
+	ldf.fill.nta f70 =[in0],loc0
+	ldf.fill.nta f78 =[ r3],loc0
+	ldf.fill.nta f86 =[r14],loc0
+	ldf.fill.nta f94 =[r15],loc0
+	;;
+	ldf.fill.nta f102=[in0],loc1
+	ldf.fill.nta f110=[ r3],loc1
+	ldf.fill.nta f118=[r14],loc1
+	ldf.fill.nta f126=[r15],loc1
+	;;
+	ldf.fill.nta f39 =[in0],loc0
+	ldf.fill.nta f47 =[ r3],loc0
+	ldf.fill.nta f55 =[r14],loc0
+	ldf.fill.nta f63 =[r15],loc0
+	;;
+	ldf.fill.nta f71 =[in0],loc0
+	ldf.fill.nta f79 =[ r3],loc0
+	ldf.fill.nta f87 =[r14],loc0
+	ldf.fill.nta f95 =[r15],loc0
+	;;
+	ldf.fill.nta f103=[in0]
+	ldf.fill.nta f111=[ r3]
+	ldf.fill.nta f119=[r14]
+	ldf.fill.nta f127=[r15]
+	br.ret.sptk.many rp
+END(__ia64_load_fpu)
+
+GLOBAL_ENTRY(__ia64_init_fpu)
+	stf.spill [sp]=f0		// M3
+	mov	 f32=f0			// F
+	nop.b	 0
+
+	ldfps	 f33,f34=[sp]		// M0
+	ldfps	 f35,f36=[sp]		// M1
+	mov      f37=f0			// F
+	;;
+
+	setf.s	 f38=r0			// M2
+	setf.s	 f39=r0			// M3
+	mov      f40=f0			// F
+
+	ldfps	 f41,f42=[sp]		// M0
+	ldfps	 f43,f44=[sp]		// M1
+	mov      f45=f0			// F
+
+	setf.s	 f46=r0			// M2
+	setf.s	 f47=r0			// M3
+	mov      f48=f0			// F
+
+	ldfps	 f49,f50=[sp]		// M0
+	ldfps	 f51,f52=[sp]		// M1
+	mov      f53=f0			// F
+
+	setf.s	 f54=r0			// M2
+	setf.s	 f55=r0			// M3
+	mov      f56=f0			// F
+
+	ldfps	 f57,f58=[sp]		// M0
+	ldfps	 f59,f60=[sp]		// M1
+	mov      f61=f0			// F
+
+	setf.s	 f62=r0			// M2
+	setf.s	 f63=r0			// M3
+	mov      f64=f0			// F
+
+	ldfps	 f65,f66=[sp]		// M0
+	ldfps	 f67,f68=[sp]		// M1
+	mov      f69=f0			// F
+
+	setf.s	 f70=r0			// M2
+	setf.s	 f71=r0			// M3
+	mov      f72=f0			// F
+
+	ldfps	 f73,f74=[sp]		// M0
+	ldfps	 f75,f76=[sp]		// M1
+	mov      f77=f0			// F
+
+	setf.s	 f78=r0			// M2
+	setf.s	 f79=r0			// M3
+	mov      f80=f0			// F
+
+	ldfps	 f81,f82=[sp]		// M0
+	ldfps	 f83,f84=[sp]		// M1
+	mov      f85=f0			// F
+
+	setf.s	 f86=r0			// M2
+	setf.s	 f87=r0			// M3
+	mov      f88=f0			// F
+
+	/*
+	 * When the instructions are cached, it would be faster to initialize
+	 * the remaining registers with simply mov instructions (F-unit).
+	 * This gets the time down to ~29 cycles.  However, this would use up
+	 * 33 bundles, whereas continuing with the above pattern yields
+	 * 10 bundles and ~30 cycles.
+	 */
+
+	ldfps	 f89,f90=[sp]		// M0
+	ldfps	 f91,f92=[sp]		// M1
+	mov      f93=f0			// F
+
+	setf.s	 f94=r0			// M2
+	setf.s	 f95=r0			// M3
+	mov      f96=f0			// F
+
+	ldfps	 f97,f98=[sp]		// M0
+	ldfps	 f99,f100=[sp]		// M1
+	mov      f101=f0		// F
+
+	setf.s	 f102=r0		// M2
+	setf.s	 f103=r0		// M3
+	mov      f104=f0		// F
+
+	ldfps	 f105,f106=[sp]		// M0
+	ldfps	 f107,f108=[sp]		// M1
+	mov      f109=f0		// F
+
+	setf.s	 f110=r0		// M2
+	setf.s	 f111=r0		// M3
+	mov      f112=f0		// F
+
+	ldfps	 f113,f114=[sp]		// M0
+	ldfps	 f115,f116=[sp]		// M1
+	mov      f117=f0		// F
+
+	setf.s	 f118=r0		// M2
+	setf.s	 f119=r0		// M3
+	mov      f120=f0		// F
+
+	ldfps	 f121,f122=[sp]		// M0
+	ldfps	 f123,f124=[sp]		// M1
+	mov      f125=f0		// F
+
+	setf.s	 f126=r0		// M2
+	setf.s	 f127=r0		// M3
+	br.ret.sptk.many rp		// F
+END(__ia64_init_fpu)
+
+/*
+ * Switch execution mode from virtual to physical
+ *
+ * Inputs:
+ *	r16 = new psr to establish
+ * Output:
+ *	r19 = old virtual address of ar.bsp
+ *	r20 = old virtual address of sp
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_phys)
+ {
+	alloc r2=ar.pfs,0,0,0,0
+	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
+	mov r15=ip
+ }
+	;;
+ {
+	flushrs				// must be first insn in group
+	srlz.i
+ }
+	;;
+	mov cr.ipsr=r16			// set new PSR
+	add r3=1f-ia64_switch_mode_phys,r15
+
+	mov r19=ar.bsp
+	mov r20=sp
+	mov r14=rp			// get return address into a general register
+	;;
+
+	// going to physical mode, use tpa to translate virt->phys
+	tpa r17=r19
+	tpa r3=r3
+	tpa sp=sp
+	tpa r14=r14
+	;;
+
+	mov r18=ar.rnat			// save ar.rnat
+	mov ar.bspstore=r17		// this steps on ar.rnat
+	mov cr.iip=r3
+	mov cr.ifs=r0
+	;;
+	mov ar.rnat=r18			// restore ar.rnat
+	rfi				// must be last insn in group
+	;;
+1:	mov rp=r14
+	br.ret.sptk.many rp
+END(ia64_switch_mode_phys)
+
+/*
+ * Switch execution mode from physical to virtual
+ *
+ * Inputs:
+ *	r16 = new psr to establish
+ *	r19 = new bspstore to establish
+ *	r20 = new sp to establish
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_virt)
+ {
+	alloc r2=ar.pfs,0,0,0,0
+	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
+	mov r15=ip
+ }
+	;;
+ {
+	flushrs				// must be first insn in group
+	srlz.i
+ }
+	;;
+	mov cr.ipsr=r16			// set new PSR
+	add r3=1f-ia64_switch_mode_virt,r15
+
+	mov r14=rp			// get return address into a general register
+	;;
+
+	// going to virtual
+	//   - for code addresses, set upper bits of addr to KERNEL_START
+	//   - for stack addresses, copy from input argument
+	movl r18=KERNEL_START
+	dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+	dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+	mov sp=r20
+	;;
+	or r3=r3,r18
+	or r14=r14,r18
+	;;
+
+	mov r18=ar.rnat			// save ar.rnat
+	mov ar.bspstore=r19		// this steps on ar.rnat
+	mov cr.iip=r3
+	mov cr.ifs=r0
+	;;
+	mov ar.rnat=r18			// restore ar.rnat
+	rfi				// must be last insn in group
+	;;
+1:	mov rp=r14
+	br.ret.sptk.many rp
+END(ia64_switch_mode_virt)
+
+GLOBAL_ENTRY(ia64_delay_loop)
+	.prologue
+{	nop 0			// work around GAS unwind info generation bug...
+	.save ar.lc,r2
+	mov r2=ar.lc
+	.body
+	;;
+	mov ar.lc=r32
+}
+	;;
+	// force loop to be 32-byte aligned (GAS bug means we cannot use .align
+	// inside function body without corrupting unwind info).
+{	nop 0 }
+1:	br.cloop.sptk.few 1b
+	;;
+	mov ar.lc=r2
+	br.ret.sptk.many rp
+END(ia64_delay_loop)
+
+/*
+ * Return a CPU-local timestamp in nano-seconds.  This timestamp is
+ * NOT synchronized across CPUs its return value must never be
+ * compared against the values returned on another CPU.  The usage in
+ * kernel/sched.c ensures that.
+ *
+ * The return-value of sched_clock() is NOT supposed to wrap-around.
+ * If it did, it would cause some scheduling hiccups (at the worst).
+ * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
+ * that would happen only once every 5+ years.
+ *
+ * The code below basically calculates:
+ *
+ *   (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
+ *
+ * except that the multiplication and the shift are done with 128-bit
+ * intermediate precision so that we can produce a full 64-bit result.
+ */
+GLOBAL_ENTRY(sched_clock)
+	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r9		// certain to stall, so issue it _after_ ldf8...
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(sched_clock)
+
+GLOBAL_ENTRY(start_kernel_thread)
+	.prologue
+	.save rp, r0				// this is the end of the call-chain
+	.body
+	alloc r2 = ar.pfs, 0, 0, 2, 0
+	mov out0 = r9
+	mov out1 = r11;;
+	br.call.sptk.many rp = kernel_thread_helper;;
+	mov out0 = r8
+	br.call.sptk.many rp = sys_exit;;
+1:	br.sptk.few 1b				// not reached
+END(start_kernel_thread)
+
+#ifdef CONFIG_IA64_BRL_EMU
+
+/*
+ *  Assembly routines used by brl_emu.c to set preserved register state.
+ */
+
+#define SET_REG(reg)				\
+ GLOBAL_ENTRY(ia64_set_##reg);			\
+	alloc r16=ar.pfs,1,0,0,0;		\
+	mov reg=r32;				\
+	;;					\
+	br.ret.sptk.many rp;			\
+ END(ia64_set_##reg)
+
+SET_REG(b1);
+SET_REG(b2);
+SET_REG(b3);
+SET_REG(b4);
+SET_REG(b5);
+
+#endif /* CONFIG_IA64_BRL_EMU */
+
+#ifdef CONFIG_SMP
+	/*
+	 * This routine handles spinlock contention.  It uses a non-standard calling
+	 * convention to avoid converting leaf routines into interior routines.  Because
+	 * of this special convention, there are several restrictions:
+	 *
+	 * - do not use gp relative variables, this code is called from the kernel
+	 *   and from modules, r1 is undefined.
+	 * - do not use stacked registers, the caller owns them.
+	 * - do not use the scratch stack space, the caller owns it.
+	 * - do not use any registers other than the ones listed below
+	 *
+	 * Inputs:
+	 *   ar.pfs - saved CFM of caller
+	 *   ar.ccv - 0 (and available for use)
+	 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
+	 *   r28    - available for use.
+	 *   r29    - available for use.
+	 *   r30    - available for use.
+	 *   r31    - address of lock, available for use.
+	 *   b6     - return address
+	 *   p14    - available for use.
+	 *   p15    - used to track flag status.
+	 *
+	 * If you patch this code to use more registers, do not forget to update
+	 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
+	 */
+
+#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+
+GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
+	.prologue
+	.save ar.pfs, r0	// this code effectively has a zero frame size
+	.save rp, r28
+	.body
+	nop 0
+	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
+	.restore sp		// pop existing prologue after next insn
+	mov b6 = r28
+	.prologue
+	.save ar.pfs, r0
+	.altrp b6
+	.body
+	;;
+(p15)	ssm psr.i		// reenable interrupts if they were on
+				// DavidM says that srlz.d is slow and is not required in this case
+.wait:
+	// exponential backoff, kdb, lockmeter etc. go in here
+	hint @pause
+	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
+	nop 0
+	;;
+	cmp4.ne p14,p0=r30,r0
+(p14)	br.cond.sptk.few .wait
+(p15)	rsm psr.i		// disable interrupts if we reenabled them
+	br.cond.sptk.few b6	// lock is now free, try to acquire
+	.global ia64_spinlock_contention_pre3_4_end	// for kernprof
+ia64_spinlock_contention_pre3_4_end:
+END(ia64_spinlock_contention_pre3_4)
+
+#else
+
+GLOBAL_ENTRY(ia64_spinlock_contention)
+	.prologue
+	.altrp b6
+	.body
+	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
+	;;
+.wait:
+(p15)	ssm psr.i		// reenable interrupts if they were on
+				// DavidM says that srlz.d is slow and is not required in this case
+.wait2:
+	// exponential backoff, kdb, lockmeter etc. go in here
+	hint @pause
+	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
+	;;
+	cmp4.ne p14,p0=r30,r0
+	mov r30 = 1
+(p14)	br.cond.sptk.few .wait2
+(p15)	rsm psr.i		// disable interrupts if we reenabled them
+	;;
+	cmpxchg4.acq r30=[r31], r30, ar.ccv
+	;;
+	cmp4.ne p14,p0=r0,r30
+(p14)	br.cond.sptk.few .wait
+
+	br.ret.sptk.many b6	// lock is now taken
+END(ia64_spinlock_contention)
+
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+GLOBAL_ENTRY(ia64_jump_to_sal)
+	alloc r16=ar.pfs,1,0,0,0;;
+	rsm psr.i  | psr.ic
+{
+	flushrs
+	srlz.i
+}
+	tpa r25=in0
+	movl r18=tlb_purge_done;;
+	DATA_VA_TO_PA(r18);;
+	mov b1=r18 	// Return location
+	movl r18=ia64_do_tlb_purge;;
+	DATA_VA_TO_PA(r18);;
+	mov b2=r18 	// doing tlb_flush work
+	mov ar.rsc=0  // Put RSE  in enforced lazy, LE mode
+	movl r17=1f;;
+	DATA_VA_TO_PA(r17);;
+	mov cr.iip=r17
+	movl r16=SAL_PSR_BITS_TO_SET;;
+	mov cr.ipsr=r16
+	mov cr.ifs=r0;;
+	rfi;;
+1:
+	/*
+	 * Invalidate all TLB data/inst
+	 */
+	br.sptk.many b2;; // jump to tlb purge code
+
+tlb_purge_done:
+	RESTORE_REGION_REGS(r25, r17,r18,r19);;
+	RESTORE_REG(b0, r25, r17);;
+	RESTORE_REG(b1, r25, r17);;
+	RESTORE_REG(b2, r25, r17);;
+	RESTORE_REG(b3, r25, r17);;
+	RESTORE_REG(b4, r25, r17);;
+	RESTORE_REG(b5, r25, r17);;
+	ld8 r1=[r25],0x08;;
+	ld8 r12=[r25],0x08;;
+	ld8 r13=[r25],0x08;;
+	RESTORE_REG(ar.fpsr, r25, r17);;
+	RESTORE_REG(ar.pfs, r25, r17);;
+	RESTORE_REG(ar.rnat, r25, r17);;
+	RESTORE_REG(ar.unat, r25, r17);;
+	RESTORE_REG(ar.bspstore, r25, r17);;
+	RESTORE_REG(cr.dcr, r25, r17);;
+	RESTORE_REG(cr.iva, r25, r17);;
+	RESTORE_REG(cr.pta, r25, r17);;
+	RESTORE_REG(cr.itv, r25, r17);;
+	RESTORE_REG(cr.pmv, r25, r17);;
+	RESTORE_REG(cr.cmcv, r25, r17);;
+	RESTORE_REG(cr.lrr0, r25, r17);;
+	RESTORE_REG(cr.lrr1, r25, r17);;
+	ld8 r4=[r25],0x08;;
+	ld8 r5=[r25],0x08;;
+	ld8 r6=[r25],0x08;;
+	ld8 r7=[r25],0x08;;
+	ld8 r17=[r25],0x08;;
+	mov pr=r17,-1;;
+	RESTORE_REG(ar.lc, r25, r17);;
+	/*
+	 * Now Restore floating point regs
+	 */
+	ldf.fill.nta f2=[r25],16;;
+	ldf.fill.nta f3=[r25],16;;
+	ldf.fill.nta f4=[r25],16;;
+	ldf.fill.nta f5=[r25],16;;
+	ldf.fill.nta f16=[r25],16;;
+	ldf.fill.nta f17=[r25],16;;
+	ldf.fill.nta f18=[r25],16;;
+	ldf.fill.nta f19=[r25],16;;
+	ldf.fill.nta f20=[r25],16;;
+	ldf.fill.nta f21=[r25],16;;
+	ldf.fill.nta f22=[r25],16;;
+	ldf.fill.nta f23=[r25],16;;
+	ldf.fill.nta f24=[r25],16;;
+	ldf.fill.nta f25=[r25],16;;
+	ldf.fill.nta f26=[r25],16;;
+	ldf.fill.nta f27=[r25],16;;
+	ldf.fill.nta f28=[r25],16;;
+	ldf.fill.nta f29=[r25],16;;
+	ldf.fill.nta f30=[r25],16;;
+	ldf.fill.nta f31=[r25],16;;
+
+	/*
+	 * Now that we have done all the register restores
+	 * we are now ready for the big DIVE to SAL Land
+	 */
+	ssm psr.ic;;
+	srlz.d;;
+	br.ret.sptk.many b0;;
+END(ia64_jump_to_sal)
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#endif /* CONFIG_SMP */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S b/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S
new file mode 100644
index 0000000000..7bc41eaa95
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S
@@ -0,0 +1,303 @@
+/*
+ * PAL Firmware support
+ * IA-64 Processor Programmers Reference Vol 2
+ *
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
+ *	David Mosberger <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 05/22/2000 eranian Added support for stacked register calls
+ * 05/24/2000 eranian Added support for physical mode static calls
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+	.data
+	.globl pal_entry_point
+pal_entry_point:
+	data8 ia64_pal_default_handler
+	.text
+
+/*
+ * Set the PAL entry point address.  This could be written in C code, but we do it here
+ * to keep it all in one module (besides, it's so trivial that it's
+ * not a big deal).
+ *
+ * in0		Address of the PAL entry point (text address, NOT a function descriptor).
+ */
+GLOBAL_ENTRY(ia64_pal_handler_init)
+	alloc r3=ar.pfs,1,0,0,0
+	movl r2=pal_entry_point
+	;;
+	st8 [r2]=in0
+	br.ret.sptk.many rp
+END(ia64_pal_handler_init)
+
+/*
+ * Default PAL call handler.  This needs to be coded in assembly because it uses
+ * the static calling convention, i.e., the RSE may not be used and calls are
+ * done via "br.cond" (not "br.call").
+ */
+GLOBAL_ENTRY(ia64_pal_default_handler)
+	mov r8=-1
+	br.cond.sptk.many rp
+END(ia64_pal_default_handler)
+
+/*
+ * Make a PAL call using the static calling convention.
+ *
+ * in0         Index of PAL service
+ * in1 - in3   Remaining PAL arguments
+ * in4	       1 ==> clear psr.ic,  0 ==> don't clear psr.ic
+ *
+ */
+GLOBAL_ENTRY(__ia64_pal_call_static)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+	alloc loc1 = ar.pfs,5,5,0,0
+	movl loc2 = pal_entry_point
+1:	{
+	  mov r28 = in0
+	  mov r29 = in1
+	  mov r8 = ip
+	}
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	tbit.nz p6,p7 = in4, 0
+	adds r8 = 1f-1b,r8
+	mov loc4=ar.rsc			// save RSE configuration
+	;;
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov loc3 = psr
+	mov loc0 = rp
+	.body
+	mov r30 = in2
+
+(p6)	rsm psr.i | psr.ic
+	mov r31 = in3
+	mov b7 = loc2
+
+(p7)	rsm psr.i
+	;;
+(p6)	srlz.i
+	mov rp = r8
+	br.cond.sptk.many b7
+1:	mov psr.l = loc3
+	mov ar.rsc = loc4		// restore RSE configuration
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(__ia64_pal_call_static)
+
+/*
+ * Make a PAL call using the stacked registers calling convention.
+ *
+ * Inputs:
+ * 	in0         Index of PAL service
+ * 	in2 - in3   Remaning PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_stacked)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,4,4,0
+	movl loc2 = pal_entry_point
+
+	mov r28  = in0			// Index MUST be copied to r28
+	mov out0 = in0			// AND in0 of PAL function
+	mov loc0 = rp
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov out1 = in1
+	mov out2 = in2
+	mov out3 = in3
+	mov loc3 = psr
+	;;
+	rsm psr.i
+	mov b7 = loc2
+	;;
+	br.call.sptk.many rp=b7		// now make the call
+.ret0:	mov psr.l  = loc3
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	srlz.d				// serialize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_stacked)
+
+/*
+ * Make a physical mode PAL call using the static registers calling convention.
+ *
+ * Inputs:
+ * 	in0         Index of PAL service
+ * 	in2 - in3   Remaning PAL arguments
+ *
+ * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
+ * So we don't need to clear them.
+ */
+#define PAL_PSR_BITS_TO_CLEAR							\
+	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |	\
+	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |		\
+	 IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PAL_PSR_BITS_TO_SET							\
+	(IA64_PSR_BN)
+
+
+GLOBAL_ENTRY(ia64_pal_call_phys_static)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+	alloc loc1 = ar.pfs,4,7,0,0
+	movl loc2 = pal_entry_point
+1:	{
+	  mov r28  = in0		// copy procedure index
+	  mov r8   = ip			// save ip to compute branch
+	  mov loc0 = rp			// save rp
+	}
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov r29  = in1			// first argument
+	mov r30  = in2			// copy arg2
+	mov r31  = in3			// copy arg3
+	;;
+	mov loc3 = psr			// save psr
+	adds r8  = 1f-1b,r8		// calculate return address for call
+	;;
+	mov loc4=ar.rsc			// save RSE configuration
+	dep.z loc2=loc2,0,61		// convert pal entry point to physical
+	tpa r8=r8			// convert rp to physical
+	;;
+	mov b7 = loc2			// install target to branch reg
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	movl r16=PAL_PSR_BITS_TO_CLEAR
+	movl r17=PAL_PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17		// add in psr the bits to set
+	;;
+	andcm r16=loc3,r16		// removes bits to clear from psr
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret1:	mov rp = r8			// install return address (physical)
+	mov loc5 = r19
+	mov loc6 = r20
+	br.cond.sptk.many b7
+1:
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// r16= original psr
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
+.ret2:
+	mov psr.l = loc3		// restore init PSR
+
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	mov ar.rsc=loc4			// restore RSE configuration
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_phys_static)
+
+/*
+ * Make a PAL call using the stacked registers in physical mode.
+ *
+ * Inputs:
+ * 	in0         Index of PAL service
+ * 	in2 - in3   Remaning PAL arguments
+ */
+GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+	alloc	loc1 = ar.pfs,5,7,4,0
+	movl	loc2 = pal_entry_point
+1:	{
+	  mov r28  = in0		// copy procedure index
+	  mov loc0 = rp		// save rp
+	}
+	.body
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	mov out0 = in0		// first argument
+	mov out1 = in1		// copy arg2
+	mov out2 = in2		// copy arg3
+	mov out3 = in3		// copy arg3
+	;;
+	mov loc3 = psr		// save psr
+	;;
+	mov loc4=ar.rsc			// save RSE configuration
+	dep.z loc2=loc2,0,61		// convert pal entry point to physical
+	;;
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	movl r16=PAL_PSR_BITS_TO_CLEAR
+	movl r17=PAL_PSR_BITS_TO_SET
+	;;
+	or loc3=loc3,r17		// add in psr the bits to set
+	mov b7 = loc2			// install target to branch reg
+	;;
+	andcm r16=loc3,r16		// removes bits to clear from psr
+	br.call.sptk.many rp=ia64_switch_mode_phys
+.ret6:
+	mov loc5 = r19
+	mov loc6 = r20
+	br.call.sptk.many rp=b7		// now make the call
+.ret7:
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov r16=loc3			// r16= original psr
+	mov r19=loc5
+	mov r20=loc6
+	br.call.sptk.many rp=ia64_switch_mode_virt	// return to virtual mode
+
+.ret8:	mov psr.l  = loc3		// restore init PSR
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	mov ar.rsc=loc4			// restore RSE configuration
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(ia64_pal_call_phys_stacked)
+
+/*
+ * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15).
+ *
+ * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
+ * regs fp-low partition.
+ *
+ * Inputs:
+ *      in0	Address of stack storage for fp regs
+ */
+GLOBAL_ENTRY(ia64_save_scratch_fpregs)
+	alloc r3=ar.pfs,1,0,0,0
+	add r2=16,in0
+	;;
+	stf.spill [in0] = f10,32
+	stf.spill [r2]  = f11,32
+	;;
+	stf.spill [in0] = f12,32
+	stf.spill [r2]  = f13,32
+	;;
+	stf.spill [in0] = f14,32
+	stf.spill [r2]  = f15,32
+	br.ret.sptk.many rp
+END(ia64_save_scratch_fpregs)
+
+/*
+ * Load scratch fp scratch regs (fp10-fp15)
+ *
+ * Inputs:
+ *      in0	Address of stack storage for fp regs
+ */
+GLOBAL_ENTRY(ia64_load_scratch_fpregs)
+	alloc r3=ar.pfs,1,0,0,0
+	add r2=16,in0
+	;;
+	ldf.fill  f10 = [in0],32
+	ldf.fill  f11 = [r2],32
+	;;
+	ldf.fill  f12 = [in0],32
+	ldf.fill  f13 = [r2],32
+	;;
+	ldf.fill  f14 = [in0],32
+	ldf.fill  f15 = [r2],32
+	br.ret.sptk.many rp
+END(ia64_load_scratch_fpregs)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
new file mode 100644
index 0000000000..3fcff8996a
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
@@ -0,0 +1,792 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000, 2004 Intel Corp
+ * 	Rohit Seth <rohit.seth@intel.com>
+ * 	Suresh Siddha <suresh.b.siddha@intel.com>
+ * 	Gordon Jin <gordon.jin@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *
+ * 12/26/04 S.Siddha, G.Jin, R.Seth
+ *			Add multi-threading and multi-core detection
+ * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
+ * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
+ * 03/31/00 R.Seth	cpu_initialized and current->processor fixes
+ * 02/04/00 D.Mosberger	some more get_cpuinfo fixes...
+ * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
+ * 01/07/99 S.Eranian	added the support for command line argument
+ * 06/24/99 W.Drummond	added boot_cpu_data.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+#include <linux/tty.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/efi.h>
+#include <linux/initrd.h>
+
+#include <asm/ia32.h>
+#include <asm/machvec.h>
+#include <asm/mca.h>
+#include <asm/meminit.h>
+#include <asm/page.h>
+#include <asm/patch.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/sections.h>
+#include <asm/serial.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
+# error "struct cpuinfo_ia64 too big!"
+#endif
+
+#ifdef CONFIG_SMP
+unsigned long __per_cpu_offset[NR_CPUS];
+EXPORT_SYMBOL(__per_cpu_offset);
+#endif
+
+DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
+DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
+unsigned long ia64_cycles_per_usec;
+struct ia64_boot_param *ia64_boot_param;
+struct screen_info screen_info;
+
+unsigned long ia64_max_cacheline_size;
+unsigned long ia64_iobase;	/* virtual address for I/O accesses */
+EXPORT_SYMBOL(ia64_iobase);
+struct io_space io_space[MAX_IO_SPACES];
+EXPORT_SYMBOL(io_space);
+unsigned int num_io_spaces;
+
+/*
+ * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
+ * mask specifies a mask of address bits that must be 0 in order for two buffers to be
+ * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
+ * address of the second buffer must be aligned to (merge_mask+1) in order to be
+ * mergeable).  By default, we assume there is no I/O MMU which can merge physically
+ * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu
+ * page-size of 2^64.
+ */
+unsigned long ia64_max_iommu_merge_mask = ~0UL;
+EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
+
+/*
+ * We use a special marker for the end of memory and it uses the extra (+1) slot
+ */
+struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
+int num_rsvd_regions;
+
+
+/*
+ * Filter incoming memory segments based on the primitive map created from the boot
+ * parameters. Segments contained in the map are removed from the memory ranges. A
+ * caller-specified function is called with the memory ranges that remain after filtering.
+ * This routine does not assume the incoming segments are sorted.
+ */
+int
+filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
+{
+	unsigned long range_start, range_end, prev_start;
+	void (*func)(unsigned long, unsigned long, int);
+	int i;
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end) return 0;
+	}
+#endif
+	/*
+	 * lowest possible address(walker uses virtual)
+	 */
+	prev_start = PAGE_OFFSET;
+	func = arg;
+
+	for (i = 0; i < num_rsvd_regions; ++i) {
+		range_start = max(start, prev_start);
+		range_end   = min(end, rsvd_region[i].start);
+
+		if (range_start < range_end)
+			call_pernode_memory(__pa(range_start), range_end - range_start, func);
+
+		/* nothing more available in this segment */
+		if (range_end == end) return 0;
+
+		prev_start = rsvd_region[i].end;
+	}
+	/* end of memory marker allows full processing inside loop body */
+	return 0;
+}
+
+static void
+sort_regions (struct rsvd_region *rsvd_region, int max)
+{
+	int j;
+
+	/* simple bubble sorting */
+	while (max--) {
+		for (j = 0; j < max; ++j) {
+			if (rsvd_region[j].start > rsvd_region[j+1].start) {
+				struct rsvd_region tmp;
+				tmp = rsvd_region[j];
+				rsvd_region[j] = rsvd_region[j + 1];
+				rsvd_region[j + 1] = tmp;
+			}
+		}
+	}
+}
+
+/**
+ * reserve_memory - setup reserved memory areas
+ *
+ * Setup the reserved memory areas set aside for the boot parameters,
+ * initrd, etc.  There are currently %IA64_MAX_RSVD_REGIONS defined,
+ * see include/asm-ia64/meminit.h if you need to define more.
+ */
+void
+reserve_memory (void)
+{
+	int n = 0;
+
+	/*
+	 * none of the entries in this table overlap
+	 */
+	rsvd_region[n].start = (unsigned long) ia64_boot_param;
+	rsvd_region[n].end   = rsvd_region[n].start + sizeof(*ia64_boot_param);
+	n++;
+
+	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap);
+	rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->efi_memmap_size;
+	n++;
+
+	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line);
+	rsvd_region[n].end   = (rsvd_region[n].start
+				+ strlen(__va(ia64_boot_param->command_line)) + 1);
+	n++;
+
+	rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
+	rsvd_region[n].end   = (unsigned long) ia64_imva(_end);
+	n++;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (ia64_boot_param->initrd_start) {
+		rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
+		rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->initrd_size;
+		n++;
+	}
+#endif
+
+	/* end of memory marker */
+	rsvd_region[n].start = ~0UL;
+	rsvd_region[n].end   = ~0UL;
+	n++;
+
+	num_rsvd_regions = n;
+
+	sort_regions(rsvd_region, num_rsvd_regions);
+}
+
+/**
+ * find_initrd - get initrd parameters from the boot parameter structure
+ *
+ * Grab the initrd start and end from the boot parameter struct given us by
+ * the boot loader.
+ */
+void
+find_initrd (void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (ia64_boot_param->initrd_start) {
+		initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
+		initrd_end   = initrd_start+ia64_boot_param->initrd_size;
+
+		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
+		       initrd_start, ia64_boot_param->initrd_size);
+	}
+#endif
+}
+
+static void __init
+io_port_init (void)
+{
+	extern unsigned long ia64_iobase;
+	unsigned long phys_iobase;
+
+	/*
+	 *  Set `iobase' to the appropriate address in region 6 (uncached access range).
+	 *
+	 *  The EFI memory map is the "preferred" location to get the I/O port space base,
+	 *  rather the relying on AR.KR0. This should become more clear in future SAL
+	 *  specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is
+	 *  found in the memory map.
+	 */
+	phys_iobase = efi_get_iobase();
+	if (phys_iobase)
+		/* set AR.KR0 since this is all we use it for anyway */
+		ia64_set_kr(IA64_KR_IO_BASE, phys_iobase);
+	else {
+		phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
+		printk(KERN_INFO "No I/O port range found in EFI memory map, falling back "
+		       "to AR.KR0\n");
+		printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase);
+	}
+	ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
+
+	/* setup legacy IO port space */
+	io_space[0].mmio_base = ia64_iobase;
+	io_space[0].sparse = 1;
+	num_io_spaces = 1;
+}
+
+/**
+ * early_console_setup - setup debugging console
+ *
+ * Consoles started here require little enough setup that we can start using
+ * them very early in the boot process, either right after the machine
+ * vector initialization, or even before if the drivers can detect their hw.
+ *
+ * Returns non-zero if a console couldn't be setup.
+ */
+static inline int __init
+early_console_setup (char *cmdline)
+{
+#ifdef CONFIG_XEN
+	if (!early_xen_console_setup(cmdline)) return 0;
+#endif
+#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
+	{
+		extern int sn_serial_console_early_setup(void);
+		if (!sn_serial_console_early_setup())
+			return 0;
+	}
+#endif
+#ifdef CONFIG_EFI_PCDP
+	if (!efi_setup_pcdp_console(cmdline))
+		return 0;
+#endif
+#ifdef CONFIG_SERIAL_8250_CONSOLE
+	if (!early_serial_console_init(cmdline))
+		return 0;
+#endif
+
+	return -1;
+}
+
+static inline void
+mark_bsp_online (void)
+{
+#ifdef CONFIG_SMP
+	/* If we register an early console, allow CPU 0 to printk */
+	cpu_set(smp_processor_id(), cpu_online_map);
+#endif
+}
+
+#ifdef CONFIG_SMP
+static void
+check_for_logical_procs (void)
+{
+	pal_logical_to_physical_t info;
+	s64 status;
+
+	status = ia64_pal_logical_to_phys(0, &info);
+	if (status == -1) {
+		printk(KERN_INFO "No logical to physical processor mapping "
+		       "available\n");
+		return;
+	}
+	if (status) {
+		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
+		       status);
+		return;
+	}
+	/*
+	 * Total number of siblings that BSP has.  Though not all of them 
+	 * may have booted successfully. The correct number of siblings 
+	 * booted is in info.overview_num_log.
+	 */
+	smp_num_siblings = info.overview_tpc;
+	smp_num_cpucores = info.overview_cpp;
+}
+#endif
+
+void __init
+setup_arch (char **cmdline_p)
+{
+	unw_init();
+
+	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+
+	*cmdline_p = __va(ia64_boot_param->command_line);
+	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+
+	efi_init();
+	io_port_init();
+
+#ifdef CONFIG_IA64_GENERIC
+	{
+		const char *mvec_name = strstr (*cmdline_p, "machvec=");
+		char str[64];
+
+		if (mvec_name) {
+			const char *end;
+			size_t len;
+
+			mvec_name += 8;
+			end = strchr (mvec_name, ' ');
+			if (end)
+				len = end - mvec_name;
+			else
+				len = strlen (mvec_name);
+			len = min(len, sizeof (str) - 1);
+			strncpy (str, mvec_name, len);
+			str[len] = '\0';
+			mvec_name = str;
+		} else
+			mvec_name = acpi_get_sysname();
+		machvec_init(mvec_name);
+	}
+#endif
+
+	if (early_console_setup(*cmdline_p) == 0)
+		mark_bsp_online();
+
+#ifdef CONFIG_ACPI_BOOT
+	/* Initialize the ACPI boot-time table parser */
+	acpi_table_init();
+# ifdef CONFIG_ACPI_NUMA
+	acpi_numa_init();
+# endif
+#else
+# ifdef CONFIG_SMP
+	smp_build_cpu_map();	/* happens, e.g., with the Ski simulator */
+# endif
+#endif /* CONFIG_APCI_BOOT */
+
+	find_memory();
+
+	/* process SAL system table: */
+	ia64_sal_init(efi.sal_systab);
+
+#ifdef CONFIG_SMP
+	cpu_physical_id(0) = hard_smp_processor_id();
+
+	cpu_set(0, cpu_sibling_map[0]);
+	cpu_set(0, cpu_core_map[0]);
+
+	check_for_logical_procs();
+	if (smp_num_cpucores > 1)
+		printk(KERN_INFO
+		       "cpu package is Multi-Core capable: number of cores=%d\n",
+		       smp_num_cpucores);
+	if (smp_num_siblings > 1)
+		printk(KERN_INFO
+		       "cpu package is Multi-Threading capable: number of siblings=%d\n",
+		       smp_num_siblings);
+#endif
+
+	cpu_init();	/* initialize the bootstrap CPU */
+
+#ifdef CONFIG_ACPI_BOOT
+	acpi_boot_init();
+#endif
+
+#ifdef CONFIG_VT
+	if (!conswitchp) {
+# if defined(CONFIG_DUMMY_CONSOLE)
+		conswitchp = &dummy_con;
+# endif
+# if defined(CONFIG_VGA_CONSOLE)
+		/*
+		 * Non-legacy systems may route legacy VGA MMIO range to system
+		 * memory.  vga_con probes the MMIO hole, so memory looks like
+		 * a VGA device to it.  The EFI memory map can tell us if it's
+		 * memory so we can avoid this problem.
+		 */
+		if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
+			conswitchp = &vga_con;
+# endif
+	}
+#endif
+
+	/* enable IA-64 Machine Check Abort Handling unless disabled */
+	if (!strstr(saved_command_line, "nomca"))
+		ia64_mca_init();
+
+	platform_setup(cmdline_p);
+	paging_init();
+}
+
+/*
+ * Display cpu info for all cpu's.
+ */
+static int
+show_cpuinfo (struct seq_file *m, void *v)
+{
+#ifdef CONFIG_SMP
+#	define lpj	c->loops_per_jiffy
+#	define cpunum	c->cpu
+#else
+#	define lpj	loops_per_jiffy
+#	define cpunum	0
+#endif
+	static struct {
+		unsigned long mask;
+		const char *feature_name;
+	} feature_bits[] = {
+		{ 1UL << 0, "branchlong" },
+		{ 1UL << 1, "spontaneous deferral"},
+		{ 1UL << 2, "16-byte atomic ops" }
+	};
+	char family[32], features[128], *cp, sep;
+	struct cpuinfo_ia64 *c = v;
+	unsigned long mask;
+	int i;
+
+	mask = c->features;
+
+	switch (c->family) {
+	      case 0x07:	memcpy(family, "Itanium", 8); break;
+	      case 0x1f:	memcpy(family, "Itanium 2", 10); break;
+	      default:		sprintf(family, "%u", c->family); break;
+	}
+
+	/* build the feature string: */
+	memcpy(features, " standard", 10);
+	cp = features;
+	sep = 0;
+	for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
+		if (mask & feature_bits[i].mask) {
+			if (sep)
+				*cp++ = sep;
+			sep = ',';
+			*cp++ = ' ';
+			strcpy(cp, feature_bits[i].feature_name);
+			cp += strlen(feature_bits[i].feature_name);
+			mask &= ~feature_bits[i].mask;
+		}
+	}
+	if (mask) {
+		/* print unknown features as a hex value: */
+		if (sep)
+			*cp++ = sep;
+		sprintf(cp, " 0x%lx", mask);
+	}
+
+	seq_printf(m,
+		   "processor  : %d\n"
+		   "vendor     : %s\n"
+		   "arch       : IA-64\n"
+		   "family     : %s\n"
+		   "model      : %u\n"
+		   "revision   : %u\n"
+		   "archrev    : %u\n"
+		   "features   :%s\n"	/* don't change this---it _is_ right! */
+		   "cpu number : %lu\n"
+		   "cpu regs   : %u\n"
+		   "cpu MHz    : %lu.%06lu\n"
+		   "itc MHz    : %lu.%06lu\n"
+		   "BogoMIPS   : %lu.%02lu\n",
+		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
+		   features, c->ppn, c->number,
+		   c->proc_freq / 1000000, c->proc_freq % 1000000,
+		   c->itc_freq / 1000000, c->itc_freq % 1000000,
+		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
+#ifdef CONFIG_SMP
+	seq_printf(m, "siblings   : %u\n", c->num_log);
+	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
+		seq_printf(m,
+		   	   "physical id: %u\n"
+		   	   "core id    : %u\n"
+		   	   "thread id  : %u\n",
+		   	   c->socket_id, c->core_id, c->thread_id);
+#endif
+	seq_printf(m,"\n");
+
+	return 0;
+}
+
+static void *
+c_start (struct seq_file *m, loff_t *pos)
+{
+#ifdef CONFIG_SMP
+	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+		++*pos;
+#endif
+	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
+}
+
+static void *
+c_next (struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void
+c_stop (struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = {
+	.start =	c_start,
+	.next =		c_next,
+	.stop =		c_stop,
+	.show =		show_cpuinfo
+};
+
+void
+identify_cpu (struct cpuinfo_ia64 *c)
+{
+	union {
+		unsigned long bits[5];
+		struct {
+			/* id 0 & 1: */
+			char vendor[16];
+
+			/* id 2 */
+			u64 ppn;		/* processor serial number */
+
+			/* id 3: */
+			unsigned number		:  8;
+			unsigned revision	:  8;
+			unsigned model		:  8;
+			unsigned family		:  8;
+			unsigned archrev	:  8;
+			unsigned reserved	: 24;
+
+			/* id 4: */
+			u64 features;
+		} field;
+	} cpuid;
+	pal_vm_info_1_u_t vm1;
+	pal_vm_info_2_u_t vm2;
+	pal_status_t status;
+	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
+	int i;
+
+	for (i = 0; i < 5; ++i)
+		cpuid.bits[i] = ia64_get_cpuid(i);
+
+	memcpy(c->vendor, cpuid.field.vendor, 16);
+#ifdef CONFIG_SMP
+	c->cpu = smp_processor_id();
+
+	/* below default values will be overwritten  by identify_siblings() 
+	 * for Multi-Threading/Multi-Core capable cpu's
+	 */
+	c->threads_per_core = c->cores_per_socket = c->num_log = 1;
+	c->socket_id = -1;
+
+	identify_siblings(c);
+#endif
+	c->ppn = cpuid.field.ppn;
+	c->number = cpuid.field.number;
+	c->revision = cpuid.field.revision;
+	c->model = cpuid.field.model;
+	c->family = cpuid.field.family;
+	c->archrev = cpuid.field.archrev;
+	c->features = cpuid.field.features;
+
+	status = ia64_pal_vm_summary(&vm1, &vm2);
+	if (status == PAL_STATUS_SUCCESS) {
+		impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
+		phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
+	}
+	c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
+	c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
+}
+
+void
+setup_per_cpu_areas (void)
+{
+	/* start_kernel() requires this... */
+}
+
+static void
+get_max_cacheline_size (void)
+{
+	unsigned long line_size, max = 1;
+	u64 l, levels, unique_caches;
+        pal_cache_config_info_t cci;
+        s64 status;
+
+        status = ia64_pal_cache_summary(&levels, &unique_caches);
+        if (status != 0) {
+                printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
+                       __FUNCTION__, status);
+                max = SMP_CACHE_BYTES;
+		goto out;
+        }
+
+	for (l = 0; l < levels; ++l) {
+		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
+						    &cci);
+		if (status != 0) {
+			printk(KERN_ERR
+			       "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
+			       __FUNCTION__, l, status);
+			max = SMP_CACHE_BYTES;
+		}
+		line_size = 1 << cci.pcci_line_size;
+		if (line_size > max)
+			max = line_size;
+        }
+  out:
+	if (max > ia64_max_cacheline_size)
+		ia64_max_cacheline_size = max;
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU.  This function acts
+ * as a 'CPU state barrier', nothing should get across.
+ */
+void
+cpu_init (void)
+{
+	extern void __devinit ia64_mmu_init (void *);
+	unsigned long num_phys_stacked;
+	pal_vm_info_2_u_t vmi;
+	unsigned int max_ctx;
+	struct cpuinfo_ia64 *cpu_info;
+	void *cpu_data;
+
+	cpu_data = per_cpu_init();
+
+	/*
+	 * We set ar.k3 so that assembly code in MCA handler can compute
+	 * physical addresses of per cpu variables with a simple:
+	 *   phys = ar.k3 + &per_cpu_var
+	 */
+	ia64_set_kr(IA64_KR_PER_CPU_DATA,
+		    ia64_tpa(cpu_data) - (long) __per_cpu_start);
+
+	get_max_cacheline_size();
+
+	/*
+	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
+	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
+	 * depends on the data returned by identify_cpu().  We break the dependency by
+	 * accessing cpu_data() through the canonical per-CPU address.
+	 */
+	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+	identify_cpu(cpu_info);
+
+#ifdef CONFIG_MCKINLEY
+	{
+#		define FEATURE_SET 16
+		struct ia64_pal_retval iprv;
+
+		if (cpu_info->family == 0x1f) {
+			PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
+			if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
+				PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
+				              (iprv.v1 | 0x80), FEATURE_SET, 0);
+		}
+	}
+#endif
+
+	/* Clear the stack memory reserved for pt_regs: */
+	memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
+
+	ia64_set_kr(IA64_KR_FPU_OWNER, 0);
+
+	/*
+	 * Initialize the page-table base register to a global
+	 * directory with all zeroes.  This ensure that we can handle
+	 * TLB-misses to user address-space even before we created the
+	 * first user address-space.  This may happen, e.g., due to
+	 * aggressive use of lfetch.fault.
+	 */
+	ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
+
+	/*
+	 * Initialize default control register to defer speculative faults except
+	 * for those arising from TLB misses, which are not deferred.  The
+	 * kernel MUST NOT depend on a particular setting of these bits (in other words,
+	 * the kernel must have recovery code for all speculative accesses).  Turn on
+	 * dcr.lc as per recommendation by the architecture team.  Most IA-32 apps
+	 * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
+	 * be fine).
+	 */
+	ia64_setreg(_IA64_REG_CR_DCR,  (  IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
+					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+	if (current->mm)
+		BUG();
+
+	ia64_mmu_init(ia64_imva(cpu_data));
+	ia64_mca_cpu_init(ia64_imva(cpu_data));
+
+#ifdef CONFIG_IA32_SUPPORT
+	ia32_cpu_init();
+#endif
+
+	/* Clear ITC to eliminiate sched_clock() overflows in human time.  */
+	ia64_set_itc(0);
+
+	/* disable all local interrupt sources: */
+	ia64_set_itv(1 << 16);
+	ia64_set_lrr0(1 << 16);
+	ia64_set_lrr1(1 << 16);
+	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
+	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
+
+	/* clear TPR & XTP to enable all interrupt classes: */
+	ia64_setreg(_IA64_REG_CR_TPR, 0);
+#ifdef CONFIG_SMP
+	normal_xtp();
+#endif
+
+	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
+	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
+	else {
+		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
+		max_ctx = (1U << 15) - 1;	/* use architected minimum */
+	}
+	while (max_ctx < ia64_ctx.max_ctx) {
+		unsigned int old = ia64_ctx.max_ctx;
+		if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
+			break;
+	}
+
+	if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
+		printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
+		       "stacked regs\n");
+		num_phys_stacked = 96;
+	}
+	/* size of physical stacked register partition plus 8 bytes: */
+	__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+	platform_cpu_init();
+}
+
+void
+check_bugs (void)
+{
+	ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
+			       (unsigned long) __end___mckinley_e9_bundles);
+}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
new file mode 100644
index 0000000000..0ecefd3c89
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Xen components
+#
+
+obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o xenconsole.o
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
new file mode 100644
index 0000000000..2cadc70a77
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
@@ -0,0 +1,268 @@
+/*
+ * Support routines for Xen hypercalls
+ *
+ * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(xen_get_ivr)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov r8=cr.ivr;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r9=XSI_PSR_IC
+	;;
+	ld8 r10=[r9]
+	;;
+	st8 [r9]=r0
+	;;
+	XEN_HYPER_GET_IVR
+	;;
+	st8 [r9]=r10
+	br.ret.sptk.many rp
+	;;
+END(xen_get_ivr)
+
+GLOBAL_ENTRY(xen_get_tpr)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov r8=cr.tpr;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r9=XSI_PSR_IC
+	;;
+	ld8 r10=[r9]
+	;;
+	st8 [r9]=r0
+	;;
+	XEN_HYPER_GET_TPR
+	;;
+	st8 [r9]=r10
+	br.ret.sptk.many rp
+	;;
+END(xen_get_tpr)
+
+GLOBAL_ENTRY(xen_set_tpr)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov cr.tpr=r32;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r9=XSI_PSR_IC
+	mov r8=r32
+	;;
+	ld8 r10=[r9]
+	;;
+	st8 [r9]=r0
+	;;
+	XEN_HYPER_SET_TPR
+	;;
+	st8 [r9]=r10
+	br.ret.sptk.many rp
+	;;
+END(xen_set_tpr)
+
+GLOBAL_ENTRY(xen_eoi)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov cr.eoi=r0;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r9=XSI_PSR_IC
+	mov r8=r32
+	;;
+	ld8 r10=[r9]
+	;;
+	st8 [r9]=r0
+	;;
+	XEN_HYPER_EOI
+	;;
+	st8 [r9]=r10
+	br.ret.sptk.many rp
+	;;
+END(xen_eoi)
+
+GLOBAL_ENTRY(xen_thash)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	thash r8=r32;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r9=XSI_PSR_IC
+	mov r8=r32
+	;;
+	ld8 r10=[r9]
+	;;
+	st8 [r9]=r0
+	;;
+	XEN_HYPER_THASH
+	;;
+	st8 [r9]=r10
+	;;
+	br.ret.sptk.many rp
+	;;
+END(xen_thash)
+
+GLOBAL_ENTRY(xen_set_itm)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov cr.itm=r32;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r9=XSI_PSR_IC
+	mov r8=r32
+	;;
+	ld8 r10=[r9]
+	;;
+	st8 [r9]=r0
+	;;
+	XEN_HYPER_SET_ITM
+	;;
+	st8 [r9]=r10
+	;;
+	br.ret.sptk.many rp
+	;;
+END(xen_set_itm)
+
+GLOBAL_ENTRY(xen_ptcga)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	ptc.ga r32,r33;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r11=XSI_PSR_IC
+	mov r8=r32
+	mov r9=r33
+	;;
+	ld8 r10=[r11]
+	;;
+	st8 [r11]=r0
+	;;
+	XEN_HYPER_PTC_GA
+	;;
+	st8 [r11]=r10
+	;;
+	br.ret.sptk.many rp
+	;;
+END(xen_ptcga)
+
+GLOBAL_ENTRY(xen_get_rr)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov r8=rr[r32];;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r9=XSI_PSR_IC
+	mov r8=r32
+	;;
+	ld8 r10=[r9]
+	;;
+	st8 [r9]=r0
+	;;
+	XEN_HYPER_GET_RR
+	;;
+	st8 [r9]=r10
+	;;
+	br.ret.sptk.many rp
+	;;
+END(xen_get_rr)
+
+GLOBAL_ENTRY(xen_set_rr)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov rr[r32]=r33;;
+(p7)	br.ret.sptk.many rp
+	;;
+	movl r11=XSI_PSR_IC
+	mov r8=r32
+	mov r9=r33
+	;;
+	ld8 r10=[r11]
+	;;
+	st8 [r11]=r0
+	;;
+	XEN_HYPER_SET_RR
+	;;
+	st8 [r11]=r10
+	;;
+	br.ret.sptk.many rp
+	;;
+END(xen_set_rr)
+
+GLOBAL_ENTRY(xen_fc)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	fc r32;;
+(p7)	br.ret.sptk.many rp
+	;;
+	ptc.e r96		// this is a "privified" fc r32
+	;;
+	br.ret.sptk.many rp
+END(xen_fc)
+
+GLOBAL_ENTRY(xen_get_cpuid)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov r8=cpuid[r32];;
+(p7)	br.ret.sptk.many rp
+	;;
+	mov r72=rr[r32]		// this is a "privified" mov r8=cpuid[r32]
+	;;
+	br.ret.sptk.many rp
+END(xen_get_cpuid)
+
+GLOBAL_ENTRY(xen_get_pmd)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov r8=pmd[r32];;
+(p7)	br.ret.sptk.many rp
+	;;
+	mov r72=pmc[r32] 	// this is a "privified" mov r8=pmd[r32]
+	;;
+	br.ret.sptk.many rp
+END(xen_get_pmd)
+
+#ifdef CONFIG_IA32_SUPPORT
+GLOBAL_ENTRY(xen_get_eflag)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov r8=ar24;;
+(p7)	br.ret.sptk.many rp
+	;;
+	mov ar24=r72		// this is a "privified" mov r8=ar.eflg
+	;;
+	br.ret.sptk.many rp
+END(xen_get_eflag)
+// some bits aren't set if pl!=0, see SDM vol1 3.1.8
+GLOBAL_ENTRY(xen_set_eflag)
+	movl r8=running_on_xen;;
+	ld4 r8=[r8];;
+	cmp.eq p7,p0=r8,r0;;
+(p7)	mov ar24=r32
+(p7)	br.ret.sptk.many rp
+	;;
+	// FIXME: this remains no-op'd because it generates
+	// a privileged register (general exception) trap rather than
+	// a privileged operation fault
+	//mov ar24=r32
+	;;
+	br.ret.sptk.many rp
+END(xen_get_eflag)
+#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenconsole.c b/linux-2.6-xen-sparse/arch/ia64/xen/xenconsole.c
new file mode 100644
index 0000000000..19762d844a
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenconsole.c
@@ -0,0 +1,18 @@
+#include <linux/config.h>
+#include <linux/console.h>
+
+int
+early_xen_console_setup (char *cmdline)
+{
+#ifdef CONFIG_XEN
+#ifndef CONFIG_IA64_HP_SIM
+	extern int running_on_xen;
+	if (running_on_xen) {
+		extern struct console hpsim_cons;
+		register_console(&hpsim_cons);
+		return 0;
+	}
+#endif
+#endif
+	return -1;
+}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
new file mode 100644
index 0000000000..cdc0e85b03
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
@@ -0,0 +1,860 @@
+/*
+ * ia64/xen/entry.S
+ *
+ * Alternate kernel routines for Xen.  Heavily leveraged from
+ *   ia64/kernel/entry.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@.hp.com>
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+
+#ifdef CONFIG_XEN
+#include "xenminstate.h"
+#else
+#include "minstate.h"
+#endif
+
+/*
+ * prev_task <- ia64_switch_to(struct task_struct *next)
+ *	With Ingo's new scheduler, interrupts are disabled when this routine gets
+ *	called.  The code starting at .map relies on this.  The rest of the code
+ *	doesn't care about the interrupt masking status.
+ */
+#ifdef CONFIG_XEN
+GLOBAL_ENTRY(xen_switch_to)
+	.prologue
+	alloc r16=ar.pfs,1,0,0,0
+	movl r22=running_on_xen;;
+	ld4 r22=[r22];;
+	cmp.eq p7,p0=r22,r0
+(p7)	br.cond.sptk.many __ia64_switch_to;;
+#else
+GLOBAL_ENTRY(ia64_switch_to)
+	.prologue
+	alloc r16=ar.pfs,1,0,0,0
+#endif
+	.prologue
+	alloc r16=ar.pfs,1,0,0,0
+	DO_SAVE_SWITCH_STACK
+	.body
+
+	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+	movl r25=init_task
+#ifdef CONFIG_XEN
+	movl r27=XSI_KR0+(IA64_KR_CURRENT_STACK*8)
+	;;
+	ld8 r27=[r27]
+#else
+	mov r27=IA64_KR(CURRENT_STACK)
+#endif
+	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
+	dep r20=0,in0,61,3		// physical address of "next"
+	;;
+	st8 [r22]=sp			// save kernel stack pointer of old task
+	shr.u r26=r20,IA64_GRANULE_SHIFT
+	cmp.eq p7,p6=r25,in0
+	;;
+	/*
+	 * If we've already mapped this task's page, we can skip doing it again.
+	 */
+(p6)	cmp.eq p7,p6=r26,r27
+(p6)	br.cond.dpnt .map
+	;;
+.done:
+#ifdef CONFIG_XEN
+	movl r27=XSI_PSR_IC
+	mov r8=1
+	;;
+(p6)	st4 [r27]=r8
+	;;
+#else
+(p6)	ssm psr.ic			// if we had to map, reenable the psr.ic bit FIRST!!!
+	;;
+(p6)	srlz.d
+#endif
+	ld8 sp=[r21]			// load kernel stack pointer of new task
+#ifdef CONFIG_XEN
+	movl r8=XSI_KR0+(IA64_KR_CURRENT*8)
+	;;
+	st8 [r8]=in0
+#else
+	mov IA64_KR(CURRENT)=in0	// update "current" application register
+#endif
+	mov r8=r13			// return pointer to previously running task
+	mov r13=in0			// set "current" pointer
+	;;
+	DO_LOAD_SWITCH_STACK
+
+#ifdef CONFIG_SMP
+	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
+#endif
+	br.ret.sptk.many rp		// boogie on out in new context
+
+.map:
+#ifdef CONFIG_XEN
+	movl r27=XSI_PSR_IC
+	;;
+	st4 [r27]=r0
+#else
+	rsm psr.ic			// interrupts (psr.i) are already disabled here
+#endif
+	movl r25=PAGE_KERNEL
+	;;
+	srlz.d
+	or r23=r25,r20			// construct PA | page properties
+	mov r25=IA64_GRANULE_SHIFT<<2
+	;;
+#ifdef CONFIG_XEN
+	movl r8=XSI_ITIR
+	;;
+	st8 [r8]=r25
+	;;
+	movl r8=XSI_IFA
+	;;
+	st8 [r8]=in0			 // VA of next task...
+	;;
+	mov r25=IA64_TR_CURRENT_STACK
+	movl r8=XSI_KR0+(IA64_KR_CURRENT_STACK*8)
+	;;
+	st8 [r8]=r26
+#else
+	mov cr.itir=r25
+	mov cr.ifa=in0			// VA of next task...
+	;;
+	mov r25=IA64_TR_CURRENT_STACK
+	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
+#endif
+	;;
+	itr.d dtr[r25]=r23		// wire in new mapping...
+	br.cond.sptk .done
+#ifdef CONFIG_XEN
+END(xen_switch_to)
+#else
+END(ia64_switch_to)
+#endif
+
+	/*
+	 * Invoke a system call, but do some tracing before and after the call.
+	 * We MUST preserve the current register frame throughout this routine
+	 * because some system calls (such as ia64_execve) directly
+	 * manipulate ar.pfs.
+	 */
+#ifdef CONFIG_XEN
+GLOBAL_ENTRY(xen_trace_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	movl r16=running_on_xen;;
+	ld4 r16=[r16];;
+	cmp.eq p7,p0=r16,r0
+(p7)	br.cond.sptk.many __ia64_trace_syscall;;
+#else
+GLOBAL_ENTRY(ia64_trace_syscall)
+	PT_REGS_UNWIND_INFO(0)
+#endif
+	/*
+	 * We need to preserve the scratch registers f6-f11 in case the system
+	 * call is sigreturn.
+	 */
+	adds r16=PT(F6)+16,sp
+	adds r17=PT(F7)+16,sp
+	;;
+ 	stf.spill [r16]=f6,32
+ 	stf.spill [r17]=f7,32
+	;;
+ 	stf.spill [r16]=f8,32
+ 	stf.spill [r17]=f9,32
+	;;
+ 	stf.spill [r16]=f10
+ 	stf.spill [r17]=f11
+	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
+	adds r16=PT(F6)+16,sp
+	adds r17=PT(F7)+16,sp
+	;;
+	ldf.fill f6=[r16],32
+	ldf.fill f7=[r17],32
+	;;
+	ldf.fill f8=[r16],32
+	ldf.fill f9=[r17],32
+	;;
+	ldf.fill f10=[r16]
+	ldf.fill f11=[r17]
+	// the syscall number may have changed, so re-load it and re-calculate the
+	// syscall entry-point:
+	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
+	;;
+	ld8 r15=[r15]
+	mov r3=NR_syscalls - 1
+	;;
+	adds r15=-1024,r15
+	movl r16=sys_call_table
+	;;
+	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
+	cmp.leu p6,p7=r15,r3
+	;;
+(p6)	ld8 r20=[r20]				// load address of syscall entry point
+(p7)	movl r20=sys_ni_syscall
+	;;
+	mov b6=r20
+	br.call.sptk.many rp=b6			// do the syscall
+.strace_check_retval:
+	cmp.lt p6,p0=r8,r0			// syscall failed?
+	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
+	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
+	mov r10=0
+(p6)	br.cond.sptk strace_error		// syscall failed ->
+	;;					// avoid RAW on r10
+.strace_save_retval:
+.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
+.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
+	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
+.ret3:	br.cond.sptk .work_pending_syscall_end
+
+strace_error:
+	ld8 r3=[r2]				// load pt_regs.r8
+	sub r9=0,r8				// negate return value to get errno value
+	;;
+	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
+	adds r3=16,r2				// r3=&pt_regs.r10
+	;;
+(p6)	mov r10=-1
+(p6)	mov r8=r9
+	br.cond.sptk .strace_save_retval
+#ifdef CONFIG_XEN
+END(xen_trace_syscall)
+#else
+END(ia64_trace_syscall)
+#endif
+
+/*
+ * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
+ *	need to switch to bank 0 and doesn't restore the scratch registers.
+ *	To avoid leaking kernel bits, the scratch registers are set to
+ *	the following known-to-be-safe values:
+ *
+ *		  r1: restored (global pointer)
+ *		  r2: cleared
+ *		  r3: 1 (when returning to user-level)
+ *	      r8-r11: restored (syscall return value(s))
+ *		 r12: restored (user-level stack pointer)
+ *		 r13: restored (user-level thread pointer)
+ *		 r14: cleared
+ *		 r15: restored (syscall #)
+ *	     r16-r17: cleared
+ *		 r18: user-level b6
+ *		 r19: cleared
+ *		 r20: user-level ar.fpsr
+ *		 r21: user-level b0
+ *		 r22: cleared
+ *		 r23: user-level ar.bspstore
+ *		 r24: user-level ar.rnat
+ *		 r25: user-level ar.unat
+ *		 r26: user-level ar.pfs
+ *		 r27: user-level ar.rsc
+ *		 r28: user-level ip
+ *		 r29: user-level psr
+ *		 r30: user-level cfm
+ *		 r31: user-level pr
+ *	      f6-f11: cleared
+ *		  pr: restored (user-level pr)
+ *		  b0: restored (user-level rp)
+ *	          b6: restored
+ *		  b7: cleared
+ *	     ar.unat: restored (user-level ar.unat)
+ *	      ar.pfs: restored (user-level ar.pfs)
+ *	      ar.rsc: restored (user-level ar.rsc)
+ *	     ar.rnat: restored (user-level ar.rnat)
+ *	 ar.bspstore: restored (user-level ar.bspstore)
+ *	     ar.fpsr: restored (user-level ar.fpsr)
+ *	      ar.ccv: cleared
+ *	      ar.csd: cleared
+ *	      ar.ssd: cleared
+ */
+#ifdef CONFIG_XEN
+GLOBAL_ENTRY(xen_leave_syscall)
+	PT_REGS_UNWIND_INFO(0)
+	movl r22=running_on_xen;;
+	ld4 r22=[r22];;
+	cmp.eq p7,p0=r22,r0
+(p7)	br.cond.sptk.many __ia64_leave_syscall;;
+#else
+ENTRY(ia64_leave_syscall)
+	PT_REGS_UNWIND_INFO(0)
+#endif
+	/*
+	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
+	 * user- or fsys-mode, hence we disable interrupts early on.
+	 *
+	 * p6 controls whether current_thread_info()->flags needs to be check for
+	 * extra work.  We always check for extra work when returning to user-level.
+	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * is 0.  After extra work processing has been completed, execution
+	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * needs to be redone.
+	 */
+#ifdef CONFIG_PREEMPT
+	rsm psr.i				// disable interrupts
+	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
+(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+	.pred.rel.mutex pUStk,pKStk
+(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
+(pUStk)	mov r21=0			// r21 <- 0
+	;;
+	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
+#else /* !CONFIG_PREEMPT */
+#ifdef CONFIG_XEN
+	movl r2=XSI_PSR_I
+	;;
+(pUStk)	st4 [r2]=r0
+#else
+(pUStk)	rsm psr.i
+#endif
+	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
+(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
+#endif
+.work_processed_syscall:
+	adds r2=PT(LOADRS)+16,r12
+	adds r3=PT(AR_BSPSTORE)+16,r12
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	mov b7=r0		// clear b7
+	;;
+	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
+	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
+(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
+	;;
+	mov r16=ar.bsp				// M2  get existing backing store pointer
+(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
+(p6)	br.cond.spnt .work_pending_syscall
+	;;
+	// start restoring the state saved on the kernel stack (struct pt_regs):
+	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
+	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
+	mov f6=f0		// clear f6
+	;;
+	invala			// M0|1 invalidate ALAT
+#ifdef CONFIG_XEN
+	movl r29=XSI_PSR_IC
+	;;
+	st8	[r29]=r0	// note: clears both vpsr.i and vpsr.ic!
+	;;
+#else
+	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
+#endif
+	mov f9=f0		// clear f9
+
+	ld8 r29=[r2],16		// load cr.ipsr
+	ld8 r28=[r3],16			// load cr.iip
+	mov f8=f0		// clear f8
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	mov.m ar.ssd=r0		// M2 clear ar.ssd
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+	mov.m ar.csd=r0		// M2 clear ar.csd
+	mov r22=r0		// clear r22
+	;;
+	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+	mov f10=f0		// clear f10
+	;;
+	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
+	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
+	mov f11=f0		// clear f11
+	;;
+	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
+	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
+	ld8.fill r1=[r3],16	// load r1
+(pUStk) mov r17=1
+	;;
+	srlz.d			// M0  ensure interruption collection is off
+	ld8.fill r13=[r3],16
+	mov f7=f0		// clear f7
+	;;
+	ld8.fill r12=[r2]	// restore r12 (sp)
+	ld8.fill r15=[r3]	// restore r15
+	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
+	;;
+(pUStk)	ld4 r3=[r3]		// r3 = cpu_data->phys_stacked_size_p8
+(pUStk) st1 [r14]=r17
+	mov b6=r18		// I0  restore b6
+	;;
+	mov r14=r0		// clear r14
+	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
+(pKStk) br.cond.dpnt.many skip_rbs_switch
+
+	mov.m ar.ccv=r0		// clear ar.ccv
+(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
+	br.cond.sptk.many rbs_switch
+#ifdef CONFIG_XEN
+END(xen_leave_syscall)
+#else
+END(ia64_leave_syscall)
+#endif
+
+#ifdef CONFIG_XEN
+GLOBAL_ENTRY(xen_leave_kernel)
+	PT_REGS_UNWIND_INFO(0)
+	movl r22=running_on_xen;;
+	ld4 r22=[r22];;
+	cmp.eq p7,p0=r22,r0
+(p7)	br.cond.sptk.many __ia64_leave_kernel;;
+#else
+GLOBAL_ENTRY(ia64_leave_kernel)
+	PT_REGS_UNWIND_INFO(0)
+#endif
+	/*
+	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
+	 * user- or fsys-mode, hence we disable interrupts early on.
+	 *
+	 * p6 controls whether current_thread_info()->flags needs to be check for
+	 * extra work.  We always check for extra work when returning to user-level.
+	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+	 * is 0.  After extra work processing has been completed, execution
+	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+	 * needs to be redone.
+	 */
+#ifdef CONFIG_PREEMPT
+	rsm psr.i				// disable interrupts
+	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
+(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+	.pred.rel.mutex pUStk,pKStk
+(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
+(pUStk)	mov r21=0			// r21 <- 0
+	;;
+	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
+#else
+#ifdef CONFIG_XEN
+(pUStk)	movl r17=XSI_PSR_I
+	;;
+(pUStk)	st4 [r17]=r0
+	;;
+#else
+(pUStk)	rsm psr.i
+#endif
+	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
+(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
+#endif
+.work_processed_kernel:
+	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
+	adds r21=PT(PR)+16,r12
+	;;
+
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8		// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
+	adds r30=PT(AR_CCV)+16,r12
+(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]		// load ar.ccv
+(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
+	;;
+	ld8 r29=[r2],16		// load b7
+	ld8 r30=[r3],16		// load ar.csd
+(p6)	br.cond.spnt .work_pending
+	;;
+	ld8 r31=[r2],16		// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+#ifdef CONFIG_XEN
+	movl r22=XSI_PSR_IC
+	;;
+	st8 [r22]=r0		// note: clears both vpsr.i and vpsr.ic!
+	;;
+#else
+	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
+#endif
+	invala			// invalidate ALAT
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r15
+	;;
+	ldf.fill f11=[r2]
+#ifdef CONFIG_XEN
+	;;
+	// r16-r31 all now hold bank1 values
+	movl r2=XSI_BANK1_R16
+	movl r3=XSI_BANK1_R16+8
+	;;
+	st8.spill [r2]=r16,16
+	st8.spill [r3]=r17,16
+	;;
+	st8.spill [r2]=r18,16
+	st8.spill [r3]=r19,16
+	;;
+	st8.spill [r2]=r20,16
+	st8.spill [r3]=r21,16
+	;;
+	st8.spill [r2]=r22,16
+	st8.spill [r3]=r23,16
+	;;
+	st8.spill [r2]=r24,16
+	st8.spill [r3]=r25,16
+	;;
+	st8.spill [r2]=r26,16
+	st8.spill [r3]=r27,16
+	;;
+	st8.spill [r2]=r28,16
+	st8.spill [r3]=r29,16
+	;;
+	st8.spill [r2]=r30,16
+	st8.spill [r3]=r31,16
+	;;
+	movl r2=XSI_BANKNUM;;
+	st4 [r2]=r0;
+#else
+	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
+#endif
+	;;
+#ifdef CONFIG_XEN
+(pUStk)	movl r18=XSI_KR0+(IA64_KR_CURRENT*8)
+	;;
+(pUStk)	ld8 r18=[r18]
+	;;
+#else
+(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
+#endif
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+	nop.i 0
+	nop.i 0
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r21=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+(pUStk)	mov r17=1
+	;;
+	ld8.fill r3=[r16]
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
+	;;
+	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
+(pKStk)	br.cond.dpnt skip_rbs_switch
+
+	/*
+	 * Restore user backing store.
+	 *
+	 * NOTE: alloc, loadrs, and cover can't be predicated.
+	 */
+(pNonSys) br.cond.dpnt dont_preserve_current_frame
+
+rbs_switch:
+#ifdef CONFIG_XEN
+	XEN_HYPER_COVER;
+#else
+	cover				// add current frame into dirty partition and set cr.ifs
+#endif
+	;;
+	mov r19=ar.bsp			// get new backing store pointer
+	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
+	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
+	;;
+	sub r19=r19,r16			// calculate total byte size of dirty partition
+	add r18=64,r18			// don't force in0-in7 into memory...
+	;;
+	shl r19=r19,16			// shift size of dirty partition into loadrs position
+	;;
+dont_preserve_current_frame:
+	/*
+	 * To prevent leaking bits between the kernel and user-space,
+	 * we must clear the stacked registers in the "invalid" partition here.
+	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
+	 * 5 registers/cycle on McKinley).
+	 */
+#	define pRecurse	p6
+#	define pReturn	p7
+#ifdef CONFIG_ITANIUM
+#	define Nregs	10
+#else
+#	define Nregs	14
+#endif
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
+	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
+	;;
+	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
+	shladd in0=loc1,3,r17
+	mov in1=0
+	;;
+	TEXT_ALIGN(32)
+rse_clear_invalid:
+#ifdef CONFIG_ITANIUM
+	// cycle 0
+ { .mii
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+}{ .mfb
+	add out1=1,in1			// increment recursion count
+	nop.f 0
+	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
+	;;
+}{ .mfi	// cycle 1
+	mov loc1=0
+	nop.f 0
+	mov loc2=0
+}{ .mib
+	mov loc3=0
+	mov loc4=0
+(pRecurse) br.call.sptk.many b0=rse_clear_invalid
+
+}{ .mfi	// cycle 2
+	mov loc5=0
+	nop.f 0
+	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
+}{ .mib
+	mov loc6=0
+	mov loc7=0
+(pReturn) br.ret.sptk.many b0
+}
+#else /* !CONFIG_ITANIUM */
+	alloc loc0=ar.pfs,2,Nregs-2,2,0
+	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
+	add out0=-Nregs*8,in0
+	add out1=1,in1			// increment recursion count
+	mov loc1=0
+	mov loc2=0
+	;;
+	mov loc3=0
+	mov loc4=0
+	mov loc5=0
+	mov loc6=0
+	mov loc7=0
+(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+	;;
+	mov loc8=0
+	mov loc9=0
+	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
+	mov loc10=0
+	mov loc11=0
+(pReturn) br.ret.sptk.many b0
+#endif /* !CONFIG_ITANIUM */
+#	undef pRecurse
+#	undef pReturn
+	;;
+	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
+	;;
+	loadrs
+	;;
+skip_rbs_switch:
+	mov ar.unat=r25		// M2
+(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
+(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
+	;;
+(pUStk)	mov ar.bspstore=r23	// M2
+(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
+(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
+	;;
+#ifdef CONFIG_XEN
+	movl r25=XSI_IPSR
+	;;
+	st8[r25]=r29,XSI_IFS-XSI_IPSR
+	;;
+#else
+	mov cr.ipsr=r29		// M2
+#endif
+	mov ar.pfs=r26		// I0
+(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
+
+#ifdef CONFIG_XEN
+(p9)	st8 [r25]=r30
+	;;
+	adds r25=XSI_IIP-XSI_IFS,r25
+	;;
+#else
+(p9)	mov cr.ifs=r30		// M2
+#endif
+	mov b0=r21		// I0
+(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
+
+	mov ar.fpsr=r20		// M2
+#ifdef CONFIG_XEN
+	st8	[r25]=r28
+#else
+	mov cr.iip=r28		// M2
+#endif
+	nop 0
+	;;
+(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
+	nop 0
+(pLvSys)mov r2=r0
+
+	mov ar.rsc=r27		// M2
+	mov pr=r31,-1		// I0
+#ifdef CONFIG_XEN
+	;;
+	XEN_HYPER_RFI;
+#else
+	rfi			// B
+#endif
+
+	/*
+	 * On entry:
+	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
+	 *	r31 = current->thread_info->flags
+	 * On exit:
+	 *	p6 = TRUE if work-pending-check needs to be redone
+	 */
+.work_pending_syscall:
+	add r2=-8,r2
+	add r3=-8,r3
+	;;
+	st8 [r2]=r8
+	st8 [r3]=r10
+.work_pending:
+	tbit.nz p6,p0=r31,TIF_SIGDELAYED		// signal delayed from  MCA/INIT/NMI/PMI context?
+(p6)	br.cond.sptk.few .sigdelayed
+	;;
+	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
+(p6)	br.cond.sptk.few .notify
+#ifdef CONFIG_PREEMPT
+(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
+	;;
+(pKStk) st4 [r20]=r21
+	ssm psr.i		// enable interrupts
+#endif
+	br.call.spnt.many rp=schedule
+.ret9:	cmp.eq p6,p0=r0,r0				// p6 <- 1
+#ifdef CONFIG_XEN
+	movl r2=XSI_PSR_I
+	;;
+	st4 [r2]=r0
+#else
+	rsm psr.i		// disable interrupts
+#endif
+	;;
+#ifdef CONFIG_PREEMPT
+(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
+	;;
+(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
+#endif
+(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel	// re-check
+
+.notify:
+(pUStk)	br.call.spnt.many rp=notify_resume_user
+.ret10:	cmp.ne p6,p0=r0,r0				// p6 <- 0
+(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel	// don't re-check
+
+// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
+// it could not be delivered.  Deliver it now.  The signal might be for us and
+// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
+// signal.
+
+.sigdelayed:
+	br.call.sptk.many rp=do_sigdelayed
+	cmp.eq p6,p0=r0,r0				// p6 <- 1, always re-check
+(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
+	br.cond.sptk.many .work_processed_kernel	// re-check
+
+.work_pending_syscall_end:
+	adds r2=PT(R8)+16,r12
+	adds r3=PT(R10)+16,r12
+	;;
+	ld8 r8=[r2]
+	ld8 r10=[r3]
+	br.cond.sptk.many .work_processed_syscall	// re-check
+
+#ifdef CONFIG_XEN
+END(xen_leave_kernel)
+#else
+END(ia64_leave_kernel)
+#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c b/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c
new file mode 100644
index 0000000000..3bc6cdbf7e
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c
@@ -0,0 +1,19 @@
+
+extern unsigned long xen_get_cpuid(int);
+
+int
+running_on_sim(void)
+{
+	int i;
+	long cpuid[6];
+
+	for (i = 0; i < 5; ++i)
+		cpuid[i] = xen_get_cpuid(i);
+	if ((cpuid[0] & 0xff) != 'H') return 0;
+	if ((cpuid[3] & 0xff) != 0x4) return 0;
+	if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0;
+	if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0;
+	if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0;
+	return 1;
+}
+
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
new file mode 100644
index 0000000000..435d9955e4
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
@@ -0,0 +1,2015 @@
+/*
+ * arch/ia64/xen/ivt.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *		// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ *  entry offset ----/     /         /                  /          /
+ *  entry number ---------/         /                  /          /
+ *  size of the entry -------------/                  /          /
+ *  vector name -------------------------------------/          /
+ *  interruptions triggering this vector ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+#include <linux/config.h>
+
+#include <asm/asmmacro.h>
+#include <asm/break.h>
+#include <asm/ia32.h>
+#include <asm/kregs.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/errno.h>
+
+#ifdef CONFIG_XEN
+#define ia64_ivt xen_ivt
+#endif
+
+#if 1
+# define PSR_DEFAULT_BITS	psr.ac
+#else
+# define PSR_DEFAULT_BITS	0
+#endif
+
+#if 0
+  /*
+   * This lets you track the last eight faults that occurred on the CPU.  Make sure ar.k2 isn't
+   * needed for something else before enabling this...
+   */
+# define DBG_FAULT(i)	mov r16=ar.k2;;	shl r16=r16,8;;	add r16=(i),r16;;mov ar.k2=r16
+#else
+# define DBG_FAULT(i)
+#endif
+
+#define MINSTATE_VIRT	/* needed by minstate.h */
+#include "xenminstate.h"
+
+#define FAULT(n)									\
+	mov r31=pr;									\
+	mov r19=n;;			/* prepare to save predicates */		\
+	br.sptk.many dispatch_to_fault_handler
+
+	.section .text.ivt,"ax"
+
+	.align 32768	// align on 32KB boundary
+	.global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(vhpt_miss)
+	DBG_FAULT(0)
+	/*
+	 * The VHPT vector is invoked when the TLB entry for the virtual page table
+	 * is missing.  This happens only as a result of a previous
+	 * (the "original") TLB miss, which may either be caused by an instruction
+	 * fetch or a data access (or non-access).
+	 *
+	 * What we do here is normal TLB miss handing for the _original_ miss, followed
+	 * by inserting the TLB entry for the virtual page table page that the VHPT
+	 * walker was attempting to access.  The latter gets inserted as long
+	 * as both L1 and L2 have valid mappings for the faulting address.
+	 * The TLB entry for the original miss gets inserted only if
+	 * the L3 entry indicates that the page is present.
+	 *
+	 * do_page_fault gets invoked in the following cases:
+	 *	- the faulting virtual address uses unimplemented address bits
+	 *	- the faulting virtual address has no L1, L2, or L3 mapping
+	 */
+#ifdef CONFIG_XEN
+	movl r16=XSI_IFA
+	;;
+	ld8 r16=[r16]
+#ifdef CONFIG_HUGETLB_PAGE
+	movl r18=PAGE_SHIFT
+	movl r25=XSI_ITIR
+	;;
+	ld8 r25=[r25]
+#endif
+	;;
+#else
+	mov r16=cr.ifa				// get address that caused the TLB miss
+#ifdef CONFIG_HUGETLB_PAGE
+	movl r18=PAGE_SHIFT
+	mov r25=cr.itir
+#endif
+#endif
+	;;
+#ifdef CONFIG_XEN
+	XEN_HYPER_RSM_PSR_DT;
+#else
+	rsm psr.dt				// use physical addressing for data
+#endif
+	mov r31=pr				// save the predicate registers
+#ifdef CONFIG_XEN
+	movl r19=XSI_KR0+(IA64_KR_PT_BASE*8)	// get the page table base address
+	;;
+	ld8 r19=[r19]
+#else
+	mov r19=IA64_KR(PT_BASE)		// get page table base address
+#endif
+	shl r21=r16,3				// shift bit 60 into sign bit
+	shr.u r17=r16,61			// get the region number into r17
+	;;
+	shr r22=r21,3
+#ifdef CONFIG_HUGETLB_PAGE
+	extr.u r26=r25,2,6
+	;;
+	cmp.ne p8,p0=r18,r26
+	sub r27=r26,r18
+	;;
+(p8)	dep r25=r18,r25,2,6
+(p8)	shr r22=r22,r27
+#endif
+	;;
+	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
+	shr.u r18=r22,PGDIR_SHIFT		// get bits 33-63 of the faulting address
+	;;
+(p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
+
+	srlz.d
+	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
+
+	.pred.rel "mutex", p6, p7
+(p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+	;;
+(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=PTA + IFA(33,42)*8
+(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
+	shr.u r18=r22,PMD_SHIFT			// shift L2 index into position
+	;;
+	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
+	;;
+(p7)	cmp.eq p6,p7=r17,r0			// was L1 entry NULL?
+	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// compute address of L2 page table entry
+	;;
+(p7)	ld8 r20=[r17]				// fetch the L2 entry (may be 0)
+	shr.u r19=r22,PAGE_SHIFT		// shift L3 index into position
+	;;
+(p7)	cmp.eq.or.andcm p6,p7=r20,r0		// was L2 entry NULL?
+	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// compute address of L3 page table entry
+	;;
+#ifdef CONFIG_XEN
+(p7)	ld8 r18=[r21]				// read the L3 PTE
+	movl r19=XSI_ISR
+	;;
+	ld8 r19=[r19]
+	;;
+(p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
+	movl r22=XSI_IHA
+	;;
+	ld8 r22=[r22]
+	;;
+#else
+(p7)	ld8 r18=[r21]				// read the L3 PTE
+	mov r19=cr.isr				// cr.isr bit 0 tells us if this is an insn miss
+	;;
+(p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
+	mov r22=cr.iha				// get the VHPT address that caused the TLB miss
+	;;					// avoid RAW on p7
+#endif
+(p7)	tbit.nz.unc p10,p11=r19,32		// is it an instruction TLB miss?
+	dep r23=0,r20,0,PAGE_SHIFT		// clear low bits to get page address
+	;;
+(p10)	itc.i r18				// insert the instruction TLB entry
+(p11)	itc.d r18				// insert the data TLB entry
+(p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
+#ifdef CONFIG_XEN
+	;;
+	movl r24=XSI_IFA
+	;;
+	st8 [r24]=r22
+	;;
+#else
+	mov cr.ifa=r22
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+(p8)	mov cr.itir=r25				// change to default page-size for VHPT
+#endif
+
+	/*
+	 * Now compute and insert the TLB entry for the virtual page table.  We never
+	 * execute in a page table page so there is no need to set the exception deferral
+	 * bit.
+	 */
+	adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
+	;;
+#ifdef CONFIG_XEN
+(p7)	mov r25=r8
+(p7)	mov r8=r24
+	;;
+(p7)	XEN_HYPER_ITC_D
+	;;
+(p7)	mov r8=r25
+	;;
+#else
+(p7)	itc.d r24
+#endif
+	;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	/*
+	 * Re-check L2 and L3 pagetable.  If they changed, we may have received a ptc.g
+	 * between reading the pagetable and the "itc".  If so, flush the entry we
+	 * inserted and retry.
+	 */
+	ld8 r25=[r21]				// read L3 PTE again
+	ld8 r26=[r17]				// read L2 entry again
+	;;
+	cmp.ne p6,p7=r26,r20			// did L2 entry change
+	mov r27=PAGE_SHIFT<<2
+	;;
+(p6)	ptc.l r22,r27				// purge PTE page translation
+(p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did L3 PTE change
+	;;
+(p6)	ptc.l r16,r27				// purge translation
+#endif
+
+	mov pr=r31,-1				// restore predicate registers
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+END(vhpt_miss)
+
+	.org ia64_ivt+0x400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(itlb_miss)
+	DBG_FAULT(1)
+	/*
+	 * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+	 * page table.  If a nested TLB miss occurs, we switch into physical
+	 * mode, walk the page table, and then re-execute the L3 PTE read
+	 * and go on normally after that.
+	 */
+#ifdef CONFIG_XEN
+	movl r16=XSI_IFA
+	;;
+	ld8 r16=[r16]
+#else
+	mov r16=cr.ifa				// get virtual address
+#endif
+	mov r29=b0				// save b0
+	mov r31=pr				// save predicates
+.itlb_fault:
+#ifdef CONFIG_XEN
+	movl r17=XSI_IHA
+	;;
+	ld8 r17=[r17]				// get virtual address of L3 PTE
+#else
+	mov r17=cr.iha				// get virtual address of L3 PTE
+#endif
+	movl r30=1f				// load nested fault continuation point
+	;;
+1:	ld8 r18=[r17]				// read L3 PTE
+	;;
+	mov b0=r29
+	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
+(p6)	br.cond.spnt page_fault
+	;;
+#ifdef CONFIG_XEN
+	mov r19=r8
+	mov r8=r18
+	;;
+	XEN_HYPER_ITC_I
+	;;
+	mov r8=r19
+#else
+	itc.i r18
+#endif
+	;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r19=[r17]				// read L3 PTE again and see if same
+	mov r20=PAGE_SHIFT<<2			// setup page size for purge
+	;;
+	cmp.ne p7,p0=r18,r19
+	;;
+(p7)	ptc.l r16,r20
+#endif
+	mov pr=r31,-1
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+END(itlb_miss)
+
+	.org ia64_ivt+0x0800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(dtlb_miss)
+	DBG_FAULT(2)
+	/*
+	 * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+	 * page table.  If a nested TLB miss occurs, we switch into physical
+	 * mode, walk the page table, and then re-execute the L3 PTE read
+	 * and go on normally after that.
+	 */
+#ifdef CONFIG_XEN
+	movl r16=XSI_IFA
+	;;
+	ld8 r16=[r16]
+#else
+	mov r16=cr.ifa				// get virtual address
+#endif
+	mov r29=b0				// save b0
+	mov r31=pr				// save predicates
+dtlb_fault:
+#ifdef CONFIG_XEN
+	movl r17=XSI_IHA
+	;;
+	ld8 r17=[r17]				// get virtual address of L3 PTE
+#else
+	mov r17=cr.iha				// get virtual address of L3 PTE
+#endif
+	movl r30=1f				// load nested fault continuation point
+	;;
+1:	ld8 r18=[r17]				// read L3 PTE
+	;;
+	mov b0=r29
+	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
+(p6)	br.cond.spnt page_fault
+	;;
+#ifdef CONFIG_XEN
+	mov r19=r8
+	mov r8=r18
+	;;
+	XEN_HYPER_ITC_D
+	;;
+	mov r8=r19
+	;;
+#else
+	itc.d r18
+#endif
+	;;
+#ifdef CONFIG_SMP
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r19=[r17]				// read L3 PTE again and see if same
+	mov r20=PAGE_SHIFT<<2			// setup page size for purge
+	;;
+	cmp.ne p7,p0=r18,r19
+	;;
+(p7)	ptc.l r16,r20
+#endif
+	mov pr=r31,-1
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+END(dtlb_miss)
+
+	.org ia64_ivt+0x0c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(alt_itlb_miss)
+	DBG_FAULT(3)
+#ifdef CONFIG_XEN
+	movl r31=XSI_IPSR
+	;;
+	ld8 r21=[r31],XSI_IFA-XSI_IPSR	// get ipsr, point to ifa
+	movl r17=PAGE_KERNEL
+	;;
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	;;
+	ld8 r16=[r31]		// get ifa
+	mov r31=pr
+	;;
+#else
+	mov r16=cr.ifa		// get address that caused the TLB miss
+	movl r17=PAGE_KERNEL
+	mov r21=cr.ipsr
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	mov r31=pr
+	;;
+#endif
+#ifdef CONFIG_DISABLE_VHPT
+	shr.u r22=r16,61			// get the region number into r21
+	;;
+	cmp.gt p8,p0=6,r22			// user mode
+	;;
+#ifndef CONFIG_XEN
+(p8)	thash r17=r16
+	;;
+(p8)	mov cr.iha=r17
+#endif
+(p8)	mov r29=b0				// save b0
+(p8)	br.cond.dptk .itlb_fault
+#endif
+	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
+	and r19=r19,r16		// clear ed, reserved bits, and PTE control bits
+	shr.u r18=r16,57	// move address bit 61 to bit 4
+	;;
+	andcm r18=0x10,r18	// bit 4=~address-bit(61)
+	cmp.ne p8,p0=r0,r23	// psr.cpl != 0?
+	or r19=r17,r19		// insert PTE control bits into r19
+	;;
+	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
+(p8)	br.cond.spnt page_fault
+	;;
+#ifdef CONFIG_XEN
+	mov r18=r8
+	mov r8=r19
+	;;
+	XEN_HYPER_ITC_I
+	;;
+	mov r8=r18
+	;;
+	mov pr=r31,-1
+	;;
+	XEN_HYPER_RFI;
+#else
+	itc.i r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
+#endif
+END(alt_itlb_miss)
+
+	.org ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(alt_dtlb_miss)
+	DBG_FAULT(4)
+#ifdef CONFIG_XEN
+	movl r31=XSI_IPSR
+	;;
+	ld8 r21=[r31],XSI_ISR-XSI_IPSR	// get ipsr, point to isr
+	movl r17=PAGE_KERNEL
+	;;
+	ld8 r20=[r31],XSI_IFA-XSI_ISR	// get isr, point to ifa
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	;;
+	ld8 r16=[r31]		// get ifa
+	mov r31=pr
+	;;
+#else
+	mov r16=cr.ifa		// get address that caused the TLB miss
+	movl r17=PAGE_KERNEL
+	mov r20=cr.isr
+	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+	mov r21=cr.ipsr
+	mov r31=pr
+	;;
+#endif
+#ifdef CONFIG_DISABLE_VHPT
+	shr.u r22=r16,61			// get the region number into r21
+	;;
+	cmp.gt p8,p0=6,r22			// access to region 0-5
+	;;
+#ifndef CONFIG_XEN
+(p8)	thash r17=r16
+	;;
+(p8)	mov cr.iha=r17
+#endif
+(p8)	mov r29=b0				// save b0
+(p8)	br.cond.dptk dtlb_fault
+#endif
+	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
+	and r22=IA64_ISR_CODE_MASK,r20		// get the isr.code field
+	tbit.nz p6,p7=r20,IA64_ISR_SP_BIT	// is speculation bit on?
+	shr.u r18=r16,57			// move address bit 61 to bit 4
+	and r19=r19,r16				// clear ed, reserved bits, and PTE control bits
+	tbit.nz p9,p0=r20,IA64_ISR_NA_BIT	// is non-access bit on?
+	;;
+	andcm r18=0x10,r18	// bit 4=~address-bit(61)
+	cmp.ne p8,p0=r0,r23
+(p9)	cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22	// check isr.code field
+(p8)	br.cond.spnt page_fault
+
+	dep r21=-1,r21,IA64_PSR_ED_BIT,1
+	or r19=r19,r17		// insert PTE control bits into r19
+	;;
+	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
+(p6)	mov cr.ipsr=r21
+	;;
+#ifdef CONFIG_XEN
+(p7)	mov r18=r8
+(p7)	mov r8=r19
+	;;
+(p7)	XEN_HYPER_ITC_D
+	;;
+(p7)	mov r8=r18
+	;;
+	mov pr=r31,-1
+	;;
+	XEN_HYPER_RFI;
+#else
+(p7)	itc.d r19		// insert the TLB entry
+	mov pr=r31,-1
+	rfi
+#endif
+END(alt_dtlb_miss)
+
+	.org ia64_ivt+0x1400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(nested_dtlb_miss)
+	/*
+	 * In the absence of kernel bugs, we get here when the virtually mapped linear
+	 * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
+	 * Access-bit, or Data Access-bit faults).  If the DTLB entry for the virtual page
+	 * table is missing, a nested TLB miss fault is triggered and control is
+	 * transferred to this point.  When this happens, we lookup the pte for the
+	 * faulting address by walking the page table in physical mode and return to the
+	 * continuation point passed in register r30 (or call page_fault if the address is
+	 * not mapped).
+	 *
+	 * Input:	r16:	faulting address
+	 *		r29:	saved b0
+	 *		r30:	continuation address
+	 *		r31:	saved pr
+	 *
+	 * Output:	r17:	physical address of L3 PTE of faulting address
+	 *		r29:	saved b0
+	 *		r30:	continuation address
+	 *		r31:	saved pr
+	 *
+	 * Clobbered:	b0, r18, r19, r21, psr.dt (cleared)
+	 */
+#ifdef CONFIG_XEN
+	XEN_HYPER_RSM_PSR_DT;
+#else
+	rsm psr.dt				// switch to using physical data addressing
+#endif
+#ifdef CONFIG_XEN
+	movl r19=XSI_KR0+(IA64_KR_PT_BASE*8)	// get the page table base address
+	;;
+	ld8 r19=[r19]
+#else
+	mov r19=IA64_KR(PT_BASE)		// get the page table base address
+#endif
+	shl r21=r16,3				// shift bit 60 into sign bit
+	;;
+	shr.u r17=r16,61			// get the region number into r17
+	;;
+	cmp.eq p6,p7=5,r17			// is faulting address in region 5?
+	shr.u r18=r16,PGDIR_SHIFT		// get bits 33-63 of faulting address
+	;;
+(p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
+
+	srlz.d
+	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
+
+	.pred.rel "mutex", p6, p7
+(p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
+	;;
+(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=PTA + IFA(33,42)*8
+(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
+	shr.u r18=r16,PMD_SHIFT			// shift L2 index into position
+	;;
+	ld8 r17=[r17]				// fetch the L1 entry (may be 0)
+	;;
+(p7)	cmp.eq p6,p7=r17,r0			// was L1 entry NULL?
+	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// compute address of L2 page table entry
+	;;
+(p7)	ld8 r17=[r17]				// fetch the L2 entry (may be 0)
+	shr.u r19=r16,PAGE_SHIFT		// shift L3 index into position
+	;;
+(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was L2 entry NULL?
+	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// compute address of L3 page table entry
+(p6)	br.cond.spnt page_fault
+	mov b0=r30
+	br.sptk.many b0				// return to continuation point
+END(nested_dtlb_miss)
+
+	.org ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(ikey_miss)
+	DBG_FAULT(6)
+	FAULT(6)
+END(ikey_miss)
+
+	//-----------------------------------------------------------------------------------
+	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+#ifdef CONFIG_XEN
+	XEN_HYPER_SSM_PSR_DT;
+#else
+	ssm psr.dt
+	;;
+	srlz.i
+#endif
+	;;
+	SAVE_MIN_WITH_COVER
+	alloc r15=ar.pfs,0,0,3,0
+#ifdef CONFIG_XEN
+	movl r3=XSI_ISR
+	;;
+	ld8 out1=[r3],XSI_IFA-XSI_ISR		// get vcr.isr, point to ifa
+	;;
+	ld8 out0=[r3]				// get vcr.ifa
+	mov r14=1
+	;;
+	add r3=XSI_PSR_IC-XSI_IFA, r3		// point to vpsr.ic
+	;;
+	st4 [r3]=r14				// vpsr.ic = 1
+	adds r3=8,r2				// set up second base pointer
+	;;
+#else
+	mov out0=cr.ifa
+	mov out1=cr.isr
+	adds r3=8,r2				// set up second base pointer
+	;;
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i					// guarantee that interruption collectin is on
+	;;
+#endif
+#ifdef CONFIG_XEN
+	br.cond.sptk.many	xen_page_fault
+	;;
+done_xen_page_fault:
+#endif
+(p15)	ssm psr.i				// restore psr.i
+	movl r14=ia64_leave_kernel
+	;;
+	SAVE_REST
+	mov rp=r14
+	;;
+	adds out2=16,r12			// out2 = pointer to pt_regs
+	br.call.sptk.many b6=ia64_do_page_fault	// ignore return address
+END(page_fault)
+
+	.org ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(dkey_miss)
+	DBG_FAULT(7)
+	FAULT(7)
+#ifdef CONFIG_XEN
+	// Leaving this code inline above results in an IVT section overflow
+	// There is no particular reason for this code to be here...
+xen_page_fault:
+(p15)	movl r3=XSI_PSR_I
+	;;
+(p15)	st4 [r3]=r14,XSI_PEND-XSI_PSR_I		// if (p15) vpsr.i = 1
+	mov r14=r0
+	;;
+(p15)	ld4 r14=[r3]				// if (pending_interrupts)
+	adds r3=8,r2				// re-set up second base pointer
+	;;
+(p15)	cmp.ne	p15,p0=r14,r0
+	;;
+	br.cond.sptk.many done_xen_page_fault
+	;;
+#endif
+END(dkey_miss)
+
+	.org ia64_ivt+0x2000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(dirty_bit)
+	DBG_FAULT(8)
+	/*
+	 * What we do here is to simply turn on the dirty bit in the PTE.  We need to
+	 * update both the page-table and the TLB entry.  To efficiently access the PTE,
+	 * we address it through the virtual page table.  Most likely, the TLB entry for
+	 * the relevant virtual page table page is still present in the TLB so we can
+	 * normally do this without additional TLB misses.  In case the necessary virtual
+	 * page table TLB entry isn't present, we take a nested TLB miss hit where we look
+	 * up the physical address of the L3 PTE and then continue at label 1 below.
+	 */
+#ifdef CONFIG_XEN
+	movl r16=XSI_IFA
+	;;
+	ld8 r16=[r16]
+	;;
+#else
+	mov r16=cr.ifa				// get the address that caused the fault
+#endif
+	movl r30=1f				// load continuation point in case of nested fault
+	;;
+#ifdef CONFIG_XEN
+#if 1
+	mov r18=r8;
+	mov r8=r16;
+	XEN_HYPER_THASH;;
+	mov r17=r8;
+	mov r8=r18;;
+#else
+	tak r17=r80				// "privified" thash
+#endif
+#else
+	thash r17=r16				// compute virtual address of L3 PTE
+#endif
+	mov r29=b0				// save b0 in case of nested fault
+	mov r31=pr				// save pr
+#ifdef CONFIG_SMP
+	mov r28=ar.ccv				// save ar.ccv
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	mov ar.ccv=r18				// set compare value for cmpxchg
+	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
+	;;
+	cmpxchg8.acq r26=[r17],r25,ar.ccv
+	mov r24=PAGE_SHIFT<<2
+	;;
+	cmp.eq p6,p7=r26,r18
+	;;
+(p6)	itc.d r25				// install updated PTE
+	;;
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r18=[r17]				// read PTE again
+	;;
+	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+	;;
+(p7)	ptc.l r16,r24
+	mov b0=r29				// restore b0
+	mov ar.ccv=r28
+#else
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	or r18=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
+	mov b0=r29				// restore b0
+	;;
+	st8 [r17]=r18				// store back updated PTE
+	itc.d r18				// install updated PTE
+#endif
+	mov pr=r31,-1				// restore pr
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+END(dirty_bit)
+
+	.org ia64_ivt+0x2400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(iaccess_bit)
+	DBG_FAULT(9)
+	// Like Entry 8, except for instruction access
+#ifdef CONFIG_XEN
+	movl r16=XSI_IFA
+	;;
+	ld8 r16=[r16]
+	;;
+#else
+	mov r16=cr.ifa				// get the address that caused the fault
+#endif
+	movl r30=1f				// load continuation point in case of nested fault
+	mov r31=pr				// save predicates
+#ifdef CONFIG_ITANIUM
+	/*
+	 * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
+	 */
+	mov r17=cr.ipsr
+	;;
+	mov r18=cr.iip
+	tbit.z p6,p0=r17,IA64_PSR_IS_BIT	// IA64 instruction set?
+	;;
+(p6)	mov r16=r18				// if so, use cr.iip instead of cr.ifa
+#endif /* CONFIG_ITANIUM */
+	;;
+#ifdef CONFIG_XEN
+#if 1
+	mov r18=r8;
+	mov r8=r16;
+	XEN_HYPER_THASH;;
+	mov r17=r8;
+	mov r8=r18;;
+#else
+	tak r17=r80				// "privified" thash
+#endif
+#else
+	thash r17=r16				// compute virtual address of L3 PTE
+#endif
+	mov r29=b0				// save b0 in case of nested fault)
+#ifdef CONFIG_SMP
+	mov r28=ar.ccv				// save ar.ccv
+	;;
+1:	ld8 r18=[r17]
+	;;
+	mov ar.ccv=r18				// set compare value for cmpxchg
+	or r25=_PAGE_A,r18			// set the accessed bit
+	;;
+	cmpxchg8.acq r26=[r17],r25,ar.ccv
+	mov r24=PAGE_SHIFT<<2
+	;;
+	cmp.eq p6,p7=r26,r18
+	;;
+(p6)	itc.i r25				// install updated PTE
+	;;
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+
+	ld8 r18=[r17]				// read PTE again
+	;;
+	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+	;;
+(p7)	ptc.l r16,r24
+	mov b0=r29				// restore b0
+	mov ar.ccv=r28
+#else /* !CONFIG_SMP */
+	;;
+1:	ld8 r18=[r17]
+	;;
+	or r18=_PAGE_A,r18			// set the accessed bit
+	mov b0=r29				// restore b0
+	;;
+	st8 [r17]=r18				// store back updated PTE
+	itc.i r18				// install updated PTE
+#endif /* !CONFIG_SMP */
+	mov pr=r31,-1
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+END(iaccess_bit)
+
+	.org ia64_ivt+0x2800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(daccess_bit)
+	DBG_FAULT(10)
+	// Like Entry 8, except for data access
+#ifdef CONFIG_XEN
+	movl r16=XSI_IFA
+	;;
+	ld8 r16=[r16]
+	;;
+#else
+	mov r16=cr.ifa				// get the address that caused the fault
+#endif
+	movl r30=1f				// load continuation point in case of nested fault
+	;;
+#ifdef CONFIG_XEN
+#if 1
+	mov r18=r8;
+	mov r8=r16;
+	XEN_HYPER_THASH;;
+	mov r17=r8;
+	mov r8=r18;;
+#else
+	tak r17=r80				// "privified" thash
+#endif
+#else
+	thash r17=r16				// compute virtual address of L3 PTE
+#endif
+	mov r31=pr
+	mov r29=b0				// save b0 in case of nested fault)
+#ifdef CONFIG_SMP
+	mov r28=ar.ccv				// save ar.ccv
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	mov ar.ccv=r18				// set compare value for cmpxchg
+	or r25=_PAGE_A,r18			// set the dirty bit
+	;;
+	cmpxchg8.acq r26=[r17],r25,ar.ccv
+	mov r24=PAGE_SHIFT<<2
+	;;
+	cmp.eq p6,p7=r26,r18
+	;;
+(p6)	itc.d r25				// install updated PTE
+	/*
+	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
+	 * cannot possibly affect the following loads:
+	 */
+	dv_serialize_data
+	;;
+	ld8 r18=[r17]				// read PTE again
+	;;
+	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
+	;;
+(p7)	ptc.l r16,r24
+	mov ar.ccv=r28
+#else
+	;;
+1:	ld8 r18=[r17]
+	;;					// avoid RAW on r18
+	or r18=_PAGE_A,r18			// set the accessed bit
+	;;
+	st8 [r17]=r18				// store back updated PTE
+	itc.d r18				// install updated PTE
+#endif
+	mov b0=r29				// restore b0
+	mov pr=r31,-1
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+END(daccess_bit)
+
+	.org ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(break_fault)
+	/*
+	 * The streamlined system call entry/exit paths only save/restore the initial part
+	 * of pt_regs.  This implies that the callers of system-calls must adhere to the
+	 * normal procedure calling conventions.
+	 *
+	 *   Registers to be saved & restored:
+	 *	CR registers: cr.ipsr, cr.iip, cr.ifs
+	 *	AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
+	 * 	others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
+	 *   Registers to be restored only:
+	 * 	r8-r11: output value from the system call.
+	 *
+	 * During system call exit, scratch registers (including r15) are modified/cleared
+	 * to prevent leaking bits from kernel to user level.
+	 */
+	DBG_FAULT(11)
+#ifdef CONFIG_XEN
+	movl r31=XSI_IPSR
+	;;
+	ld8 r29=[r31],XSI_IIP-XSI_IPSR		// get ipsr, point to iip
+	mov r18=__IA64_BREAK_SYSCALL
+	mov r21=ar.fpsr
+	;;
+	ld8 r28=[r31],XSI_IIM-XSI_IIP		// get iip, point to iim
+	mov r19=b6
+	mov r25=ar.unat
+	;;
+	ld8 r17=[r31]				// get iim
+	mov r27=ar.rsc
+	mov r26=ar.pfs
+	;;
+	adds r31=(XSI_KR0+(IA64_KR_CURRENT*8))-XSI_IIM,r31
+	;;
+	ld8 r16=[r31]				// r16 = current task
+#else
+	mov r16=IA64_KR(CURRENT)		// r16 = current task; 12 cycle read lat.
+	mov r17=cr.iim
+	mov r18=__IA64_BREAK_SYSCALL
+	mov r21=ar.fpsr
+	mov r29=cr.ipsr
+	mov r19=b6
+	mov r25=ar.unat
+	mov r27=ar.rsc
+	mov r26=ar.pfs
+	mov r28=cr.iip
+#endif
+	mov r31=pr				// prepare to save predicates
+	mov r20=r1
+	;;
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
+	cmp.eq p0,p7=r18,r17			// is this a system call? (p7 <- false, if so)
+(p7)	br.cond.spnt non_syscall
+	;;
+	ld1 r17=[r16]				// load current->thread.on_ustack flag
+	st1 [r16]=r0				// clear current->thread.on_ustack flag
+	add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
+	;;
+	invala
+
+	/* adjust return address so we skip over the break instruction: */
+
+	extr.u r8=r29,41,2			// extract ei field from cr.ipsr
+	;;
+	cmp.eq p6,p7=2,r8			// isr.ei==2?
+	mov r2=r1				// setup r2 for ia64_syscall_setup
+	;;
+(p6)	mov r8=0				// clear ei to 0
+(p6)	adds r28=16,r28				// switch cr.iip to next bundle cr.ipsr.ei wrapped
+(p7)	adds r8=1,r8				// increment ei to next slot
+	;;
+	cmp.eq pKStk,pUStk=r0,r17		// are we in kernel mode already?
+	dep r29=r8,r29,41,2			// insert new ei into cr.ipsr
+	;;
+
+	// switch from user to kernel RBS:
+	MINSTATE_START_SAVE_MIN_VIRT
+	br.call.sptk.many b7=ia64_syscall_setup
+	;;
+#ifdef CONFIG_XEN
+	mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;;
+#else
+	MINSTATE_END_SAVE_MIN_VIRT		// switch to bank 1
+#endif
+#ifdef CONFIG_XEN
+	movl r3=XSI_PSR_IC
+	mov r16=1
+	;;
+#if 1
+	st4 [r3]=r16,XSI_PSR_I-XSI_PSR_IC	// vpsr.ic = 1
+	;;
+(p15)	st4 [r3]=r16,XSI_PEND-XSI_PSR_I		// if (p15) vpsr.i = 1
+	mov r16=r0
+	;;
+(p15)	ld4 r16=[r3]				// if (pending_interrupts)
+	;;
+	cmp.ne	p6,p0=r16,r0
+	;;
+(p6)	ssm	psr.i				//   do a real ssm psr.i
+	;;
+#else
+//	st4 [r3]=r16,XSI_PSR_I-XSI_PSR_IC	// vpsr.ic = 1
+	adds r3=XSI_PSR_I-XSI_PSR_IC,r3		// SKIP vpsr.ic = 1
+	;;
+(p15)	st4 [r3]=r16,XSI_PEND-XSI_PSR_I		// if (p15) vpsr.i = 1
+	mov r16=r0
+	;;
+(p15)	ld4 r16=[r3]				// if (pending_interrupts)
+	;;
+	cmp.ne	p6,p0=r16,r0
+	;;
+//(p6)	ssm	psr.i				//   do a real ssm psr.i
+//(p6)	XEN_HYPER_SSM_I;
+(p6)	break 0x7;
+	;;
+#endif
+	mov r3=NR_syscalls - 1
+	;;
+#else
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i					// guarantee that interruption collection is on
+	mov r3=NR_syscalls - 1
+	;;
+(p15)	ssm psr.i				// restore psr.i
+#endif
+	// p10==true means out registers are more than 8 or r15's Nat is true
+(p10)	br.cond.spnt.many ia64_ret_from_syscall
+	;;
+	movl r16=sys_call_table
+
+	adds r15=-1024,r15			// r15 contains the syscall number---subtract 1024
+	movl r2=ia64_ret_from_syscall
+	;;
+	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
+	cmp.leu p6,p7=r15,r3			// (syscall > 0 && syscall < 1024 + NR_syscalls) ?
+	mov rp=r2				// set the real return addr
+	;;
+(p6)	ld8 r20=[r20]				// load address of syscall entry point
+(p7)	movl r20=sys_ni_syscall
+
+	add r2=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+	ld4 r2=[r2]				// r2 = current_thread_info()->flags
+	;;
+	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
+	;;
+	cmp.eq p8,p0=r2,r0
+	mov b6=r20
+	;;
+(p8)	br.call.sptk.many b6=b6			// ignore this return addr
+	br.cond.sptk ia64_trace_syscall
+	// NOT REACHED
+END(break_fault)
+
+	.org ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(interrupt)
+	DBG_FAULT(12)
+	mov r31=pr		// prepare to save predicates
+	;;
+	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
+#ifdef CONFIG_XEN
+	movl r3=XSI_PSR_IC
+	mov r14=1
+	;;
+	st4 [r3]=r14
+#else
+	ssm psr.ic | PSR_DEFAULT_BITS
+#endif
+	;;
+	adds r3=8,r2		// set up second base pointer for SAVE_REST
+	srlz.i			// ensure everybody knows psr.ic is back on
+	;;
+	SAVE_REST
+	;;
+	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+#ifdef CONFIG_XEN
+	;;
+	br.call.sptk.many rp=xen_get_ivr
+	;;
+	mov out0=r8		// pass cr.ivr as first arg
+#else
+	mov out0=cr.ivr		// pass cr.ivr as first arg
+#endif
+	add out1=16,sp		// pass pointer to pt_regs as second arg
+	;;
+	srlz.d			// make sure we see the effect of cr.ivr
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=ia64_handle_irq
+END(interrupt)
+
+	.org ia64_ivt+0x3400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+	DBG_FAULT(13)
+	FAULT(13)
+
+	.org ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+	DBG_FAULT(14)
+	FAULT(14)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 *
+	 * ia64_syscall_setup() is a separate subroutine so that it can
+	 *	allocate stacked registers so it can safely demine any
+	 *	potential NaT values from the input registers.
+	 *
+	 * On entry:
+	 *	- executing on bank 0 or bank 1 register set (doesn't matter)
+	 *	-  r1: stack pointer
+	 *	-  r2: current task pointer
+	 *	-  r3: preserved
+	 *	- r11: original contents (saved ar.pfs to be saved)
+	 *	- r12: original contents (sp to be saved)
+	 *	- r13: original contents (tp to be saved)
+	 *	- r15: original contents (syscall # to be saved)
+	 *	- r18: saved bsp (after switching to kernel stack)
+	 *	- r19: saved b6
+	 *	- r20: saved r1 (gp)
+	 *	- r21: saved ar.fpsr
+	 *	- r22: kernel's register backing store base (krbs_base)
+	 *	- r23: saved ar.bspstore
+	 *	- r24: saved ar.rnat
+	 *	- r25: saved ar.unat
+	 *	- r26: saved ar.pfs
+	 *	- r27: saved ar.rsc
+	 *	- r28: saved cr.iip
+	 *	- r29: saved cr.ipsr
+	 *	- r31: saved pr
+	 *	-  b0: original contents (to be saved)
+	 * On exit:
+	 *	- executing on bank 1 registers
+	 *	- psr.ic enabled, interrupts restored
+	 *	-  p10: TRUE if syscall is invoked with more than 8 out
+	 *		registers or r15's Nat is true
+	 *	-  r1: kernel's gp
+	 *	-  r3: preserved (same as on entry)
+	 *	-  r8: -EINVAL if p10 is true
+	 *	- r12: points to kernel stack
+	 *	- r13: points to current task
+	 *	- p15: TRUE if interrupts need to be re-enabled
+	 *	- ar.fpsr: set to kernel settings
+	 */
+#ifndef CONFIG_XEN
+GLOBAL_ENTRY(ia64_syscall_setup)
+#if PT(B6) != 0
+# error This code assumes that b6 is the first field in pt_regs.
+#endif
+	st8 [r1]=r19				// save b6
+	add r16=PT(CR_IPSR),r1			// initialize first base pointer
+	add r17=PT(R11),r1			// initialize second base pointer
+	;;
+	alloc r19=ar.pfs,8,0,0,0		// ensure in0-in7 are writable
+	st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR)	// save cr.ipsr
+	tnat.nz p8,p0=in0
+
+	st8.spill [r17]=r11,PT(CR_IIP)-PT(R11)	// save r11
+	tnat.nz p9,p0=in1
+(pKStk)	mov r18=r0				// make sure r18 isn't NaT
+	;;
+
+	st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS)	// save ar.pfs
+	st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP)	// save cr.iip
+	mov r28=b0				// save b0 (2 cyc)
+	;;
+
+	st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT)	// save ar.unat
+	dep r19=0,r19,38,26			// clear all bits but 0..37 [I0]
+(p8)	mov in0=-1
+	;;
+
+	st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS)	// store ar.pfs.pfm in cr.ifs
+	extr.u r11=r19,7,7	// I0		// get sol of ar.pfs
+	and r8=0x7f,r19		// A		// get sof of ar.pfs
+
+	st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
+(p9)	mov in1=-1
+	;;
+
+(pUStk) sub r18=r18,r22				// r18=RSE.ndirty*8
+	tnat.nz p10,p0=in2
+	add r11=8,r11
+	;;
+(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16		// skip over ar_rnat field
+(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17	// skip over ar_bspstore field
+	tnat.nz p11,p0=in3
+	;;
+(p10)	mov in2=-1
+	tnat.nz p12,p0=in4				// [I0]
+(p11)	mov in3=-1
+	;;
+(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT)	// save ar.rnat
+(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE)	// save ar.bspstore
+	shl r18=r18,16				// compute ar.rsc to be used for "loadrs"
+	;;
+	st8 [r16]=r31,PT(LOADRS)-PT(PR)		// save predicates
+	st8 [r17]=r28,PT(R1)-PT(B0)		// save b0
+	tnat.nz p13,p0=in5				// [I0]
+	;;
+	st8 [r16]=r18,PT(R12)-PT(LOADRS)	// save ar.rsc value for "loadrs"
+	st8.spill [r17]=r20,PT(R13)-PT(R1)	// save original r1
+(p12)	mov in4=-1
+	;;
+
+.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12)	// save r12
+.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13)		// save r13
+(p13)	mov in5=-1
+	;;
+	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
+	tnat.nz p14,p0=in6
+	cmp.lt p10,p9=r11,r8	// frame size can't be more than local+8
+	;;
+	stf8 [r16]=f1		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p9)	tnat.nz p10,p0=r15
+	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
+
+	st8.spill [r17]=r15			// save r15
+	tnat.nz p8,p0=in7
+	nop.i 0
+
+	mov r13=r2				// establish `current'
+	movl r1=__gp				// establish kernel global pointer
+	;;
+(p14)	mov in6=-1
+(p8)	mov in7=-1
+	nop.i 0
+
+	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
+	movl r17=FPSR_DEFAULT
+	;;
+	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
+(p10)	mov r8=-EINVAL
+	br.ret.sptk.many b7
+END(ia64_syscall_setup)
+#endif
+
+	.org ia64_ivt+0x3c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+	DBG_FAULT(15)
+	FAULT(15)
+
+	/*
+	 * Squatting in this space ...
+	 *
+	 * This special case dispatcher for illegal operation faults allows preserved
+	 * registers to be modified through a callback function (asm only) that is handed
+	 * back from the fault handler in r8. Up to three arguments can be passed to the
+	 * callback function by returning an aggregate with the callback as its first
+	 * element, followed by the arguments.
+	 */
+ENTRY(dispatch_illegal_op_fault)
+	SAVE_MIN_WITH_COVER
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i		// guarantee that interruption collection is on
+	;;
+(p15)	ssm psr.i	// restore psr.i
+	adds r3=8,r2	// set up second base pointer for SAVE_REST
+	;;
+	alloc r14=ar.pfs,0,0,1,0	// must be first in insn group
+	mov out0=ar.ec
+	;;
+	SAVE_REST
+	;;
+	br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0:	;;
+	alloc r14=ar.pfs,0,0,3,0	// must be first in insn group
+	mov out0=r9
+	mov out1=r10
+	mov out2=r11
+	movl r15=ia64_leave_kernel
+	;;
+	mov rp=r15
+	mov b6=r8
+	;;
+	cmp.ne p6,p0=0,r8
+(p6)	br.call.dpnt.many b6=b6		// call returns to ia64_leave_kernel
+	br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
+
+	.org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+	DBG_FAULT(16)
+	FAULT(16)
+
+	.org ia64_ivt+0x4400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+	DBG_FAULT(17)
+	FAULT(17)
+
+ENTRY(non_syscall)
+	SAVE_MIN_WITH_COVER
+
+	// There is no particular reason for this code to be here, other than that
+	// there happens to be space here that would go unused otherwise.  If this
+	// fault ever gets "unreserved", simply moved the following code to a more
+	// suitable spot...
+
+	alloc r14=ar.pfs,0,0,2,0
+	mov out0=cr.iim
+	add out1=16,sp
+	adds r3=8,r2			// set up second base pointer for SAVE_REST
+
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i				// guarantee that interruption collection is on
+	;;
+(p15)	ssm psr.i			// restore psr.i
+	movl r15=ia64_leave_kernel
+	;;
+	SAVE_REST
+	mov rp=r15
+	;;
+	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+	.org ia64_ivt+0x4800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+	DBG_FAULT(18)
+	FAULT(18)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_unaligned_handler)
+	SAVE_MIN_WITH_COVER
+	;;
+	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
+	mov out0=cr.ifa
+	adds out1=16,sp
+
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i					// guarantee that interruption collection is on
+	;;
+(p15)	ssm psr.i				// restore psr.i
+	adds r3=8,r2				// set up second base pointer
+	;;
+	SAVE_REST
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+	.org ia64_ivt+0x4c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+	DBG_FAULT(19)
+	FAULT(19)
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+ENTRY(dispatch_to_fault_handler)
+	/*
+	 * Input:
+	 *	psr.ic:	off
+	 *	r19:	fault vector number (e.g., 24 for General Exception)
+	 *	r31:	contains saved predicates (pr)
+	 */
+	SAVE_MIN_WITH_COVER_R19
+	alloc r14=ar.pfs,0,0,5,0
+	mov out0=r15
+	mov out1=cr.isr
+	mov out2=cr.ifa
+	mov out3=cr.iim
+	mov out4=cr.itir
+	;;
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i					// guarantee that interruption collection is on
+	;;
+(p15)	ssm psr.i				// restore psr.i
+	adds r3=8,r2				// set up second base pointer for SAVE_REST
+	;;
+	SAVE_REST
+	movl r14=ia64_leave_kernel
+	;;
+	mov rp=r14
+	br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
+//
+// --- End of long entries, Beginning of short entries
+//
+
+	.org ia64_ivt+0x5000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ENTRY(page_not_present)
+	DBG_FAULT(20)
+	mov r16=cr.ifa
+	rsm psr.dt
+	/*
+	 * The Linux page fault handler doesn't expect non-present pages to be in
+	 * the TLB.  Flush the existing entry now, so we meet that expectation.
+	 */
+	mov r17=PAGE_SHIFT<<2
+	;;
+	ptc.l r16,r17
+	;;
+	mov r31=pr
+	srlz.d
+	br.sptk.many page_fault
+END(page_not_present)
+
+	.org ia64_ivt+0x5100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ENTRY(key_permission)
+	DBG_FAULT(21)
+	mov r16=cr.ifa
+	rsm psr.dt
+	mov r31=pr
+	;;
+	srlz.d
+	br.sptk.many page_fault
+END(key_permission)
+
+	.org ia64_ivt+0x5200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(iaccess_rights)
+	DBG_FAULT(22)
+	mov r16=cr.ifa
+	rsm psr.dt
+	mov r31=pr
+	;;
+	srlz.d
+	br.sptk.many page_fault
+END(iaccess_rights)
+
+	.org ia64_ivt+0x5300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(daccess_rights)
+	DBG_FAULT(23)
+#ifdef CONFIG_XEN
+	movl r16=XSI_IFA
+	;;
+	ld8 r16=[r16]
+	;;
+	XEN_HYPER_RSM_PSR_DT;
+#else
+	mov r16=cr.ifa
+	rsm psr.dt
+#endif
+	mov r31=pr
+	;;
+	srlz.d
+	br.sptk.many page_fault
+END(daccess_rights)
+
+	.org ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(general_exception)
+	DBG_FAULT(24)
+	mov r16=cr.isr
+	mov r31=pr
+	;;
+	cmp4.eq p6,p0=0,r16
+(p6)	br.sptk.many dispatch_illegal_op_fault
+	;;
+	mov r19=24		// fault number
+	br.sptk.many dispatch_to_fault_handler
+END(general_exception)
+
+	.org ia64_ivt+0x5500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(disabled_fp_reg)
+	DBG_FAULT(25)
+	rsm psr.dfh		// ensure we can access fph
+	;;
+	srlz.d
+	mov r31=pr
+	mov r19=25
+	br.sptk.many dispatch_to_fault_handler
+END(disabled_fp_reg)
+
+	.org ia64_ivt+0x5600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(nat_consumption)
+	DBG_FAULT(26)
+	FAULT(26)
+END(nat_consumption)
+
+	.org ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(speculation_vector)
+	DBG_FAULT(27)
+	/*
+	 * A [f]chk.[as] instruction needs to take the branch to the recovery code but
+	 * this part of the architecture is not implemented in hardware on some CPUs, such
+	 * as Itanium.  Thus, in general we need to emulate the behavior.  IIM contains
+	 * the relative target (not yet sign extended).  So after sign extending it we
+	 * simply add it to IIP.  We also need to reset the EI field of the IPSR to zero,
+	 * i.e., the slot to restart into.
+	 *
+	 * cr.imm contains zero_ext(imm21)
+	 */
+	mov r18=cr.iim
+	;;
+	mov r17=cr.iip
+	shl r18=r18,43			// put sign bit in position (43=64-21)
+	;;
+
+	mov r16=cr.ipsr
+	shr r18=r18,39			// sign extend (39=43-4)
+	;;
+
+	add r17=r17,r18			// now add the offset
+	;;
+	mov cr.iip=r17
+	dep r16=0,r16,41,2		// clear EI
+	;;
+
+	mov cr.ipsr=r16
+	;;
+
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+END(speculation_vector)
+
+	.org ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+	DBG_FAULT(28)
+	FAULT(28)
+
+	.org ia64_ivt+0x5900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(debug_vector)
+	DBG_FAULT(29)
+	FAULT(29)
+END(debug_vector)
+
+	.org ia64_ivt+0x5a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(unaligned_access)
+	DBG_FAULT(30)
+	mov r16=cr.ipsr
+	mov r31=pr		// prepare to save predicates
+	;;
+	br.sptk.many dispatch_unaligned_handler
+END(unaligned_access)
+
+	.org ia64_ivt+0x5b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(unsupported_data_reference)
+	DBG_FAULT(31)
+	FAULT(31)
+END(unsupported_data_reference)
+
+	.org ia64_ivt+0x5c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ENTRY(floating_point_fault)
+	DBG_FAULT(32)
+	FAULT(32)
+END(floating_point_fault)
+
+	.org ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(floating_point_trap)
+	DBG_FAULT(33)
+	FAULT(33)
+END(floating_point_trap)
+
+	.org ia64_ivt+0x5e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(lower_privilege_trap)
+	DBG_FAULT(34)
+	FAULT(34)
+END(lower_privilege_trap)
+
+	.org ia64_ivt+0x5f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(taken_branch_trap)
+	DBG_FAULT(35)
+	FAULT(35)
+END(taken_branch_trap)
+
+	.org ia64_ivt+0x6000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(single_step_trap)
+	DBG_FAULT(36)
+	FAULT(36)
+END(single_step_trap)
+
+	.org ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+	DBG_FAULT(37)
+	FAULT(37)
+
+	.org ia64_ivt+0x6200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+	DBG_FAULT(38)
+	FAULT(38)
+
+	.org ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+	DBG_FAULT(39)
+	FAULT(39)
+
+	.org ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+	DBG_FAULT(40)
+	FAULT(40)
+
+	.org ia64_ivt+0x6500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+	DBG_FAULT(41)
+	FAULT(41)
+
+	.org ia64_ivt+0x6600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+	DBG_FAULT(42)
+	FAULT(42)
+
+	.org ia64_ivt+0x6700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+	DBG_FAULT(43)
+	FAULT(43)
+
+	.org ia64_ivt+0x6800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+	DBG_FAULT(44)
+	FAULT(44)
+
+	.org ia64_ivt+0x6900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(ia32_exception)
+	DBG_FAULT(45)
+	FAULT(45)
+END(ia32_exception)
+
+	.org ia64_ivt+0x6a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(ia32_intercept)
+	DBG_FAULT(46)
+#ifdef	CONFIG_IA32_SUPPORT
+	mov r31=pr
+	mov r16=cr.isr
+	;;
+	extr.u r17=r16,16,8	// get ISR.code
+	mov r18=ar.eflag
+	mov r19=cr.iim		// old eflag value
+	;;
+	cmp.ne p6,p0=2,r17
+(p6)	br.cond.spnt 1f		// not a system flag fault
+	xor r16=r18,r19
+	;;
+	extr.u r17=r16,18,1	// get the eflags.ac bit
+	;;
+	cmp.eq p6,p0=0,r17
+(p6)	br.cond.spnt 1f		// eflags.ac bit didn't change
+	;;
+	mov pr=r31,-1		// restore predicate registers
+#ifdef CONFIG_XEN
+	XEN_HYPER_RFI;
+#else
+	rfi
+#endif
+
+1:
+#endif	// CONFIG_IA32_SUPPORT
+	FAULT(46)
+END(ia32_intercept)
+
+	.org ia64_ivt+0x6b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt  (74)
+ENTRY(ia32_interrupt)
+	DBG_FAULT(47)
+#ifdef CONFIG_IA32_SUPPORT
+	mov r31=pr
+	br.sptk.many dispatch_to_ia32_handler
+#else
+	FAULT(47)
+#endif
+END(ia32_interrupt)
+
+	.org ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+	DBG_FAULT(48)
+	FAULT(48)
+
+	.org ia64_ivt+0x6d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+	DBG_FAULT(49)
+	FAULT(49)
+
+	.org ia64_ivt+0x6e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+	DBG_FAULT(50)
+	FAULT(50)
+
+	.org ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+	DBG_FAULT(51)
+	FAULT(51)
+
+	.org ia64_ivt+0x7000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+	DBG_FAULT(52)
+	FAULT(52)
+
+	.org ia64_ivt+0x7100
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+	DBG_FAULT(53)
+	FAULT(53)
+
+	.org ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+	DBG_FAULT(54)
+	FAULT(54)
+
+	.org ia64_ivt+0x7300
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+	DBG_FAULT(55)
+	FAULT(55)
+
+	.org ia64_ivt+0x7400
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+	DBG_FAULT(56)
+	FAULT(56)
+
+	.org ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+	DBG_FAULT(57)
+	FAULT(57)
+
+	.org ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+	DBG_FAULT(58)
+	FAULT(58)
+
+	.org ia64_ivt+0x7700
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+	DBG_FAULT(59)
+	FAULT(59)
+
+	.org ia64_ivt+0x7800
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+	DBG_FAULT(60)
+	FAULT(60)
+
+	.org ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+	DBG_FAULT(61)
+	FAULT(61)
+
+	.org ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+	DBG_FAULT(62)
+	FAULT(62)
+
+	.org ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+	DBG_FAULT(63)
+	FAULT(63)
+
+	.org ia64_ivt+0x7c00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+	DBG_FAULT(64)
+	FAULT(64)
+
+	.org ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+	DBG_FAULT(65)
+	FAULT(65)
+
+	.org ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+	DBG_FAULT(66)
+	FAULT(66)
+
+#ifdef CONFIG_XEN
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+GLOBAL_ENTRY(xen_bsw1)
+	/* FIXME: THIS CODE IS NOT NaT SAFE! */
+	movl r30=XSI_BANKNUM;
+	mov r31=1;;
+	st4 [r30]=r31;
+	movl r30=XSI_BANK1_R16;
+	movl r31=XSI_BANK1_R16+8;;
+	ld8 r16=[r30],16; ld8 r17=[r31],16;;
+	ld8 r18=[r30],16; ld8 r19=[r31],16;;
+	ld8 r20=[r30],16; ld8 r21=[r31],16;;
+	ld8 r22=[r30],16; ld8 r23=[r31],16;;
+	ld8 r24=[r30],16; ld8 r25=[r31],16;;
+	ld8 r26=[r30],16; ld8 r27=[r31],16;;
+	ld8 r28=[r30],16; ld8 r29=[r31],16;;
+	ld8 r30=[r30]; ld8 r31=[r31];;
+	br.ret.sptk.many b0
+#endif
+
+	.org ia64_ivt+0x7f00
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+	DBG_FAULT(67)
+	FAULT(67)
+
+#ifdef CONFIG_IA32_SUPPORT
+
+	/*
+	 * There is no particular reason for this code to be here, other than that
+	 * there happens to be space here that would go unused otherwise.  If this
+	 * fault ever gets "unreserved", simply moved the following code to a more
+	 * suitable spot...
+	 */
+
+	// IA32 interrupt entry point
+
+ENTRY(dispatch_to_ia32_handler)
+	SAVE_MIN
+	;;
+	mov r14=cr.isr
+	ssm psr.ic | PSR_DEFAULT_BITS
+	;;
+	srlz.i					// guarantee that interruption collection is on
+	;;
+(p15)	ssm psr.i
+	adds r3=8,r2		// Base pointer for SAVE_REST
+	;;
+	SAVE_REST
+	;;
+	mov r15=0x80
+	shr r14=r14,16		// Get interrupt number
+	;;
+	cmp.ne p6,p0=r14,r15
+(p6)	br.call.dpnt.many b6=non_ia32_syscall
+
+	adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp	// 16 byte hole per SW conventions
+	adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
+	;;
+	cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+	ld8 r8=[r14]		// get r8
+	;;
+	st8 [r15]=r8		// save original EAX in r1 (IA32 procs don't use the GP)
+	;;
+	alloc r15=ar.pfs,0,0,6,0	// must first in an insn group
+	;;
+	ld4 r8=[r14],8		// r8 == eax (syscall number)
+	mov r15=IA32_NR_syscalls
+	;;
+	cmp.ltu.unc p6,p7=r8,r15
+	ld4 out1=[r14],8	// r9 == ecx
+	;;
+	ld4 out2=[r14],8	// r10 == edx
+	;;
+	ld4 out0=[r14]		// r11 == ebx
+	adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
+	;;
+	ld4 out5=[r14],PT(R14)-PT(R13)	// r13 == ebp
+	;;
+	ld4 out3=[r14],PT(R15)-PT(R14)	// r14 == esi
+	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+	ld4 out4=[r14]		// r15 == edi
+	movl r16=ia32_syscall_table
+	;;
+(p6)	shladd r16=r8,3,r16	// force ni_syscall if not valid syscall number
+	ld4 r2=[r2]		// r2 = current_thread_info()->flags
+	;;
+	ld8 r16=[r16]
+	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
+	;;
+	mov b6=r16
+	movl r15=ia32_ret_from_syscall
+	cmp.eq p8,p0=r2,r0
+	;;
+	mov rp=r15
+(p8)	br.call.sptk.many b6=b6
+	br.cond.sptk ia32_trace_syscall
+
+non_ia32_syscall:
+	alloc r15=ar.pfs,0,0,2,0
+	mov out0=r14				// interrupt #
+	add out1=16,sp				// pointer to pt_regs
+	;;			// avoid WAW on CFM
+	br.call.sptk.many rp=ia32_bad_interrupt
+.ret1:	movl r15=ia64_leave_kernel
+	;;
+	mov rp=r15
+	br.ret.sptk.many rp
+END(dispatch_to_ia32_handler)
+
+#endif /* CONFIG_IA32_SUPPORT */
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
new file mode 100644
index 0000000000..39ace54786
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
@@ -0,0 +1,371 @@
+#include <linux/config.h>
+
+#include <asm/cache.h>
+
+#ifdef CONFIG_XEN
+#include "../kernel/entry.h"
+#else
+#include "entry.h"
+#endif
+
+/*
+ * For ivt.s we want to access the stack virtually so we don't have to disable translation
+ * on interrupts.
+ *
+ *  On entry:
+ *	r1:	pointer to current task (ar.k6)
+ */
+#define MINSTATE_START_SAVE_MIN_VIRT								\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+	;;											\
+(pUStk)	mov.m r24=ar.rnat;									\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
+(pKStk) mov r1=sp;					/* get sp  */				\
+	;;											\
+(pUStk) lfetch.fault.excl.nt1 [r22];								\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+	;;											\
+(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
+
+#define MINSTATE_END_SAVE_MIN_VIRT								\
+	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
+	;;
+
+/*
+ * For mca_asm.S we want to access the stack physically since the state is saved before we
+ * go virtual and don't want to destroy the iip or ipsr.
+ */
+#define MINSTATE_START_SAVE_MIN_PHYS								\
+(pKStk) mov r3=IA64_KR(PER_CPU_DATA);;								\
+(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;;							\
+(pKStk) ld8 r3 = [r3];;										\
+(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;;						\
+(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;						\
+(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
+(pUStk)	addl r22=IA64_RBS_OFFSET,r1;		/* compute base of register backing store */	\
+	;;											\
+(pUStk)	mov r24=ar.rnat;									\
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
+(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
+(pUStk)	dep r22=-1,r22,61,3;			/* compute kernel virtual addr of RBS */	\
+	;;											\
+(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;		/* if in kernel mode, use sp (r12) */		\
+(pUStk)	mov ar.bspstore=r22;			/* switch to kernel RBS */			\
+	;;											\
+(pUStk)	mov r18=ar.bsp;										\
+(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
+
+#define MINSTATE_END_SAVE_MIN_PHYS								\
+	dep r12=-1,r12,61,3;		/* make sp a kernel virtual address */			\
+	;;
+
+#ifdef MINSTATE_VIRT
+# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT)
+# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_VIRT
+# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_VIRT
+#endif
+
+#ifdef MINSTATE_PHYS
+# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT);; tpa reg=reg
+# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_PHYS
+# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_PHYS
+#endif
+
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *	psr.ic: off
+ *	r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *	psr.ic: off
+ *	 r2 = points to &pt_regs.r16
+ *	 r8 = contents of ar.ccv
+ *	 r9 = contents of ar.csd
+ *	r10 = contents of ar.ssd
+ *	r11 = FPSR_DEFAULT
+ *	r12 = kernel sp (kernel virtual address)
+ *	r13 = points to current task_struct (kernel virtual address)
+ *	p15 = TRUE if psr.i is set in cr.ipsr
+ *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *		preserved
+ * CONFIG_XEN note: p6/p7 are not preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#ifdef CONFIG_XEN
+#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
+	/*MINSTATE_GET_CURRENT(r16);	/* M (or M;;I) */					\
+	movl r16=XSI_KR0+(IA64_KR_CURRENT*8);;							\
+	ld8 r16=[r16];;										\
+	mov r27=ar.rsc;			/* M */							\
+	mov r20=r1;			/* A */							\
+	mov r25=ar.unat;		/* M */							\
+	/* mov r29=cr.ipsr;		/* M */							\
+	movl r29=XSI_IPSR;;									\
+	ld8 r29=[r29];;										\
+	mov r26=ar.pfs;			/* I */							\
+	/* mov r28=cr.iip;		/* M */							\
+	movl r28=XSI_IIP;;									\
+	ld8 r28=[r28];;										\
+	mov r21=ar.fpsr;		/* M */							\
+	COVER;			/* B;; (or nothing) */					\
+	;;											\
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
+	;;											\
+	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
+	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
+	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
+	/* switch from user to kernel RBS: */							\
+	;;											\
+	invala;				/* M */							\
+	/* SAVE_IFS; /* see xen special handling below */						\
+	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
+	;;											\
+	MINSTATE_START_SAVE_MIN									\
+	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
+	adds r16=PT(CR_IPSR),r1;								\
+	;;											\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
+	st8 [r16]=r29;		/* save cr.ipsr */						\
+	;;											\
+	lfetch.fault.excl.nt1 [r17];								\
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
+	mov r29=b0										\
+	;;											\
+	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
+	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
+(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r8,16;								\
+.mem.offset 8,0; st8.spill [r17]=r9,16;								\
+        ;;											\
+.mem.offset 0,0; st8.spill [r16]=r10,24;							\
+.mem.offset 8,0; st8.spill [r17]=r11,24;							\
+        ;;											\
+	/* xen special handling for possibly lazy cover */					\
+	movl r8=XSI_INCOMPL_REGFR;								\
+	;;											\
+	ld4 r30=[r8];										\
+	;;											\
+	cmp.eq	p6,p7=r30,r0;									\
+	;; /* not sure if this stop bit is necessary */						\
+(p6)	adds r8=XSI_PRECOVER_IFS-XSI_INCOMPL_REGFR,r8;						\
+(p7)	adds r8=XSI_IFS-XSI_INCOMPL_REGFR,r8;							\
+	;;											\
+	ld8 r30=[r8];										\
+	;;											\
+	st8 [r16]=r28,16;	/* save cr.iip */						\
+	st8 [r17]=r30,16;	/* save cr.ifs */						\
+(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
+	mov r8=ar.ccv;										\
+	mov r9=ar.csd;										\
+	mov r10=ar.ssd;										\
+	movl r11=FPSR_DEFAULT;   /* L-unit */							\
+	;;											\
+	st8 [r16]=r25,16;	/* save ar.unat */						\
+	st8 [r17]=r26,16;	/* save ar.pfs */						\
+	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
+	;;											\
+	st8 [r16]=r27,16;	/* save ar.rsc */						\
+(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
+(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
+	;;			/* avoid RAW on r16 & r17 */					\
+(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
+	st8 [r17]=r31,16;	/* save predicates */						\
+(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
+	;;											\
+	st8 [r16]=r29,16;	/* save b0 */							\
+	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
+	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
+.mem.offset 8,0; st8.spill [r17]=r12,16;							\
+	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r13,16;							\
+.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
+	/* mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
+	movl r21=XSI_KR0+(IA64_KR_CURRENT*8);;							\
+	ld8 r13=[r21];;										\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r15,16;							\
+.mem.offset 8,0; st8.spill [r17]=r14,16;							\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r2,16;								\
+.mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	;;											\
+	EXTRA;											\
+	mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;					\
+	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
+	;;											\
+	movl r1=__gp;		/* establish kernel global pointer */				\
+	;;											\
+	/* MINSTATE_END_SAVE_MIN */
+#else
+#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
+	MINSTATE_GET_CURRENT(r16);	/* M (or M;;I) */					\
+	mov r27=ar.rsc;			/* M */							\
+	mov r20=r1;			/* A */							\
+	mov r25=ar.unat;		/* M */							\
+	mov r29=cr.ipsr;		/* M */							\
+	mov r26=ar.pfs;			/* I */							\
+	mov r28=cr.iip;			/* M */							\
+	mov r21=ar.fpsr;		/* M */							\
+	COVER;				/* B;; (or nothing) */					\
+	;;											\
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
+	;;											\
+	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
+	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
+	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
+	/* switch from user to kernel RBS: */							\
+	;;											\
+	invala;				/* M */							\
+	SAVE_IFS;										\
+	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
+	;;											\
+	MINSTATE_START_SAVE_MIN									\
+	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
+	adds r16=PT(CR_IPSR),r1;								\
+	;;											\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
+	st8 [r16]=r29;		/* save cr.ipsr */						\
+	;;											\
+	lfetch.fault.excl.nt1 [r17];								\
+	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
+	mov r29=b0										\
+	;;											\
+	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
+	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
+(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r8,16;								\
+.mem.offset 8,0; st8.spill [r17]=r9,16;								\
+        ;;											\
+.mem.offset 0,0; st8.spill [r16]=r10,24;							\
+.mem.offset 8,0; st8.spill [r17]=r11,24;							\
+        ;;											\
+	st8 [r16]=r28,16;	/* save cr.iip */						\
+	st8 [r17]=r30,16;	/* save cr.ifs */						\
+(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
+	mov r8=ar.ccv;										\
+	mov r9=ar.csd;										\
+	mov r10=ar.ssd;										\
+	movl r11=FPSR_DEFAULT;   /* L-unit */							\
+	;;											\
+	st8 [r16]=r25,16;	/* save ar.unat */						\
+	st8 [r17]=r26,16;	/* save ar.pfs */						\
+	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
+	;;											\
+	st8 [r16]=r27,16;	/* save ar.rsc */						\
+(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
+(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
+	;;			/* avoid RAW on r16 & r17 */					\
+(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
+	st8 [r17]=r31,16;	/* save predicates */						\
+(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
+	;;											\
+	st8 [r16]=r29,16;	/* save b0 */							\
+	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
+	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
+.mem.offset 8,0; st8.spill [r17]=r12,16;							\
+	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r13,16;							\
+.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
+	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r15,16;							\
+.mem.offset 8,0; st8.spill [r17]=r14,16;							\
+	;;											\
+.mem.offset 0,0; st8.spill [r16]=r2,16;								\
+.mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
+	;;											\
+	EXTRA;											\
+	movl r1=__gp;		/* establish kernel global pointer */				\
+	;;											\
+	MINSTATE_END_SAVE_MIN
+#endif
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *	psr.ic: on
+ *	r2:	points to &pt_regs.r16
+ *	r3:	points to &pt_regs.r17
+ *	r8:	contents of ar.ccv
+ *	r9:	contents of ar.csd
+ *	r10:	contents of ar.ssd
+ *	r11:	FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define SAVE_REST				\
+.mem.offset 0,0; st8.spill [r2]=r16,16;		\
+.mem.offset 8,0; st8.spill [r3]=r17,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r18,16;		\
+.mem.offset 8,0; st8.spill [r3]=r19,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r20,16;		\
+.mem.offset 8,0; st8.spill [r3]=r21,16;		\
+	mov r18=b6;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r22,16;		\
+.mem.offset 8,0; st8.spill [r3]=r23,16;		\
+	mov r19=b7;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r24,16;		\
+.mem.offset 8,0; st8.spill [r3]=r25,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r26,16;		\
+.mem.offset 8,0; st8.spill [r3]=r27,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r28,16;		\
+.mem.offset 8,0; st8.spill [r3]=r29,16;		\
+	;;					\
+.mem.offset 0,0; st8.spill [r2]=r30,16;		\
+.mem.offset 8,0; st8.spill [r3]=r31,32;		\
+	;;					\
+	mov ar.fpsr=r11;	/* M-unit */	\
+	st8 [r2]=r8,8;		/* ar.ccv */	\
+	adds r24=PT(B6)-PT(F7),r3;		\
+	;;					\
+	stf.spill [r2]=f6,32;			\
+	stf.spill [r3]=f7,32;			\
+	;;					\
+	stf.spill [r2]=f8,32;			\
+	stf.spill [r3]=f9,32;			\
+	;;					\
+	stf.spill [r2]=f10;			\
+	stf.spill [r3]=f11;			\
+	adds r25=PT(B7)-PT(F11),r3;		\
+	;;					\
+	st8 [r24]=r18,16;       /* b6 */	\
+	st8 [r25]=r19,16;       /* b7 */	\
+	;;					\
+	st8 [r24]=r9;        	/* ar.csd */	\
+	st8 [r25]=r10;      	/* ar.ssd */	\
+	;;
+
+#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(cover, mov r30=cr.ifs,)
+#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
+#ifdef CONFIG_XEN
+#define SAVE_MIN		break 0;; /* FIXME: non-cover version only for ia32 support? */
+#else
+#define SAVE_MIN		DO_SAVE_MIN(     , mov r30=r0, )
+#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
new file mode 100644
index 0000000000..2fd45e2984
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
@@ -0,0 +1,73 @@
+/*
+ * ia64/xen/xenpal.S
+ *
+ * Alternate PAL  routines for Xen.  Heavily leveraged from
+ *   ia64/kernel/pal.S
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@.hp.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+GLOBAL_ENTRY(xen_pal_call_static)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
+	alloc loc1 = ar.pfs,5,5,0,0
+#ifdef CONFIG_XEN
+	movl r22=running_on_xen;;
+	ld4 r22=[r22];;
+	cmp.eq p7,p0=r22,r0
+(p7)	br.cond.spnt.many __ia64_pal_call_static;;
+#endif
+	movl loc2 = pal_entry_point
+1:	{
+	  mov r28 = in0
+	  mov r29 = in1
+	  mov r8 = ip
+	}
+	;;
+	ld8 loc2 = [loc2]		// loc2 <- entry point
+	tbit.nz p6,p7 = in4, 0
+	adds r8 = 1f-1b,r8
+	mov loc4=ar.rsc			// save RSE configuration
+	;;
+	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
+	mov loc3 = psr
+	mov loc0 = rp
+	.body
+	mov r30 = in2
+
+#ifdef CONFIG_XEN
+	// this is low priority for paravirtualization, but is called
+	// from the idle loop so confuses privop counting
+	movl r31=XSI_PSR_IC
+	;;
+(p6)	st8 [r31]=r0
+	;;
+(p7)	adds r31=XSI_PSR_I-XSI_PSR_IC,r31
+	;;
+(p7)	st4 [r31]=r0
+	;;
+	mov r31 = in3
+	mov b7 = loc2
+	;;
+#else
+(p6)	rsm psr.i | psr.ic
+	mov r31 = in3
+	mov b7 = loc2
+
+(p7)	rsm psr.i
+	;;
+(p6)	srlz.i
+#endif
+	mov rp = r8
+	br.cond.sptk.many b7
+1:	mov psr.l = loc3
+	mov ar.rsc = loc4		// restore RSE configuration
+	mov ar.pfs = loc1
+	mov rp = loc0
+	;;
+	srlz.d				// seralize restoration of psr.l
+	br.ret.sptk.many b0
+END(xen_pal_call_static)
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
new file mode 100644
index 0000000000..6f078961d4
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
@@ -0,0 +1,27 @@
+/*
+ * Support routines for Xen
+ *
+ * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/asmmacro.h>
+
+	.data
+	.align 8
+	.globl running_on_xen
+running_on_xen:
+	data4 0
+
+	.text
+GLOBAL_ENTRY(early_xen_setup)
+	mov r8=cr.dcr;;
+	extr.u r8=r8,63,1
+	movl r9=running_on_xen;;
+	st4 [r9]=r8;;
+	cmp.ne p7,p0=r8,r0;;
+(p7)	movl r10=xen_ivt;;
+(p7)	mov cr.iva=r10
+	br.ret.sptk.many rp;;
+END(xen_init)
diff --git a/linux-2.6-xen-sparse/drivers/acpi/motherboard.c b/linux-2.6-xen-sparse/drivers/acpi/motherboard.c
new file mode 100644
index 0000000000..41526b5a83
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/acpi/motherboard.c
@@ -0,0 +1,178 @@
+/* 
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+/* Purpose: Prevent PCMCIA cards from using motherboard resources. */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <asm/io.h>
+
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+
+#define _COMPONENT		ACPI_SYSTEM_COMPONENT
+ACPI_MODULE_NAME		("acpi_motherboard")
+
+/* Dell use PNP0C01 instead of PNP0C02 */
+#define ACPI_MB_HID1			"PNP0C01"
+#define ACPI_MB_HID2			"PNP0C02"
+
+/**
+ * Doesn't care about legacy IO ports, only IO ports beyond 0x1000 are reserved
+ * Doesn't care about the failure of 'request_region', since other may reserve 
+ * the io ports as well
+ */
+#define IS_RESERVED_ADDR(base, len) \
+	(((len) > 0) && ((base) > 0) && ((base) + (len) < IO_SPACE_LIMIT) \
+	&& ((base) + (len) > PCIBIOS_MIN_IO))
+
+/*
+ * Clearing the flag (IORESOURCE_BUSY) allows drivers to use
+ * the io ports if they really know they can use it, while
+ * still preventing hotplug PCI devices from using it. 
+ */
+
+static acpi_status
+acpi_reserve_io_ranges (struct acpi_resource *res, void *data)
+{
+	struct resource *requested_res = NULL;
+
+	ACPI_FUNCTION_TRACE("acpi_reserve_io_ranges");
+
+	if (res->id == ACPI_RSTYPE_IO) {
+		struct acpi_resource_io *io_res = &res->data.io;
+
+		if (io_res->min_base_address != io_res->max_base_address)
+			return_VALUE(AE_OK);
+		if (IS_RESERVED_ADDR(io_res->min_base_address, io_res->range_length)) {
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Motherboard resources 0x%08x - 0x%08x\n",
+				io_res->min_base_address, 
+				io_res->min_base_address + io_res->range_length));
+			requested_res = request_region(io_res->min_base_address, 
+				io_res->range_length, "motherboard");
+		}
+	} else if (res->id == ACPI_RSTYPE_FIXED_IO) {
+		struct acpi_resource_fixed_io *fixed_io_res = &res->data.fixed_io;
+
+		if (IS_RESERVED_ADDR(fixed_io_res->base_address, fixed_io_res->range_length)) {
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Motherboard resources 0x%08x - 0x%08x\n",
+				fixed_io_res->base_address, 
+				fixed_io_res->base_address + fixed_io_res->range_length));
+			requested_res = request_region(fixed_io_res->base_address, 
+				fixed_io_res->range_length, "motherboard");
+		}
+	} else {
+		/* Memory mapped IO? */
+	}
+
+	if (requested_res)
+		requested_res->flags &= ~IORESOURCE_BUSY;
+	return_VALUE(AE_OK);
+}
+
+static int acpi_motherboard_add (struct acpi_device *device)
+{
+	if (!device)
+		return -EINVAL;
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS, 
+		acpi_reserve_io_ranges, NULL);
+
+	return 0;
+}
+
+static struct acpi_driver acpi_motherboard_driver1 = {
+	.name =		"motherboard",
+	.class =	"",
+	.ids =		ACPI_MB_HID1,
+	.ops =	{
+		.add =		acpi_motherboard_add,
+	},
+};
+
+static struct acpi_driver acpi_motherboard_driver2 = {
+	.name =		"motherboard",
+	.class =	"",
+	.ids =		ACPI_MB_HID2,
+	.ops =	{
+		.add =		acpi_motherboard_add,
+	},
+};
+
+static void __init
+acpi_reserve_resources (void)
+{
+	if (!acpi_gbl_FADT) return;
+	if (acpi_gbl_FADT->xpm1a_evt_blk.address && acpi_gbl_FADT->pm1_evt_len)
+		request_region(acpi_gbl_FADT->xpm1a_evt_blk.address, 
+			acpi_gbl_FADT->pm1_evt_len, "PM1a_EVT_BLK");
+
+	if (acpi_gbl_FADT->xpm1b_evt_blk.address && acpi_gbl_FADT->pm1_evt_len)
+		request_region(acpi_gbl_FADT->xpm1b_evt_blk.address,
+			acpi_gbl_FADT->pm1_evt_len, "PM1b_EVT_BLK");
+
+	if (acpi_gbl_FADT->xpm1a_cnt_blk.address && acpi_gbl_FADT->pm1_cnt_len)
+		request_region(acpi_gbl_FADT->xpm1a_cnt_blk.address, 
+			acpi_gbl_FADT->pm1_cnt_len, "PM1a_CNT_BLK");
+
+	if (acpi_gbl_FADT->xpm1b_cnt_blk.address && acpi_gbl_FADT->pm1_cnt_len)
+		request_region(acpi_gbl_FADT->xpm1b_cnt_blk.address, 
+			acpi_gbl_FADT->pm1_cnt_len, "PM1b_CNT_BLK");
+
+	if (acpi_gbl_FADT->xpm_tmr_blk.address && acpi_gbl_FADT->pm_tm_len == 4)
+		request_region(acpi_gbl_FADT->xpm_tmr_blk.address,
+			4, "PM_TMR");
+
+	if (acpi_gbl_FADT->xpm2_cnt_blk.address && acpi_gbl_FADT->pm2_cnt_len)
+		request_region(acpi_gbl_FADT->xpm2_cnt_blk.address,
+			acpi_gbl_FADT->pm2_cnt_len, "PM2_CNT_BLK");
+
+	/* Length of GPE blocks must be a non-negative multiple of 2 */
+
+	if (acpi_gbl_FADT->xgpe0_blk.address && acpi_gbl_FADT->gpe0_blk_len &&
+			!(acpi_gbl_FADT->gpe0_blk_len & 0x1))
+		request_region(acpi_gbl_FADT->xgpe0_blk.address,
+			acpi_gbl_FADT->gpe0_blk_len, "GPE0_BLK");
+
+	if (acpi_gbl_FADT->xgpe1_blk.address && acpi_gbl_FADT->gpe1_blk_len &&
+			!(acpi_gbl_FADT->gpe1_blk_len & 0x1))
+		request_region(acpi_gbl_FADT->xgpe1_blk.address,
+			acpi_gbl_FADT->gpe1_blk_len, "GPE1_BLK");
+}
+
+static int __init acpi_motherboard_init(void)
+{
+	acpi_bus_register_driver(&acpi_motherboard_driver1);
+	acpi_bus_register_driver(&acpi_motherboard_driver2);
+	/* 
+	 * Guarantee motherboard IO reservation first
+	 * This module must run after scan.c
+	 */
+	if (!acpi_disabled)
+		acpi_reserve_resources ();
+	return 0;
+}
+
+/**
+ * Reserve motherboard resources after PCI claim BARs,
+ * but before PCI assign resources for uninitialized PCI devices
+ */
+fs_initcall(acpi_motherboard_init);
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/gcc_intrin.h b/linux-2.6-xen-sparse/include/asm-ia64/gcc_intrin.h
new file mode 100644
index 0000000000..b23a280297
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/gcc_intrin.h
@@ -0,0 +1,603 @@
+#ifndef _ASM_IA64_GCC_INTRIN_H
+#define _ASM_IA64_GCC_INTRIN_H
+/*
+ *
+ * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+
+#include <linux/compiler.h>
+
+/* define this macro to get some asm stmts included in 'c' files */
+#define ASM_SUPPORTED
+
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define ia64_barrier()	asm volatile ("":::"memory")
+
+#define ia64_stop()	asm volatile (";;"::)
+
+#define ia64_invala_gr(regnum)	asm volatile ("invala.e r%0" :: "i"(regnum))
+
+#define ia64_invala_fr(regnum)	asm volatile ("invala.e f%0" :: "i"(regnum))
+
+extern void ia64_bad_param_for_setreg (void);
+extern void ia64_bad_param_for_getreg (void);
+
+register unsigned long ia64_r13 asm ("r13") __attribute_used__;
+
+#define __ia64_setreg(regnum, val)						\
+({										\
+	switch (regnum) {							\
+	    case _IA64_REG_PSR_L:						\
+		    asm volatile ("mov psr.l=%0" :: "r"(val) : "memory");	\
+		    break;							\
+	    case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC:				\
+		    asm volatile ("mov ar%0=%1" ::				\
+		    			  "i" (regnum - _IA64_REG_AR_KR0),	\
+					  "r"(val): "memory");			\
+		    break;							\
+	    case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1:			\
+		    asm volatile ("mov cr%0=%1" ::				\
+				          "i" (regnum - _IA64_REG_CR_DCR),	\
+					  "r"(val): "memory" );			\
+		    break;							\
+	    case _IA64_REG_SP:							\
+		    asm volatile ("mov r12=%0" ::				\
+			    		  "r"(val): "memory");			\
+		    break;							\
+	    case _IA64_REG_GP:							\
+		    asm volatile ("mov gp=%0" :: "r"(val) : "memory");		\
+		break;								\
+	    default:								\
+		    ia64_bad_param_for_setreg();				\
+		    break;							\
+	}									\
+})
+
+#define __ia64_getreg(regnum)							\
+({										\
+	__u64 ia64_intri_res;							\
+										\
+	switch (regnum) {							\
+	case _IA64_REG_GP:							\
+		asm volatile ("mov %0=gp" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_IP:							\
+		asm volatile ("mov %0=ip" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_PSR:							\
+		asm volatile ("mov %0=psr" : "=r"(ia64_intri_res));		\
+		break;								\
+	case _IA64_REG_TP:	/* for current() */				\
+		ia64_intri_res = ia64_r13;					\
+		break;								\
+	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC:				\
+		asm volatile ("mov %0=ar%1" : "=r" (ia64_intri_res)		\
+				      : "i"(regnum - _IA64_REG_AR_KR0));	\
+		break;								\
+	case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1:				\
+		asm volatile ("mov %0=cr%1" : "=r" (ia64_intri_res)		\
+				      : "i" (regnum - _IA64_REG_CR_DCR));	\
+		break;								\
+	case _IA64_REG_SP:							\
+		asm volatile ("mov %0=sp" : "=r" (ia64_intri_res));		\
+		break;								\
+	default:								\
+		ia64_bad_param_for_getreg();					\
+		break;								\
+	}									\
+	ia64_intri_res;								\
+})
+
+#define ia64_hint_pause 0
+
+#define __ia64_hint(mode)						\
+({								\
+	switch (mode) {						\
+	case ia64_hint_pause:					\
+		asm volatile ("hint @pause" ::: "memory");	\
+		break;						\
+	}							\
+})
+
+
+/* Integer values for mux1 instruction */
+#define ia64_mux1_brcst 0
+#define ia64_mux1_mix   8
+#define ia64_mux1_shuf  9
+#define ia64_mux1_alt  10
+#define ia64_mux1_rev  11
+
+#define ia64_mux1(x, mode)							\
+({										\
+	__u64 ia64_intri_res;							\
+										\
+	switch (mode) {								\
+	case ia64_mux1_brcst:							\
+		asm ("mux1 %0=%1,@brcst" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_mix:							\
+		asm ("mux1 %0=%1,@mix" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_shuf:							\
+		asm ("mux1 %0=%1,@shuf" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_alt:							\
+		asm ("mux1 %0=%1,@alt" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	case ia64_mux1_rev:							\
+		asm ("mux1 %0=%1,@rev" : "=r" (ia64_intri_res) : "r" (x));	\
+		break;								\
+	}									\
+	ia64_intri_res;								\
+})
+
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# define ia64_popcnt(x)		__builtin_popcountl(x)
+#else
+# define ia64_popcnt(x)						\
+  ({								\
+	__u64 ia64_intri_res;					\
+	asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x));	\
+								\
+	ia64_intri_res;						\
+  })
+#endif
+
+#define ia64_getf_exp(x)					\
+({								\
+	long ia64_intri_res;					\
+								\
+	asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x));	\
+								\
+	ia64_intri_res;						\
+})
+
+#define ia64_shrp(a, b, count)								\
+({											\
+	__u64 ia64_intri_res;								\
+	asm ("shrp %0=%1,%2,%3" : "=r"(ia64_intri_res) : "r"(a), "r"(b), "i"(count));	\
+	ia64_intri_res;									\
+})
+
+#define ia64_ldfs(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfs %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldfd(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfd %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldfe(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldfe %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldf8(regnum, x)					\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldf8 %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_ldf_fill(regnum, x)				\
+({								\
+	register double __f__ asm ("f"#regnum);			\
+	asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x));	\
+})
+
+#define ia64_stfs(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfs [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stfd(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfd [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stfe(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stf8(x, regnum)						\
+({									\
+	register double __f__ asm ("f"#regnum);				\
+	asm volatile ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_stf_spill(x, regnum)						\
+({										\
+	register double __f__ asm ("f"#regnum);					\
+	asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory");	\
+})
+
+#define ia64_fetchadd4_acq(p, inc)						\
+({										\
+										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd4.acq %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd4_rel(p, inc)						\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd4.rel %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd8_acq(p, inc)						\
+({										\
+										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd8.acq %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_fetchadd8_rel(p, inc)						\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("fetchadd8.rel %0=[%1],%2"				\
+				: "=r"(ia64_intri_res) : "r"(p), "i" (inc)	\
+				: "memory");					\
+										\
+	ia64_intri_res;								\
+})
+
+#define ia64_xchg1(ptr,x)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("xchg1 %0=[%1],%2"					\
+		      : "=r" (ia64_intri_res) : "r" (ptr), "r" (x) : "memory");	\
+	ia64_intri_res;								\
+})
+
+#define ia64_xchg2(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg2 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_xchg4(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg4 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_xchg8(ptr,x)						\
+({									\
+	__u64 ia64_intri_res;						\
+	asm volatile ("xchg8 %0=[%1],%2" : "=r" (ia64_intri_res)	\
+		      : "r" (ptr), "r" (x) : "memory");			\
+	ia64_intri_res;							\
+})
+
+#define ia64_cmpxchg1_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg1.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg1_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg1.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg2_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg2.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg2_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+											\
+	asm volatile ("cmpxchg2.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg4_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg4_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg4.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg8_acq(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+	asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_cmpxchg8_rel(ptr, new, old)						\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));					\
+											\
+	asm volatile ("cmpxchg8.rel %0=[%1],%2,ar.ccv":					\
+			      "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory");	\
+	ia64_intri_res;									\
+})
+
+#define ia64_mf()	asm volatile ("mf" ::: "memory")
+#define ia64_mfa()	asm volatile ("mf.a" ::: "memory")
+
+#define ia64_invala() asm volatile ("invala" ::: "memory")
+
+#define __ia64_thash(addr)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr));	\
+	ia64_intri_res;								\
+})
+
+#define ia64_srlz_i()	asm volatile (";; srlz.i ;;" ::: "memory")
+#define ia64_srlz_d()	asm volatile (";; srlz.d" ::: "memory");
+
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+# define ia64_dv_serialize_data()		asm volatile (".serialize.data");
+# define ia64_dv_serialize_instruction()	asm volatile (".serialize.instruction");
+#else
+# define ia64_dv_serialize_data()
+# define ia64_dv_serialize_instruction()
+#endif
+
+#define ia64_nop(x)	asm volatile ("nop %0"::"i"(x));
+
+#define __ia64_itci(addr)	asm volatile ("itc.i %0;;" :: "r"(addr) : "memory")
+
+#define __ia64_itcd(addr)	asm volatile ("itc.d %0;;" :: "r"(addr) : "memory")
+
+
+#define __ia64_itri(trnum, addr) asm volatile ("itr.i itr[%0]=%1"			\
+					     :: "r"(trnum), "r"(addr) : "memory")
+
+#define __ia64_itrd(trnum, addr) asm volatile ("itr.d dtr[%0]=%1"			\
+					     :: "r"(trnum), "r"(addr) : "memory")
+
+#define __ia64_tpa(addr)							\
+({										\
+	__u64 ia64_pa;								\
+	asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory");	\
+	ia64_pa;								\
+})
+
+#define __ia64_set_dbr(index, val)						\
+	asm volatile ("mov dbr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define __ia64_set_ibr(index, val)						\
+	asm volatile ("mov ibr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define __ia64_set_pkr(index, val)						\
+	asm volatile ("mov pkr[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define __ia64_set_pmc(index, val)						\
+	asm volatile ("mov pmc[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define __ia64_set_pmd(index, val)						\
+	asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory")
+
+#define __ia64_set_rr(index, val)							\
+	asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory");
+
+#define __ia64_get_cpuid(index)								\
+({											\
+	__u64 ia64_intri_res;								\
+	asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index));	\
+	ia64_intri_res;									\
+})
+
+#define __ia64_get_dbr(index)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define __ia64_get_ibr(index)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define __ia64_get_pkr(index)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define __ia64_get_pmc(index)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("mov %0=pmc[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+
+#define __ia64_get_pmd(index)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index));	\
+	ia64_intri_res;								\
+})
+
+#define __ia64_get_rr(index)							\
+({										\
+	__u64 ia64_intri_res;							\
+	asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index));	\
+	ia64_intri_res;								\
+})
+
+#define __ia64_fc(addr)	asm volatile ("fc %0" :: "r"(addr) : "memory")
+
+
+#define ia64_sync_i()	asm volatile (";; sync.i" ::: "memory")
+
+#define __ia64_ssm(mask)	asm volatile ("ssm %0":: "i"((mask)) : "memory")
+#define __ia64_rsm(mask)	asm volatile ("rsm %0":: "i"((mask)) : "memory")
+#define ia64_sum(mask)	asm volatile ("sum %0":: "i"((mask)) : "memory")
+#define ia64_rum(mask)	asm volatile ("rum %0":: "i"((mask)) : "memory")
+
+#define __ia64_ptce(addr)	asm volatile ("ptc.e %0" :: "r"(addr))
+
+#define __ia64_ptcga(addr, size)							\
+do {										\
+	asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory");	\
+	ia64_dv_serialize_data();						\
+} while (0)
+
+#define __ia64_ptcl(addr, size)							\
+do {										\
+	asm volatile ("ptc.l %0,%1" :: "r"(addr), "r"(size) : "memory");	\
+	ia64_dv_serialize_data();						\
+} while (0)
+
+#define __ia64_ptri(addr, size)						\
+	asm volatile ("ptr.i %0,%1" :: "r"(addr), "r"(size) : "memory")
+
+#define __ia64_ptrd(addr, size)						\
+	asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory")
+
+/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */
+
+#define ia64_lfhint_none   0
+#define ia64_lfhint_nt1    1
+#define ia64_lfhint_nt2    2
+#define ia64_lfhint_nta    3
+
+#define ia64_lfetch(lfhint, y)					\
+({								\
+        switch (lfhint) {					\
+        case ia64_lfhint_none:					\
+                asm volatile ("lfetch [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nt1:					\
+                asm volatile ("lfetch.nt1 [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nt2:					\
+                asm volatile ("lfetch.nt2 [%0]" : : "r"(y));	\
+                break;						\
+        case ia64_lfhint_nta:					\
+                asm volatile ("lfetch.nta [%0]" : : "r"(y));	\
+                break;						\
+        }							\
+})
+
+#define ia64_lfetch_excl(lfhint, y)					\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.excl [%0]" :: "r"(y));		\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.excl.nt1 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.excl.nt2 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.excl.nta [%0]" :: "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_lfetch_fault(lfhint, y)					\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.fault [%0]" : : "r"(y));		\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.fault.nt1 [%0]" : : "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.fault.nt2 [%0]" : : "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.fault.nta [%0]" : : "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define ia64_lfetch_fault_excl(lfhint, y)				\
+({									\
+        switch (lfhint) {						\
+        case ia64_lfhint_none:						\
+                asm volatile ("lfetch.fault.excl [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt1:						\
+                asm volatile ("lfetch.fault.excl.nt1 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nt2:						\
+                asm volatile ("lfetch.fault.excl.nt2 [%0]" :: "r"(y));	\
+                break;							\
+        case ia64_lfhint_nta:						\
+                asm volatile ("lfetch.fault.excl.nta [%0]" :: "r"(y));	\
+                break;							\
+        }								\
+})
+
+#define __ia64_intrin_local_irq_restore(x)			\
+do {								\
+	asm volatile (";;   cmp.ne p6,p7=%0,r0;;"		\
+		      "(p6) ssm psr.i;"				\
+		      "(p7) rsm psr.i;;"			\
+		      "(p6) srlz.d"				\
+		      :: "r"((x)) : "p6", "p7", "memory");	\
+} while (0)
+
+#define __ia64_get_psr_i()	(__ia64_getreg(_IA64_REG_PSR) & 0x4000UL)
+
+#endif /* _ASM_IA64_GCC_INTRIN_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h b/linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h
new file mode 100644
index 0000000000..d4e5471f46
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/intel_intrin.h
@@ -0,0 +1,259 @@
+#ifndef _ASM_IA64_INTEL_INTRIN_H
+#define _ASM_IA64_INTEL_INTRIN_H
+/*
+ * Intel Compiler Intrinsics
+ *
+ * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
+ * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ *
+ */
+#include <asm/types.h>
+
+void  __lfetch(int lfhint, void *y);
+void  __lfetch_excl(int lfhint, void *y);
+void  __lfetch_fault(int lfhint, void *y);
+void  __lfetch_fault_excl(int lfhint, void *y);
+
+/* In the following, whichFloatReg should be an integer from 0-127 */
+void  __ldfs(const int whichFloatReg, void *src);
+void  __ldfd(const int whichFloatReg, void *src);
+void  __ldfe(const int whichFloatReg, void *src);
+void  __ldf8(const int whichFloatReg, void *src);
+void  __ldf_fill(const int whichFloatReg, void *src);
+void  __stfs(void *dst, const int whichFloatReg);
+void  __stfd(void *dst, const int whichFloatReg);
+void  __stfe(void *dst, const int whichFloatReg);
+void  __stf8(void *dst, const int whichFloatReg);
+void  __stf_spill(void *dst, const int whichFloatReg);
+
+void  __st1_rel(void *dst, const __s8  value);
+void  __st2_rel(void *dst, const __s16 value);
+void  __st4_rel(void *dst, const __s32 value);
+void  __st8_rel(void *dst, const __s64 value);
+__u8  __ld1_acq(void *src);
+__u16 __ld2_acq(void *src);
+__u32 __ld4_acq(void *src);
+__u64 __ld8_acq(void *src);
+
+__u64 __fetchadd4_acq(__u32 *addend, const int increment);
+__u64 __fetchadd4_rel(__u32 *addend, const int increment);
+__u64 __fetchadd8_acq(__u64 *addend, const int increment);
+__u64 __fetchadd8_rel(__u64 *addend, const int increment);
+
+__u64 __getf_exp(double d);
+
+/* OS Related Itanium(R) Intrinsics  */
+
+/* The names to use for whichReg and whichIndReg below come from
+   the include file asm/ia64regs.h */
+
+__u64 __getIndReg(const int whichIndReg, __s64 index);
+__u64 __getReg(const int whichReg);
+
+void  __setIndReg(const int whichIndReg, __s64 index, __u64 value);
+void  __setReg(const int whichReg, __u64 value);
+
+void  __mf(void);
+void  __mfa(void);
+void  __synci(void);
+void  __itcd(__s64 pa);
+void  __itci(__s64 pa);
+void  __itrd(__s64 whichTransReg, __s64 pa);
+void  __itri(__s64 whichTransReg, __s64 pa);
+void  __ptce(__s64 va);
+void  __ptcl(__s64 va, __s64 pagesz);
+void  __ptcg(__s64 va, __s64 pagesz);
+void  __ptcga(__s64 va, __s64 pagesz);
+void  __ptri(__s64 va, __s64 pagesz);
+void  __ptrd(__s64 va, __s64 pagesz);
+void  __invala (void);
+void  __invala_gr(const int whichGeneralReg /* 0-127 */ );
+void  __invala_fr(const int whichFloatReg /* 0-127 */ );
+void  __nop(const int);
+void  __fc(__u64 *addr);
+void  __sum(int mask);
+void  __rum(int mask);
+void  __ssm(int mask);
+void  __rsm(int mask);
+__u64 __thash(__s64);
+__u64 __ttag(__s64);
+__s64 __tpa(__s64);
+
+/* Intrinsics for implementing get/put_user macros */
+void __st_user(const char *tableName, __u64 addr, char size, char relocType, __u64 val);
+void __ld_user(const char *tableName, __u64 addr, char size, char relocType);
+
+/* This intrinsic does not generate code, it creates a barrier across which
+ * the compiler will not schedule data access instructions.
+ */
+void __memory_barrier(void);
+
+void __isrlz(void);
+void __dsrlz(void);
+
+__u64  _m64_mux1(__u64 a, const int n);
+__u64  __thash(__u64);
+
+/* Lock and Atomic Operation Related Intrinsics */
+__u64 _InterlockedExchange8(volatile __u8 *trgt, __u8 value);
+__u64 _InterlockedExchange16(volatile __u16 *trgt, __u16 value);
+__s64 _InterlockedExchange(volatile __u32 *trgt, __u32 value);
+__s64 _InterlockedExchange64(volatile __u64 *trgt, __u64 value);
+
+__u64 _InterlockedCompareExchange8_rel(volatile __u8 *dest, __u64 xchg, __u64 comp);
+__u64 _InterlockedCompareExchange8_acq(volatile __u8 *dest, __u64 xchg, __u64 comp);
+__u64 _InterlockedCompareExchange16_rel(volatile __u16 *dest, __u64 xchg, __u64 comp);
+__u64 _InterlockedCompareExchange16_acq(volatile __u16 *dest, __u64 xchg, __u64 comp);
+__u64 _InterlockedCompareExchange_rel(volatile __u32 *dest, __u64 xchg, __u64 comp);
+__u64 _InterlockedCompareExchange_acq(volatile __u32 *dest, __u64 xchg, __u64 comp);
+__u64 _InterlockedCompareExchange64_rel(volatile __u64 *dest, __u64 xchg, __u64 comp);
+__u64 _InterlockedCompareExchange64_acq(volatile __u64 *dest, __u64 xchg, __u64 comp);
+
+__s64 _m64_dep_mi(const int v, __s64 s, const int p, const int len);
+__s64 _m64_shrp(__s64 a, __s64 b, const int count);
+__s64 _m64_popcnt(__s64 a);
+
+#define ia64_barrier()		__memory_barrier()
+
+#define ia64_stop()	/* Nothing: As of now stop bit is generated for each
+		 	 * intrinsic
+		 	 */
+
+#define __ia64_getreg		__getReg
+#define __ia64_setreg		__setReg
+
+#define __ia64_hint(x)
+
+#define ia64_mux1_brcst	 0
+#define ia64_mux1_mix		 8
+#define ia64_mux1_shuf		 9
+#define ia64_mux1_alt		10
+#define ia64_mux1_rev		11
+
+#define ia64_mux1		_m64_mux1
+#define ia64_popcnt		_m64_popcnt
+#define ia64_getf_exp		__getf_exp
+#define ia64_shrp		_m64_shrp
+
+#define __ia64_tpa		__tpa
+#define ia64_invala		__invala
+#define ia64_invala_gr		__invala_gr
+#define ia64_invala_fr		__invala_fr
+#define ia64_nop		__nop
+#define ia64_sum		__sum
+#define __ia64_ssm		__ssm
+#define ia64_rum		__rum
+#define __ia64_rsm		__rsm
+#define __ia64_fc 		__fc
+
+#define ia64_ldfs		__ldfs
+#define ia64_ldfd		__ldfd
+#define ia64_ldfe		__ldfe
+#define ia64_ldf8		__ldf8
+#define ia64_ldf_fill		__ldf_fill
+
+#define ia64_stfs		__stfs
+#define ia64_stfd		__stfd
+#define ia64_stfe		__stfe
+#define ia64_stf8		__stf8
+#define ia64_stf_spill		__stf_spill
+
+#define ia64_mf		__mf
+#define ia64_mfa		__mfa
+
+#define ia64_fetchadd4_acq	__fetchadd4_acq
+#define ia64_fetchadd4_rel	__fetchadd4_rel
+#define ia64_fetchadd8_acq	__fetchadd8_acq
+#define ia64_fetchadd8_rel	__fetchadd8_rel
+
+#define ia64_xchg1		_InterlockedExchange8
+#define ia64_xchg2		_InterlockedExchange16
+#define ia64_xchg4		_InterlockedExchange
+#define ia64_xchg8		_InterlockedExchange64
+
+#define ia64_cmpxchg1_rel	_InterlockedCompareExchange8_rel
+#define ia64_cmpxchg1_acq	_InterlockedCompareExchange8_acq
+#define ia64_cmpxchg2_rel	_InterlockedCompareExchange16_rel
+#define ia64_cmpxchg2_acq	_InterlockedCompareExchange16_acq
+#define ia64_cmpxchg4_rel	_InterlockedCompareExchange_rel
+#define ia64_cmpxchg4_acq	_InterlockedCompareExchange_acq
+#define ia64_cmpxchg8_rel	_InterlockedCompareExchange64_rel
+#define ia64_cmpxchg8_acq	_InterlockedCompareExchange64_acq
+
+#define __ia64_set_dbr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_DBR, index, val)
+#define __ia64_set_ibr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_IBR, index, val)
+#define __ia64_set_pkr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PKR, index, val)
+#define __ia64_set_pmc(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PMC, index, val)
+#define __ia64_set_pmd(index, val)	\
+		__setIndReg(_IA64_REG_INDR_PMD, index, val)
+#define __ia64_set_rr(index, val)	\
+		__setIndReg(_IA64_REG_INDR_RR, index, val)
+
+#define __ia64_get_cpuid(index) 	__getIndReg(_IA64_REG_INDR_CPUID, index)
+#define __ia64_get_dbr(index) 	__getIndReg(_IA64_REG_INDR_DBR, index)
+#define __ia64_get_ibr(index) 	__getIndReg(_IA64_REG_INDR_IBR, index)
+#define __ia64_get_pkr(index) 	__getIndReg(_IA64_REG_INDR_PKR, index)
+#define __ia64_get_pmc(index) 	__getIndReg(_IA64_REG_INDR_PMC, index)
+#define __ia64_get_pmd(index)  	__getIndReg(_IA64_REG_INDR_PMD, index)
+#define __ia64_get_rr(index) 	__getIndReg(_IA64_REG_INDR_RR, index)
+
+#define ia64_srlz_d		__dsrlz
+#define ia64_srlz_i		__isrlz
+
+#define ia64_dv_serialize_data()
+#define ia64_dv_serialize_instruction()
+
+#define ia64_st1_rel		__st1_rel
+#define ia64_st2_rel		__st2_rel
+#define ia64_st4_rel		__st4_rel
+#define ia64_st8_rel		__st8_rel
+
+#define ia64_ld1_acq		__ld1_acq
+#define ia64_ld2_acq		__ld2_acq
+#define ia64_ld4_acq		__ld4_acq
+#define ia64_ld8_acq		__ld8_acq
+
+#define ia64_sync_i		__synci
+#define __ia64_thash		__thash
+#define __ia64_ttag		__ttag
+#define __ia64_itcd		__itcd
+#define __ia64_itci		__itci
+#define __ia64_itrd		__itrd
+#define __ia64_itri		__itri
+#define __ia64_ptce		__ptce
+#define __ia64_ptcl		__ptcl
+#define __ia64_ptcg		__ptcg
+#define __ia64_ptcga		__ptcga
+#define __ia64_ptri		__ptri
+#define __ia64_ptrd		__ptrd
+#define ia64_dep_mi		_m64_dep_mi
+
+/* Values for lfhint in __lfetch and __lfetch_fault */
+
+#define ia64_lfhint_none   	0
+#define ia64_lfhint_nt1    	1
+#define ia64_lfhint_nt2    	2
+#define ia64_lfhint_nta    	3
+
+#define ia64_lfetch		__lfetch
+#define ia64_lfetch_excl	__lfetch_excl
+#define ia64_lfetch_fault	__lfetch_fault
+#define ia64_lfetch_fault_excl	__lfetch_fault_excl
+
+#define __ia64_intrin_local_irq_restore(x)		\
+do {							\
+	if ((x) != 0) {					\
+		__ia64_ssm(IA64_PSR_I);			\
+		ia64_srlz_d();				\
+	} else {					\
+		__ia64_rsm(IA64_PSR_I);			\
+	}						\
+} while (0)
+
+#define __ia64_get_psr_i()	(__ia64_getreg(_IA64_REG_PSR) & 0x4000UL)
+
+#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/pal.h b/linux-2.6-xen-sparse/include/asm-ia64/pal.h
new file mode 100644
index 0000000000..d7c6896189
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/pal.h
@@ -0,0 +1,1633 @@
+#ifndef _ASM_IA64_PAL_H
+#define _ASM_IA64_PAL_H
+
+/*
+ * Processor Abstraction Layer definitions.
+ *
+ * This is based on Intel IA-64 Architecture Software Developer's Manual rev 1.0
+ * chapter 11 IA-64 Processor Abstraction Layer
+ *
+ * Copyright (C) 1998-2001 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com>
+ *
+ * 99/10/01	davidm	Make sure we pass zero for reserved parameters.
+ * 00/03/07	davidm	Updated pal_cache_flush() to be in sync with PAL v2.6.
+ * 00/03/23     cfleck  Modified processor min-state save area to match updated PAL & SAL info
+ * 00/05/24     eranian Updated to latest PAL spec, fix structures bugs, added
+ * 00/05/25	eranian Support for stack calls, and static physical calls
+ * 00/06/18	eranian Support for stacked physical calls
+ */
+
+/*
+ * Note that some of these calls use a static-register only calling
+ * convention which has nothing to do with the regular calling
+ * convention.
+ */
+#define PAL_CACHE_FLUSH		1	/* flush i/d cache */
+#define PAL_CACHE_INFO		2	/* get detailed i/d cache info */
+#define PAL_CACHE_INIT		3	/* initialize i/d cache */
+#define PAL_CACHE_SUMMARY	4	/* get summary of cache heirarchy */
+#define PAL_MEM_ATTRIB		5	/* list supported memory attributes */
+#define PAL_PTCE_INFO		6	/* purge TLB info */
+#define PAL_VM_INFO		7	/* return supported virtual memory features */
+#define PAL_VM_SUMMARY		8	/* return summary on supported vm features */
+#define PAL_BUS_GET_FEATURES	9	/* return processor bus interface features settings */
+#define PAL_BUS_SET_FEATURES	10	/* set processor bus features */
+#define PAL_DEBUG_INFO		11	/* get number of debug registers */
+#define PAL_FIXED_ADDR		12	/* get fixed component of processors's directed address */
+#define PAL_FREQ_BASE		13	/* base frequency of the platform */
+#define PAL_FREQ_RATIOS		14	/* ratio of processor, bus and ITC frequency */
+#define PAL_PERF_MON_INFO	15	/* return performance monitor info */
+#define PAL_PLATFORM_ADDR	16	/* set processor interrupt block and IO port space addr */
+#define PAL_PROC_GET_FEATURES	17	/* get configurable processor features & settings */
+#define PAL_PROC_SET_FEATURES	18	/* enable/disable configurable processor features */
+#define PAL_RSE_INFO		19	/* return rse information */
+#define PAL_VERSION		20	/* return version of PAL code */
+#define PAL_MC_CLEAR_LOG	21	/* clear all processor log info */
+#define PAL_MC_DRAIN		22	/* drain operations which could result in an MCA */
+#define PAL_MC_EXPECTED		23	/* set/reset expected MCA indicator */
+#define PAL_MC_DYNAMIC_STATE	24	/* get processor dynamic state */
+#define PAL_MC_ERROR_INFO	25	/* get processor MCA info and static state */
+#define PAL_MC_RESUME		26	/* Return to interrupted process */
+#define PAL_MC_REGISTER_MEM	27	/* Register memory for PAL to use during MCAs and inits */
+#define PAL_HALT		28	/* enter the low power HALT state */
+#define PAL_HALT_LIGHT		29	/* enter the low power light halt state*/
+#define PAL_COPY_INFO		30	/* returns info needed to relocate PAL */
+#define PAL_CACHE_LINE_INIT	31	/* init tags & data of cache line */
+#define PAL_PMI_ENTRYPOINT	32	/* register PMI memory entry points with the processor */
+#define PAL_ENTER_IA_32_ENV	33	/* enter IA-32 system environment */
+#define PAL_VM_PAGE_SIZE	34	/* return vm TC and page walker page sizes */
+
+#define PAL_MEM_FOR_TEST	37	/* get amount of memory needed for late processor test */
+#define PAL_CACHE_PROT_INFO	38	/* get i/d cache protection info */
+#define PAL_REGISTER_INFO	39	/* return AR and CR register information*/
+#define PAL_SHUTDOWN		40	/* enter processor shutdown state */
+#define PAL_PREFETCH_VISIBILITY	41	/* Make Processor Prefetches Visible */
+#define PAL_LOGICAL_TO_PHYSICAL 42	/* returns information on logical to physical processor mapping */
+
+#define PAL_COPY_PAL		256	/* relocate PAL procedures and PAL PMI */
+#define PAL_HALT_INFO		257	/* return the low power capabilities of processor */
+#define PAL_TEST_PROC		258	/* perform late processor self-test */
+#define PAL_CACHE_READ		259	/* read tag & data of cacheline for diagnostic testing */
+#define PAL_CACHE_WRITE		260	/* write tag & data of cacheline for diagnostic testing */
+#define PAL_VM_TR_READ		261	/* read contents of translation register */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/processor.h>
+#include <asm/fpu.h>
+
+/*
+ * Data types needed to pass information into PAL procedures and
+ * interpret information returned by them.
+ */
+
+/* Return status from the PAL procedure */
+typedef s64				pal_status_t;
+
+#define PAL_STATUS_SUCCESS		0	/* No error */
+#define PAL_STATUS_UNIMPLEMENTED	(-1)	/* Unimplemented procedure */
+#define PAL_STATUS_EINVAL		(-2)	/* Invalid argument */
+#define PAL_STATUS_ERROR		(-3)	/* Error */
+#define PAL_STATUS_CACHE_INIT_FAIL	(-4)	/* Could not initialize the
+						 * specified level and type of
+						 * cache without sideeffects
+						 * and "restrict" was 1
+						 */
+
+/* Processor cache level in the heirarchy */
+typedef u64				pal_cache_level_t;
+#define PAL_CACHE_LEVEL_L0		0	/* L0 */
+#define PAL_CACHE_LEVEL_L1		1	/* L1 */
+#define PAL_CACHE_LEVEL_L2		2	/* L2 */
+
+
+/* Processor cache type at a particular level in the heirarchy */
+
+typedef u64				pal_cache_type_t;
+#define PAL_CACHE_TYPE_INSTRUCTION	1	/* Instruction cache */
+#define PAL_CACHE_TYPE_DATA		2	/* Data or unified cache */
+#define PAL_CACHE_TYPE_INSTRUCTION_DATA	3	/* Both Data & Instruction */
+
+
+#define PAL_CACHE_FLUSH_INVALIDATE	1	/* Invalidate clean lines */
+#define PAL_CACHE_FLUSH_CHK_INTRS	2	/* check for interrupts/mc while flushing */
+
+/* Processor cache line size in bytes  */
+typedef int				pal_cache_line_size_t;
+
+/* Processor cache line state */
+typedef u64				pal_cache_line_state_t;
+#define PAL_CACHE_LINE_STATE_INVALID	0	/* Invalid */
+#define PAL_CACHE_LINE_STATE_SHARED	1	/* Shared */
+#define PAL_CACHE_LINE_STATE_EXCLUSIVE	2	/* Exclusive */
+#define PAL_CACHE_LINE_STATE_MODIFIED	3	/* Modified */
+
+typedef struct pal_freq_ratio {
+	u64 den : 32, num : 32;	/* numerator & denominator */
+} itc_ratio, proc_ratio;
+
+typedef	union  pal_cache_config_info_1_s {
+	struct {
+		u64		u		: 1,	/* 0 Unified cache ? */
+				at		: 2,	/* 2-1 Cache mem attr*/
+				reserved	: 5,	/* 7-3 Reserved */
+				associativity	: 8,	/* 16-8 Associativity*/
+				line_size	: 8,	/* 23-17 Line size */
+				stride		: 8,	/* 31-24 Stride */
+				store_latency	: 8,	/*39-32 Store latency*/
+				load_latency	: 8,	/* 47-40 Load latency*/
+				store_hints	: 8,	/* 55-48 Store hints*/
+				load_hints	: 8;	/* 63-56 Load hints */
+	} pcci1_bits;
+	u64			pcci1_data;
+} pal_cache_config_info_1_t;
+
+typedef	union  pal_cache_config_info_2_s {
+	struct {
+		u64		cache_size	: 32,	/*cache size in bytes*/
+
+
+				alias_boundary	: 8,	/* 39-32 aliased addr
+							 * separation for max
+							 * performance.
+							 */
+				tag_ls_bit	: 8,	/* 47-40 LSb of addr*/
+				tag_ms_bit	: 8,	/* 55-48 MSb of addr*/
+				reserved	: 8;	/* 63-56 Reserved */
+	} pcci2_bits;
+	u64			pcci2_data;
+} pal_cache_config_info_2_t;
+
+
+typedef struct pal_cache_config_info_s {
+	pal_status_t			pcci_status;
+	pal_cache_config_info_1_t	pcci_info_1;
+	pal_cache_config_info_2_t	pcci_info_2;
+	u64				pcci_reserved;
+} pal_cache_config_info_t;
+
+#define pcci_ld_hints		pcci_info_1.pcci1_bits.load_hints
+#define pcci_st_hints		pcci_info_1.pcci1_bits.store_hints
+#define pcci_ld_latency		pcci_info_1.pcci1_bits.load_latency
+#define pcci_st_latency		pcci_info_1.pcci1_bits.store_latency
+#define pcci_stride		pcci_info_1.pcci1_bits.stride
+#define pcci_line_size		pcci_info_1.pcci1_bits.line_size
+#define pcci_assoc		pcci_info_1.pcci1_bits.associativity
+#define pcci_cache_attr		pcci_info_1.pcci1_bits.at
+#define pcci_unified		pcci_info_1.pcci1_bits.u
+#define pcci_tag_msb		pcci_info_2.pcci2_bits.tag_ms_bit
+#define pcci_tag_lsb		pcci_info_2.pcci2_bits.tag_ls_bit
+#define pcci_alias_boundary	pcci_info_2.pcci2_bits.alias_boundary
+#define pcci_cache_size		pcci_info_2.pcci2_bits.cache_size
+
+
+
+/* Possible values for cache attributes */
+
+#define PAL_CACHE_ATTR_WT		0	/* Write through cache */
+#define PAL_CACHE_ATTR_WB		1	/* Write back cache */
+#define PAL_CACHE_ATTR_WT_OR_WB		2	/* Either write thru or write
+						 * back depending on TLB
+						 * memory attributes
+						 */
+
+
+/* Possible values for cache hints */
+
+#define PAL_CACHE_HINT_TEMP_1		0	/* Temporal level 1 */
+#define PAL_CACHE_HINT_NTEMP_1		1	/* Non-temporal level 1 */
+#define PAL_CACHE_HINT_NTEMP_ALL	3	/* Non-temporal all levels */
+
+/* Processor cache protection  information */
+typedef union pal_cache_protection_element_u {
+	u32			pcpi_data;
+	struct {
+		u32		data_bits	: 8, /* # data bits covered by
+						      * each unit of protection
+						      */
+
+				tagprot_lsb	: 6, /* Least -do- */
+				tagprot_msb	: 6, /* Most Sig. tag address
+						      * bit that this
+						      * protection covers.
+						      */
+				prot_bits	: 6, /* # of protection bits */
+				method		: 4, /* Protection method */
+				t_d		: 2; /* Indicates which part
+						      * of the cache this
+						      * protection encoding
+						      * applies.
+						      */
+	} pcp_info;
+} pal_cache_protection_element_t;
+
+#define pcpi_cache_prot_part	pcp_info.t_d
+#define pcpi_prot_method	pcp_info.method
+#define pcpi_prot_bits		pcp_info.prot_bits
+#define pcpi_tagprot_msb	pcp_info.tagprot_msb
+#define pcpi_tagprot_lsb	pcp_info.tagprot_lsb
+#define pcpi_data_bits		pcp_info.data_bits
+
+/* Processor cache part encodings */
+#define PAL_CACHE_PROT_PART_DATA	0	/* Data protection  */
+#define PAL_CACHE_PROT_PART_TAG		1	/* Tag  protection */
+#define PAL_CACHE_PROT_PART_TAG_DATA	2	/* Tag+data protection (tag is
+						 * more significant )
+						 */
+#define PAL_CACHE_PROT_PART_DATA_TAG	3	/* Data+tag protection (data is
+						 * more significant )
+						 */
+#define PAL_CACHE_PROT_PART_MAX		6
+
+
+typedef struct pal_cache_protection_info_s {
+	pal_status_t			pcpi_status;
+	pal_cache_protection_element_t	pcp_info[PAL_CACHE_PROT_PART_MAX];
+} pal_cache_protection_info_t;
+
+
+/* Processor cache protection method encodings */
+#define PAL_CACHE_PROT_METHOD_NONE		0	/* No protection */
+#define PAL_CACHE_PROT_METHOD_ODD_PARITY	1	/* Odd parity */
+#define PAL_CACHE_PROT_METHOD_EVEN_PARITY	2	/* Even parity */
+#define PAL_CACHE_PROT_METHOD_ECC		3	/* ECC protection */
+
+
+/* Processor cache line identification in the heirarchy */
+typedef union pal_cache_line_id_u {
+	u64			pclid_data;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * heirarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				reserved	: 32;	/* 63-32 is reserved*/
+	} pclid_info_read;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * heirarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				mesi		: 8,	/* 39-32 cache line
+							 * state
+							 */
+				start		: 8,	/* 47-40 lsb of data to
+							 * invert
+							 */
+				length		: 8,	/* 55-48 #bits to
+							 * invert
+							 */
+				trigger		: 8;	/* 63-56 Trigger error
+							 * by doing a load
+							 * after the write
+							 */
+
+	} pclid_info_write;
+} pal_cache_line_id_u_t;
+
+#define pclid_read_part		pclid_info_read.part
+#define pclid_read_way		pclid_info_read.way
+#define pclid_read_level	pclid_info_read.level
+#define pclid_read_cache_type	pclid_info_read.cache_type
+
+#define pclid_write_trigger	pclid_info_write.trigger
+#define pclid_write_length	pclid_info_write.length
+#define pclid_write_start	pclid_info_write.start
+#define pclid_write_mesi	pclid_info_write.mesi
+#define pclid_write_part	pclid_info_write.part
+#define pclid_write_way		pclid_info_write.way
+#define pclid_write_level	pclid_info_write.level
+#define pclid_write_cache_type	pclid_info_write.cache_type
+
+/* Processor cache line part encodings */
+#define PAL_CACHE_LINE_ID_PART_DATA		0	/* Data */
+#define PAL_CACHE_LINE_ID_PART_TAG		1	/* Tag */
+#define PAL_CACHE_LINE_ID_PART_DATA_PROT	2	/* Data protection */
+#define PAL_CACHE_LINE_ID_PART_TAG_PROT		3	/* Tag protection */
+#define PAL_CACHE_LINE_ID_PART_DATA_TAG_PROT	4	/* Data+tag
+							 * protection
+							 */
+typedef struct pal_cache_line_info_s {
+	pal_status_t		pcli_status;		/* Return status of the read cache line
+							 * info call.
+							 */
+	u64			pcli_data;		/* 64-bit data, tag, protection bits .. */
+	u64			pcli_data_len;		/* data length in bits */
+	pal_cache_line_state_t	pcli_cache_line_state;	/* mesi state */
+
+} pal_cache_line_info_t;
+
+
+/* Machine Check related crap */
+
+/* Pending event status bits  */
+typedef u64					pal_mc_pending_events_t;
+
+#define PAL_MC_PENDING_MCA			(1 << 0)
+#define PAL_MC_PENDING_INIT			(1 << 1)
+
+/* Error information type */
+typedef u64					pal_mc_info_index_t;
+
+#define PAL_MC_INFO_PROCESSOR			0	/* Processor */
+#define PAL_MC_INFO_CACHE_CHECK			1	/* Cache check */
+#define PAL_MC_INFO_TLB_CHECK			2	/* Tlb check */
+#define PAL_MC_INFO_BUS_CHECK			3	/* Bus check */
+#define PAL_MC_INFO_REQ_ADDR			4	/* Requestor address */
+#define PAL_MC_INFO_RESP_ADDR			5	/* Responder address */
+#define PAL_MC_INFO_TARGET_ADDR			6	/* Target address */
+#define PAL_MC_INFO_IMPL_DEP			7	/* Implementation
+							 * dependent
+							 */
+
+
+typedef struct pal_process_state_info_s {
+	u64		reserved1	: 2,
+			rz		: 1,	/* PAL_CHECK processor
+						 * rendezvous
+						 * successful.
+						 */
+
+			ra		: 1,	/* PAL_CHECK attempted
+						 * a rendezvous.
+						 */
+			me		: 1,	/* Distinct multiple
+						 * errors occurred
+						 */
+
+			mn		: 1,	/* Min. state save
+						 * area has been
+						 * registered with PAL
+						 */
+
+			sy		: 1,	/* Storage integrity
+						 * synched
+						 */
+
+
+			co		: 1,	/* Continuable */
+			ci		: 1,	/* MC isolated */
+			us		: 1,	/* Uncontained storage
+						 * damage.
+						 */
+
+
+			hd		: 1,	/* Non-essential hw
+						 * lost (no loss of
+						 * functionality)
+						 * causing the
+						 * processor to run in
+						 * degraded mode.
+						 */
+
+			tl		: 1,	/* 1 => MC occurred
+						 * after an instr was
+						 * executed but before
+						 * the trap that
+						 * resulted from instr
+						 * execution was
+						 * generated.
+						 * (Trap Lost )
+						 */
+			mi		: 1,	/* More information available
+						 * call PAL_MC_ERROR_INFO
+						 */
+			pi		: 1,	/* Precise instruction pointer */
+			pm		: 1,	/* Precise min-state save area */
+
+			dy		: 1,	/* Processor dynamic
+						 * state valid
+						 */
+
+
+			in		: 1,	/* 0 = MC, 1 = INIT */
+			rs		: 1,	/* RSE valid */
+			cm		: 1,	/* MC corrected */
+			ex		: 1,	/* MC is expected */
+			cr		: 1,	/* Control regs valid*/
+			pc		: 1,	/* Perf cntrs valid */
+			dr		: 1,	/* Debug regs valid */
+			tr		: 1,	/* Translation regs
+						 * valid
+						 */
+			rr		: 1,	/* Region regs valid */
+			ar		: 1,	/* App regs valid */
+			br		: 1,	/* Branch regs valid */
+			pr		: 1,	/* Predicate registers
+						 * valid
+						 */
+
+			fp		: 1,	/* fp registers valid*/
+			b1		: 1,	/* Preserved bank one
+						 * general registers
+						 * are valid
+						 */
+			b0		: 1,	/* Preserved bank zero
+						 * general registers
+						 * are valid
+						 */
+			gr		: 1,	/* General registers
+						 * are valid
+						 * (excl. banked regs)
+						 */
+			dsize		: 16,	/* size of dynamic
+						 * state returned
+						 * by the processor
+						 */
+
+			reserved2	: 11,
+			cc		: 1,	/* Cache check */
+			tc		: 1,	/* TLB check */
+			bc		: 1,	/* Bus check */
+			rc		: 1,	/* Register file check */
+			uc		: 1;	/* Uarch check */
+
+} pal_processor_state_info_t;
+
+typedef struct pal_cache_check_info_s {
+	u64		op		: 4,	/* Type of cache
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			level		: 2,	/* Cache level */
+			reserved1	: 2,
+			dl		: 1,	/* Failure in data part
+						 * of cache line
+						 */
+			tl		: 1,	/* Failure in tag part
+						 * of cache line
+						 */
+			dc		: 1,	/* Failure in dcache */
+			ic		: 1,	/* Failure in icache */
+			mesi		: 3,	/* Cache line state */
+			mv		: 1,	/* mesi valid */
+			way		: 5,	/* Way in which the
+						 * error occurred
+						 */
+			wiv		: 1,	/* Way field valid */
+			reserved2	: 10,
+
+			index		: 20,	/* Cache line index */
+			reserved3	: 2,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_cache_check_info_t;
+
+typedef struct pal_tlb_check_info_s {
+
+	u64		tr_slot		: 8,	/* Slot# of TR where
+						 * error occurred
+						 */
+			trv		: 1,	/* tr_slot field is valid */
+			reserved1	: 1,
+			level		: 2,	/* TLB level where failure occurred */
+			reserved2	: 4,
+			dtr		: 1,	/* Fail in data TR */
+			itr		: 1,	/* Fail in inst TR */
+			dtc		: 1,	/* Fail in data TC */
+			itc		: 1,	/* Fail in inst. TC */
+			op		: 4,	/* Cache operation */
+			reserved3	: 30,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_tlb_check_info_t;
+
+typedef struct pal_bus_check_info_s {
+	u64		size		: 5,	/* Xaction size */
+			ib		: 1,	/* Internal bus error */
+			eb		: 1,	/* External bus error */
+			cc		: 1,	/* Error occurred
+						 * during cache-cache
+						 * transfer.
+						 */
+			type		: 8,	/* Bus xaction type*/
+			sev		: 5,	/* Bus error severity*/
+			hier		: 2,	/* Bus hierarchy level */
+			reserved1	: 1,
+			bsi		: 8,	/* Bus error status
+						 * info
+						 */
+			reserved2	: 22,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_bus_check_info_t;
+
+typedef struct pal_reg_file_check_info_s {
+	u64		id		: 4,	/* Register file identifier */
+			op		: 4,	/* Type of register
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			reg_num		: 7,	/* Register number */
+			rnv		: 1,	/* reg_num valid */
+			reserved2	: 38,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			reserved3	: 3,
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_reg_file_check_info_t;
+
+typedef struct pal_uarch_check_info_s {
+	u64		sid		: 5,	/* Structure identification */
+			level		: 3,	/* Level of failure */
+			array_id	: 4,	/* Array identification */
+			op		: 4,	/* Type of
+						 * operation that
+						 * caused the machine
+						 * check.
+						 */
+			way		: 6,	/* Way of structure */
+			wv		: 1,	/* way valid */
+			xv		: 1,	/* index valid */
+			reserved1	: 8,
+			index		: 8,	/* Index or set of the uarch
+						 * structure that failed.
+						 */
+			reserved2	: 24,
+
+			is		: 1,	/* instruction set (1 == ia32) */
+			iv		: 1,	/* instruction set field valid */
+			pl		: 2,	/* privilege level */
+			pv		: 1,	/* privilege level field valid */
+			mcc		: 1,	/* Machine check corrected */
+			tv		: 1,	/* Target address
+						 * structure is valid
+						 */
+			rq		: 1,	/* Requester identifier
+						 * structure is valid
+						 */
+			rp		: 1,	/* Responder identifier
+						 * structure is valid
+						 */
+			pi		: 1;	/* Precise instruction pointer
+						 * structure is valid
+						 */
+} pal_uarch_check_info_t;
+
+typedef union pal_mc_error_info_u {
+	u64				pmei_data;
+	pal_processor_state_info_t	pme_processor;
+	pal_cache_check_info_t		pme_cache;
+	pal_tlb_check_info_t		pme_tlb;
+	pal_bus_check_info_t		pme_bus;
+	pal_reg_file_check_info_t	pme_reg_file;
+	pal_uarch_check_info_t		pme_uarch;
+} pal_mc_error_info_t;
+
+#define pmci_proc_unknown_check			pme_processor.uc
+#define pmci_proc_bus_check			pme_processor.bc
+#define pmci_proc_tlb_check			pme_processor.tc
+#define pmci_proc_cache_check			pme_processor.cc
+#define pmci_proc_dynamic_state_size		pme_processor.dsize
+#define pmci_proc_gpr_valid			pme_processor.gr
+#define pmci_proc_preserved_bank0_gpr_valid	pme_processor.b0
+#define pmci_proc_preserved_bank1_gpr_valid	pme_processor.b1
+#define pmci_proc_fp_valid			pme_processor.fp
+#define pmci_proc_predicate_regs_valid		pme_processor.pr
+#define pmci_proc_branch_regs_valid		pme_processor.br
+#define pmci_proc_app_regs_valid		pme_processor.ar
+#define pmci_proc_region_regs_valid		pme_processor.rr
+#define pmci_proc_translation_regs_valid	pme_processor.tr
+#define pmci_proc_debug_regs_valid		pme_processor.dr
+#define pmci_proc_perf_counters_valid		pme_processor.pc
+#define pmci_proc_control_regs_valid		pme_processor.cr
+#define pmci_proc_machine_check_expected	pme_processor.ex
+#define pmci_proc_machine_check_corrected	pme_processor.cm
+#define pmci_proc_rse_valid			pme_processor.rs
+#define pmci_proc_machine_check_or_init		pme_processor.in
+#define pmci_proc_dynamic_state_valid		pme_processor.dy
+#define pmci_proc_operation			pme_processor.op
+#define pmci_proc_trap_lost			pme_processor.tl
+#define pmci_proc_hardware_damage		pme_processor.hd
+#define pmci_proc_uncontained_storage_damage	pme_processor.us
+#define pmci_proc_machine_check_isolated	pme_processor.ci
+#define pmci_proc_continuable			pme_processor.co
+#define pmci_proc_storage_intergrity_synced	pme_processor.sy
+#define pmci_proc_min_state_save_area_regd	pme_processor.mn
+#define	pmci_proc_distinct_multiple_errors	pme_processor.me
+#define pmci_proc_pal_attempted_rendezvous	pme_processor.ra
+#define pmci_proc_pal_rendezvous_complete	pme_processor.rz
+
+
+#define pmci_cache_level			pme_cache.level
+#define pmci_cache_line_state			pme_cache.mesi
+#define pmci_cache_line_state_valid		pme_cache.mv
+#define pmci_cache_line_index			pme_cache.index
+#define pmci_cache_instr_cache_fail		pme_cache.ic
+#define pmci_cache_data_cache_fail		pme_cache.dc
+#define pmci_cache_line_tag_fail		pme_cache.tl
+#define pmci_cache_line_data_fail		pme_cache.dl
+#define pmci_cache_operation			pme_cache.op
+#define pmci_cache_way_valid			pme_cache.wv
+#define pmci_cache_target_address_valid		pme_cache.tv
+#define pmci_cache_way				pme_cache.way
+#define pmci_cache_mc				pme_cache.mc
+
+#define pmci_tlb_instr_translation_cache_fail	pme_tlb.itc
+#define pmci_tlb_data_translation_cache_fail	pme_tlb.dtc
+#define pmci_tlb_instr_translation_reg_fail	pme_tlb.itr
+#define pmci_tlb_data_translation_reg_fail	pme_tlb.dtr
+#define pmci_tlb_translation_reg_slot		pme_tlb.tr_slot
+#define pmci_tlb_mc				pme_tlb.mc
+
+#define pmci_bus_status_info			pme_bus.bsi
+#define pmci_bus_req_address_valid		pme_bus.rq
+#define pmci_bus_resp_address_valid		pme_bus.rp
+#define pmci_bus_target_address_valid		pme_bus.tv
+#define pmci_bus_error_severity			pme_bus.sev
+#define pmci_bus_transaction_type		pme_bus.type
+#define pmci_bus_cache_cache_transfer		pme_bus.cc
+#define pmci_bus_transaction_size		pme_bus.size
+#define pmci_bus_internal_error			pme_bus.ib
+#define pmci_bus_external_error			pme_bus.eb
+#define pmci_bus_mc				pme_bus.mc
+
+/*
+ * NOTE: this min_state_save area struct only includes the 1KB
+ * architectural state save area.  The other 3 KB is scratch space
+ * for PAL.
+ */
+
+typedef struct pal_min_state_area_s {
+	u64	pmsa_nat_bits;		/* nat bits for saved GRs  */
+	u64	pmsa_gr[15];		/* GR1	- GR15		   */
+	u64	pmsa_bank0_gr[16];	/* GR16 - GR31		   */
+	u64	pmsa_bank1_gr[16];	/* GR16 - GR31		   */
+	u64	pmsa_pr;		/* predicate registers	   */
+	u64	pmsa_br0;		/* branch register 0	   */
+	u64	pmsa_rsc;		/* ar.rsc		   */
+	u64	pmsa_iip;		/* cr.iip		   */
+	u64	pmsa_ipsr;		/* cr.ipsr		   */
+	u64	pmsa_ifs;		/* cr.ifs		   */
+	u64	pmsa_xip;		/* previous iip		   */
+	u64	pmsa_xpsr;		/* previous psr		   */
+	u64	pmsa_xfs;		/* previous ifs		   */
+	u64	pmsa_br1;		/* branch register 1	   */
+	u64	pmsa_reserved[70];	/* pal_min_state_area should total to 1KB */
+} pal_min_state_area_t;
+
+
+struct ia64_pal_retval {
+	/*
+	 * A zero status value indicates call completed without error.
+	 * A negative status value indicates reason of call failure.
+	 * A positive status value indicates success but an
+	 * informational value should be printed (e.g., "reboot for
+	 * change to take effect").
+	 */
+	s64 status;
+	u64 v0;
+	u64 v1;
+	u64 v2;
+};
+
+/*
+ * Note: Currently unused PAL arguments are generally labeled
+ * "reserved" so the value specified in the PAL documentation
+ * (generally 0) MUST be passed.  Reserved parameters are not optional
+ * parameters.
+ */
+extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64);
+extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64);
+extern void ia64_save_scratch_fpregs (struct ia64_fpreg *);
+extern void ia64_load_scratch_fpregs (struct ia64_fpreg *);
+
+#define PAL_CALL(iprv,a0,a1,a2,a3) do {			\
+	struct ia64_fpreg fr[6];			\
+	ia64_save_scratch_fpregs(fr);			\
+	iprv = ia64_pal_call_static(a0, a1, a2, a3, 0);	\
+	ia64_load_scratch_fpregs(fr);			\
+} while (0)
+
+#define PAL_CALL_IC_OFF(iprv,a0,a1,a2,a3) do {		\
+	struct ia64_fpreg fr[6];			\
+	ia64_save_scratch_fpregs(fr);			\
+	iprv = ia64_pal_call_static(a0, a1, a2, a3, 1);	\
+	ia64_load_scratch_fpregs(fr);			\
+} while (0)
+
+#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do {		\
+	struct ia64_fpreg fr[6];			\
+	ia64_save_scratch_fpregs(fr);			\
+	iprv = ia64_pal_call_stacked(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);			\
+} while (0)
+
+#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do {			\
+	struct ia64_fpreg fr[6];				\
+	ia64_save_scratch_fpregs(fr);				\
+	iprv = ia64_pal_call_phys_static(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);				\
+} while (0)
+
+#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do {		\
+	struct ia64_fpreg fr[6];				\
+	ia64_save_scratch_fpregs(fr);				\
+	iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3);	\
+	ia64_load_scratch_fpregs(fr);				\
+} while (0)
+
+typedef int (*ia64_pal_handler) (u64, ...);
+extern ia64_pal_handler ia64_pal;
+extern void ia64_pal_handler_init (void *);
+
+extern ia64_pal_handler ia64_pal;
+
+extern pal_cache_config_info_t		l0d_cache_config_info;
+extern pal_cache_config_info_t		l0i_cache_config_info;
+extern pal_cache_config_info_t		l1_cache_config_info;
+extern pal_cache_config_info_t		l2_cache_config_info;
+
+extern pal_cache_protection_info_t	l0d_cache_protection_info;
+extern pal_cache_protection_info_t	l0i_cache_protection_info;
+extern pal_cache_protection_info_t	l1_cache_protection_info;
+extern pal_cache_protection_info_t	l2_cache_protection_info;
+
+extern pal_cache_config_info_t		pal_cache_config_info_get(pal_cache_level_t,
+								  pal_cache_type_t);
+
+extern pal_cache_protection_info_t	pal_cache_protection_info_get(pal_cache_level_t,
+								      pal_cache_type_t);
+
+
+extern void				pal_error(int);
+
+
+/* Useful wrappers for the current list of pal procedures */
+
+typedef union pal_bus_features_u {
+	u64	pal_bus_features_val;
+	struct {
+		u64	pbf_reserved1				:	29;
+		u64	pbf_req_bus_parking			:	1;
+		u64	pbf_bus_lock_mask			:	1;
+		u64	pbf_enable_half_xfer_rate		:	1;
+		u64	pbf_reserved2				:	22;
+		u64	pbf_disable_xaction_queueing		:	1;
+		u64	pbf_disable_resp_err_check		:	1;
+		u64	pbf_disable_berr_check			:	1;
+		u64	pbf_disable_bus_req_internal_err_signal	:	1;
+		u64	pbf_disable_bus_req_berr_signal		:	1;
+		u64	pbf_disable_bus_init_event_check	:	1;
+		u64	pbf_disable_bus_init_event_signal	:	1;
+		u64	pbf_disable_bus_addr_err_check		:	1;
+		u64	pbf_disable_bus_addr_err_signal		:	1;
+		u64	pbf_disable_bus_data_err_check		:	1;
+	} pal_bus_features_s;
+} pal_bus_features_u_t;
+
+extern void pal_bus_features_print (u64);
+
+/* Provide information about configurable processor bus features */
+static inline s64
+ia64_pal_bus_get_features (pal_bus_features_u_t *features_avail,
+			   pal_bus_features_u_t *features_status,
+			   pal_bus_features_u_t *features_control)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_BUS_GET_FEATURES, 0, 0, 0);
+	if (features_avail)
+		features_avail->pal_bus_features_val = iprv.v0;
+	if (features_status)
+		features_status->pal_bus_features_val = iprv.v1;
+	if (features_control)
+		features_control->pal_bus_features_val = iprv.v2;
+	return iprv.status;
+}
+
+/* Enables/disables specific processor bus features */
+static inline s64
+ia64_pal_bus_set_features (pal_bus_features_u_t feature_select)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_BUS_SET_FEATURES, feature_select.pal_bus_features_val, 0, 0);
+	return iprv.status;
+}
+
+/* Get detailed cache information */
+static inline s64
+ia64_pal_cache_config_info (u64 cache_level, u64 cache_type, pal_cache_config_info_t *conf)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		conf->pcci_status                 = iprv.status;
+		conf->pcci_info_1.pcci1_data      = iprv.v0;
+		conf->pcci_info_2.pcci2_data      = iprv.v1;
+		conf->pcci_reserved               = iprv.v2;
+	}
+	return iprv.status;
+
+}
+
+/* Get detailed cche protection information */
+static inline s64
+ia64_pal_cache_prot_info (u64 cache_level, u64 cache_type, pal_cache_protection_info_t *prot)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_PROT_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		prot->pcpi_status           = iprv.status;
+		prot->pcp_info[0].pcpi_data = iprv.v0 & 0xffffffff;
+		prot->pcp_info[1].pcpi_data = iprv.v0 >> 32;
+		prot->pcp_info[2].pcpi_data = iprv.v1 & 0xffffffff;
+		prot->pcp_info[3].pcpi_data = iprv.v1 >> 32;
+		prot->pcp_info[4].pcpi_data = iprv.v2 & 0xffffffff;
+		prot->pcp_info[5].pcpi_data = iprv.v2 >> 32;
+	}
+	return iprv.status;
+}
+
+/*
+ * Flush the processor instruction or data caches.  *PROGRESS must be
+ * initialized to zero before calling this for the first time..
+ */
+static inline s64
+ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_IC_OFF(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+	if (vector)
+		*vector = iprv.v0;
+	*progress = iprv.v1;
+	return iprv.status;
+}
+
+
+/* Initialize the processor controlled caches */
+static inline s64
+ia64_pal_cache_init (u64 level, u64 cache_type, u64 rest)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_INIT, level, cache_type, rest);
+	return iprv.status;
+}
+
+/* Initialize the tags and data of a data or unified cache line of
+ * processor controlled cache to known values without the availability
+ * of backing memory.
+ */
+static inline s64
+ia64_pal_cache_line_init (u64 physical_addr, u64 data_value)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_LINE_INIT, physical_addr, data_value, 0);
+	return iprv.status;
+}
+
+
+/* Read the data and tag of a processor controlled cache line for diags */
+static inline s64
+ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_READ, line_id.pclid_data, physical_addr, 0);
+	return iprv.status;
+}
+
+/* Return summary information about the heirarchy of caches controlled by the processor */
+static inline s64
+ia64_pal_cache_summary (u64 *cache_levels, u64 *unique_caches)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0);
+	if (cache_levels)
+		*cache_levels = iprv.v0;
+	if (unique_caches)
+		*unique_caches = iprv.v1;
+	return iprv.status;
+}
+
+/* Write the data and tag of a processor-controlled cache line for diags */
+static inline s64
+ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_WRITE, line_id.pclid_data, physical_addr, data);
+	return iprv.status;
+}
+
+
+/* Return the parameters needed to copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_info (u64 copy_type, u64 num_procs, u64 num_iopics,
+		    u64 *buffer_size, u64 *buffer_align)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_INFO, copy_type, num_procs, num_iopics);
+	if (buffer_size)
+		*buffer_size = iprv.v0;
+	if (buffer_align)
+		*buffer_align = iprv.v1;
+	return iprv.status;
+}
+
+/* Copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc_offset)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_PAL, target_addr, alloc_size, processor);
+	if (pal_proc_offset)
+		*pal_proc_offset = iprv.v0;
+	return iprv.status;
+}
+
+/* Return the number of instruction and data debug register pairs */
+static inline s64
+ia64_pal_debug_info (u64 *inst_regs,  u64 *data_regs)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0);
+	if (inst_regs)
+		*inst_regs = iprv.v0;
+	if (data_regs)
+		*data_regs = iprv.v1;
+
+	return iprv.status;
+}
+
+#ifdef TBD
+/* Switch from IA64-system environment to IA-32 system environment */
+static inline s64
+ia64_pal_enter_ia32_env (ia32_env1, ia32_env2, ia32_env3)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_ENTER_IA_32_ENV, ia32_env1, ia32_env2, ia32_env3);
+	return iprv.status;
+}
+#endif
+
+/* Get unique geographical address of this processor on its bus */
+static inline s64
+ia64_pal_fixed_addr (u64 *global_unique_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FIXED_ADDR, 0, 0, 0);
+	if (global_unique_addr)
+		*global_unique_addr = iprv.v0;
+	return iprv.status;
+}
+
+/* Get base frequency of the platform if generated by the processor */
+static inline s64
+ia64_pal_freq_base (u64 *platform_base_freq)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0);
+	if (platform_base_freq)
+		*platform_base_freq = iprv.v0;
+	return iprv.status;
+}
+
+/*
+ * Get the ratios for processor frequency, bus frequency and interval timer to
+ * to base frequency of the platform
+ */
+static inline s64
+ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio,
+		      struct pal_freq_ratio *itc_ratio)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_RATIOS, 0, 0, 0);
+	if (proc_ratio)
+		*(u64 *)proc_ratio = iprv.v0;
+	if (bus_ratio)
+		*(u64 *)bus_ratio = iprv.v1;
+	if (itc_ratio)
+		*(u64 *)itc_ratio = iprv.v2;
+	return iprv.status;
+}
+
+/* Make the processor enter HALT or one of the implementation dependent low
+ * power states where prefetching and execution are suspended and cache and
+ * TLB coherency is not maintained.
+ */
+static inline s64
+ia64_pal_halt (u64 halt_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT, halt_state, 0, 0);
+	return iprv.status;
+}
+
+typedef union pal_power_mgmt_info_u {
+	u64			ppmi_data;
+	struct {
+	       u64		exit_latency		: 16,
+				entry_latency		: 16,
+				power_consumption	: 28,
+				im			: 1,
+				co			: 1,
+				reserved		: 2;
+	} pal_power_mgmt_info_s;
+} pal_power_mgmt_info_u_t;
+
+/* Return information about processor's optional power management capabilities. */
+static inline s64
+ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0);
+	return iprv.status;
+}
+
+/* Cause the processor to enter LIGHT HALT state, where prefetching and execution are
+ * suspended, but cache and TLB coherency is maintained.
+ */
+static inline s64
+ia64_pal_halt_light (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT_LIGHT, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Clear all the processor error logging   registers and reset the indicator that allows
+ * the error logging registers to be written. This procedure also checks the pending
+ * machine check bit and pending INIT bit and reports their states.
+ */
+static inline s64
+ia64_pal_mc_clear_log (u64 *pending_vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_CLEAR_LOG, 0, 0, 0);
+	if (pending_vector)
+		*pending_vector = iprv.v0;
+	return iprv.status;
+}
+
+/* Ensure that all outstanding transactions in a processor are completed or that any
+ * MCA due to thes outstanding transaction is taken.
+ */
+static inline s64
+ia64_pal_mc_drain (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Return the machine check dynamic processor state */
+static inline s64
+ia64_pal_mc_dynamic_state (u64 offset, u64 *size, u64 *pds)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, offset, 0, 0);
+	if (size)
+		*size = iprv.v0;
+	if (pds)
+		*pds = iprv.v1;
+	return iprv.status;
+}
+
+/* Return processor machine check information */
+static inline s64
+ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_ERROR_INFO, info_index, type_index, 0);
+	if (size)
+		*size = iprv.v0;
+	if (error_info)
+		*error_info = iprv.v1;
+	return iprv.status;
+}
+
+/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK willnot
+ * attempt to correct any expected machine checks.
+ */
+static inline s64
+ia64_pal_mc_expected (u64 expected, u64 *previous)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_EXPECTED, expected, 0, 0);
+	if (previous)
+		*previous = iprv.v0;
+	return iprv.status;
+}
+
+/* Register a platform dependent location with PAL to which it can save
+ * minimal processor state in the event of a machine check or initialization
+ * event.
+ */
+static inline s64
+ia64_pal_mc_register_mem (u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, 0, 0);
+	return iprv.status;
+}
+
+/* Restore minimal architectural processor state, set CMC interrupt if necessary
+ * and resume execution
+ */
+static inline s64
+ia64_pal_mc_resume (u64 set_cmci, u64 save_ptr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_RESUME, set_cmci, save_ptr, 0);
+	return iprv.status;
+}
+
+/* Return the memory attributes implemented by the processor */
+static inline s64
+ia64_pal_mem_attrib (u64 *mem_attrib)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MEM_ATTRIB, 0, 0, 0);
+	if (mem_attrib)
+		*mem_attrib = iprv.v0 & 0xff;
+	return iprv.status;
+}
+
+/* Return the amount of memory needed for second phase of processor
+ * self-test and the required alignment of memory.
+ */
+static inline s64
+ia64_pal_mem_for_test (u64 *bytes_needed, u64 *alignment)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MEM_FOR_TEST, 0, 0, 0);
+	if (bytes_needed)
+		*bytes_needed = iprv.v0;
+	if (alignment)
+		*alignment = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_perf_mon_info_u {
+	u64			  ppmi_data;
+	struct {
+	       u64		generic		: 8,
+				width		: 8,
+				cycles		: 8,
+				retired		: 8,
+				reserved	: 32;
+	} pal_perf_mon_info_s;
+} pal_perf_mon_info_u_t;
+
+/* Return the performance monitor information about what can be counted
+ * and how to configure the monitors to count the desired events.
+ */
+static inline s64
+ia64_pal_perf_mon_info (u64 *pm_buffer, pal_perf_mon_info_u_t *pm_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PERF_MON_INFO, (unsigned long) pm_buffer, 0, 0);
+	if (pm_info)
+		pm_info->ppmi_data = iprv.v0;
+	return iprv.status;
+}
+
+/* Specifies the physical address of the processor interrupt block
+ * and I/O port space.
+ */
+static inline s64
+ia64_pal_platform_addr (u64 type, u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PLATFORM_ADDR, type, physical_addr, 0);
+	return iprv.status;
+}
+
+/* Set the SAL PMI entrypoint in memory */
+static inline s64
+ia64_pal_pmi_entrypoint (u64 sal_pmi_entry_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PMI_ENTRYPOINT, sal_pmi_entry_addr, 0, 0);
+	return iprv.status;
+}
+
+struct pal_features_s;
+/* Provide information about configurable processor features */
+static inline s64
+ia64_pal_proc_get_features (u64 *features_avail,
+			    u64 *features_status,
+			    u64 *features_control)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, 0, 0);
+	if (iprv.status == 0) {
+		*features_avail   = iprv.v0;
+		*features_status  = iprv.v1;
+		*features_control = iprv.v2;
+	}
+	return iprv.status;
+}
+
+/* Enable/disable processor dependent features */
+static inline s64
+ia64_pal_proc_set_features (u64 feature_select)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, feature_select, 0, 0);
+	return iprv.status;
+}
+
+/*
+ * Put everything in a struct so we avoid the global offset table whenever
+ * possible.
+ */
+typedef struct ia64_ptce_info_s {
+	u64		base;
+	u32		count[2];
+	u32		stride[2];
+} ia64_ptce_info_t;
+
+/* Return the information required for the architected loop used to purge
+ * (initialize) the entire TC
+ */
+static inline s64
+ia64_get_ptce (ia64_ptce_info_t *ptce)
+{
+	struct ia64_pal_retval iprv;
+
+	if (!ptce)
+		return -1;
+
+	PAL_CALL(iprv, PAL_PTCE_INFO, 0, 0, 0);
+	if (iprv.status == 0) {
+		ptce->base = iprv.v0;
+		ptce->count[0] = iprv.v1 >> 32;
+		ptce->count[1] = iprv.v1 & 0xffffffff;
+		ptce->stride[0] = iprv.v2 >> 32;
+		ptce->stride[1] = iprv.v2 & 0xffffffff;
+	}
+	return iprv.status;
+}
+
+/* Return info about implemented application and control registers. */
+static inline s64
+ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_REGISTER_INFO, info_request, 0, 0);
+	if (reg_info_1)
+		*reg_info_1 = iprv.v0;
+	if (reg_info_2)
+		*reg_info_2 = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_hints_u {
+	u64			ph_data;
+	struct {
+	       u64		si		: 1,
+				li		: 1,
+				reserved	: 62;
+	} pal_hints_s;
+} pal_hints_u_t;
+
+/* Return information about the register stack and RSE for this processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_rse_info (u64 *num_phys_stacked, pal_hints_u_t *hints)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0);
+	if (num_phys_stacked)
+		*num_phys_stacked = iprv.v0;
+	if (hints)
+		hints->ph_data = iprv.v1;
+	return iprv.status;
+}
+
+/* Cause the processor to enter	SHUTDOWN state, where prefetching and execution are
+ * suspended, but cause cache and TLB coherency to be maintained.
+ * This is usually called in IA-32 mode.
+ */
+static inline s64
+ia64_pal_shutdown (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_SHUTDOWN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Perform the second phase of processor self-test. */
+static inline s64
+ia64_pal_test_proc (u64 test_addr, u64 test_size, u64 attributes, u64 *self_test_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_TEST_PROC, test_addr, test_size, attributes);
+	if (self_test_state)
+		*self_test_state = iprv.v0;
+	return iprv.status;
+}
+
+typedef union  pal_version_u {
+	u64	pal_version_val;
+	struct {
+		u64	pv_pal_b_rev		:	8;
+		u64	pv_pal_b_model		:	8;
+		u64	pv_reserved1		:	8;
+		u64	pv_pal_vendor		:	8;
+		u64	pv_pal_a_rev		:	8;
+		u64	pv_pal_a_model		:	8;
+		u64	pv_reserved2		:	16;
+	} pal_version_s;
+} pal_version_u_t;
+
+
+/* Return PAL version information */
+static inline s64
+ia64_pal_version (pal_version_u_t *pal_min_version, pal_version_u_t *pal_cur_version)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_VERSION, 0, 0, 0);
+	if (pal_min_version)
+		pal_min_version->pal_version_val = iprv.v0;
+
+	if (pal_cur_version)
+		pal_cur_version->pal_version_val = iprv.v1;
+
+	return iprv.status;
+}
+
+typedef union pal_tc_info_u {
+	u64			pti_val;
+	struct {
+	       u64		num_sets	:	8,
+				associativity	:	8,
+				num_entries	:	16,
+				pf		:	1,
+				unified		:	1,
+				reduce_tr	:	1,
+				reserved	:	29;
+	} pal_tc_info_s;
+} pal_tc_info_u_t;
+
+#define tc_reduce_tr		pal_tc_info_s.reduce_tr
+#define tc_unified		pal_tc_info_s.unified
+#define tc_pf			pal_tc_info_s.pf
+#define tc_num_entries		pal_tc_info_s.num_entries
+#define tc_associativity	pal_tc_info_s.associativity
+#define tc_num_sets		pal_tc_info_s.num_sets
+
+
+/* Return information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_vm_info (u64 tc_level, u64 tc_type,  pal_tc_info_u_t *tc_info, u64 *tc_pages)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_INFO, tc_level, tc_type, 0);
+	if (tc_info)
+		tc_info->pti_val = iprv.v0;
+	if (tc_pages)
+		*tc_pages = iprv.v1;
+	return iprv.status;
+}
+
+/* Get page size information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_vm_page_size (u64 *tr_pages, u64 *vw_pages)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0);
+	if (tr_pages)
+		*tr_pages = iprv.v0;
+	if (vw_pages)
+		*vw_pages = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_vm_info_1_u {
+	u64			pvi1_val;
+	struct {
+		u64		vw		: 1,
+				phys_add_size	: 7,
+				key_size	: 8,
+				max_pkr		: 8,
+				hash_tag_id	: 8,
+				max_dtr_entry	: 8,
+				max_itr_entry	: 8,
+				max_unique_tcs	: 8,
+				num_tc_levels	: 8;
+	} pal_vm_info_1_s;
+} pal_vm_info_1_u_t;
+
+typedef union pal_vm_info_2_u {
+	u64			pvi2_val;
+	struct {
+		u64		impl_va_msb	: 8,
+				rid_size	: 8,
+				reserved	: 48;
+	} pal_vm_info_2_s;
+} pal_vm_info_2_u_t;
+
+/* Get summary information about the virtual memory characteristics of the processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_VM_SUMMARY, 0, 0, 0);
+	if (vm_info_1)
+		vm_info_1->pvi1_val = iprv.v0;
+	if (vm_info_2)
+		vm_info_2->pvi2_val = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_itr_valid_u {
+	u64			piv_val;
+	struct {
+	       u64		access_rights_valid	: 1,
+				priv_level_valid	: 1,
+				dirty_bit_valid		: 1,
+				mem_attr_valid		: 1,
+				reserved		: 60;
+	} pal_tr_valid_s;
+} pal_tr_valid_u_t;
+
+/* Read a translation register */
+static inline s64
+ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)ia64_tpa(tr_buffer));
+	if (tr_valid)
+		tr_valid->piv_val = iprv.v0;
+	return iprv.status;
+}
+
+/*
+ * PAL_PREFETCH_VISIBILITY transaction types
+ */
+#define PAL_VISIBILITY_VIRTUAL		0
+#define PAL_VISIBILITY_PHYSICAL		1
+
+/*
+ * PAL_PREFETCH_VISIBILITY return codes
+ */
+#define PAL_VISIBILITY_OK		1
+#define PAL_VISIBILITY_OK_REMOTE_NEEDED	0
+#define PAL_VISIBILITY_INVAL_ARG	-2
+#define PAL_VISIBILITY_ERROR		-3
+
+static inline s64
+ia64_pal_prefetch_visibility (s64 trans_type)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_PREFETCH_VISIBILITY, trans_type, 0, 0);
+	return iprv.status;
+}
+
+/* data structure for getting information on logical to physical mappings */
+typedef union pal_log_overview_u {
+	struct {
+		u64	num_log		:16,	/* Total number of logical
+						 * processors on this die
+						 */
+			tpc		:8,	/* Threads per core */
+			reserved3	:8,	/* Reserved */
+			cpp		:8,	/* Cores per processor */
+			reserved2	:8,	/* Reserved */
+			ppid		:8,	/* Physical processor ID */
+			reserved1	:8;	/* Reserved */
+	} overview_bits;
+	u64 overview_data;
+} pal_log_overview_t;
+
+typedef union pal_proc_n_log_info1_u{
+	struct {
+		u64	tid		:16,	/* Thread id */
+			reserved2	:16,	/* Reserved */
+			cid		:16,	/* Core id */
+			reserved1	:16;	/* Reserved */
+	} ppli1_bits;
+	u64	ppli1_data;
+} pal_proc_n_log_info1_t;
+
+typedef union pal_proc_n_log_info2_u {
+	struct {
+		u64	la		:16,	/* Logical address */
+			reserved	:48;	/* Reserved */
+	} ppli2_bits;
+	u64	ppli2_data;
+} pal_proc_n_log_info2_t;
+
+typedef struct pal_logical_to_physical_s
+{
+	pal_log_overview_t overview;
+	pal_proc_n_log_info1_t ppli1;
+	pal_proc_n_log_info2_t ppli2;
+} pal_logical_to_physical_t;
+
+#define overview_num_log	overview.overview_bits.num_log
+#define overview_tpc		overview.overview_bits.tpc
+#define overview_cpp		overview.overview_bits.cpp
+#define overview_ppid		overview.overview_bits.ppid
+#define log1_tid		ppli1.ppli1_bits.tid
+#define log1_cid		ppli1.ppli1_bits.cid
+#define log2_la			ppli2.ppli2_bits.la
+
+/* Get information on logical to physical processor mappings. */
+static inline s64
+ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0);
+
+	if (iprv.status == PAL_STATUS_SUCCESS)
+	{
+		if (proc_number == 0)
+			mapping->overview.overview_data = iprv.v0;
+		mapping->ppli1.ppli1_data = iprv.v1;
+		mapping->ppli2.ppli2_data = iprv.v2;
+	}
+
+	return iprv.status;
+}
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PAL_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/privop.h b/linux-2.6-xen-sparse/include/asm-ia64/privop.h
new file mode 100644
index 0000000000..2a3f783027
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/privop.h
@@ -0,0 +1,59 @@
+#ifndef _ASM_IA64_PRIVOP_H
+#define _ASM_IA64_PRIVOP_H
+
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ */
+
+#include <linux/config.h>
+#ifdef CONFIG_XEN
+#include <asm/xen/privop.h>
+#endif
+
+#ifndef __ASSEMBLY
+
+#ifndef IA64_PARAVIRTUALIZED
+
+#define ia64_getreg			__ia64_getreg
+#define ia64_setreg			__ia64_setreg
+#define ia64_hint			__ia64_hint
+#define ia64_thash			__ia64_thash
+#define ia64_itci			__ia64_itci
+#define ia64_itcd			__ia64_itcd
+#define ia64_itri			__ia64_itri
+#define ia64_itrd			__ia64_itrd
+#define ia64_tpa			__ia64_tpa
+#define ia64_set_ibr			__ia64_set_ibr
+#define ia64_set_pkr			__ia64_set_pkr
+#define ia64_set_pmc			__ia64_set_pmc
+#define ia64_set_pmd			__ia64_set_pmd
+#define ia64_set_rr			__ia64_set_rr
+#define ia64_get_cpuid			__ia64_get_cpuid
+#define ia64_get_ibr			__ia64_get_ibr
+#define ia64_get_pkr			__ia64_get_pkr
+#define ia64_get_pmc			__ia64_get_pmc
+#define ia64_get_pmd			__ia64_get_pmd
+#define ia64_get_rr			__ia64_get_rr
+#define ia64_fc				__ia64_fc
+#define ia64_ssm			__ia64_ssm
+#define ia64_rsm			__ia64_rsm
+#define ia64_ptce			__ia64_ptce
+#define ia64_ptcga			__ia64_ptcga
+#define ia64_ptcl			__ia64_ptcl
+#define ia64_ptri			__ia64_ptri
+#define ia64_ptrd			__ia64_ptrd
+#define	ia64_get_psr_i			__ia64_get_psr_i
+#define ia64_intrin_local_irq_restore	__ia64_intrin_local_irq_restore
+#define ia64_pal_halt_light		__ia64_pal_halt_light
+#define	ia64_leave_kernel		__ia64_leave_kernel
+#define	ia64_leave_syscall		__ia64_leave_syscall
+#define	ia64_switch_to			__ia64_switch_to
+#define	ia64_pal_call_static		__ia64_pal_call_static
+
+#endif /* !IA64_PARAVIRTUALIZED */
+
+#endif /* !__ASSEMBLY */
+
+#endif /* _ASM_IA64_PRIVOP_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/processor.h b/linux-2.6-xen-sparse/include/asm-ia64/processor.h
new file mode 100644
index 0000000000..b8c9e66792
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/processor.h
@@ -0,0 +1,701 @@
+#ifndef _ASM_IA64_PROCESSOR_H
+#define _ASM_IA64_PROCESSOR_H
+
+/*
+ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ *
+ * 11/24/98	S.Eranian	added ia64_set_iva()
+ * 12/03/99	D. Mosberger	implement thread_saved_pc() via kernel unwind API
+ * 06/16/00	A. Mallick	added csd/ssd/tssd for ia32 support
+ */
+
+#include <linux/config.h>
+
+#include <asm/intrinsics.h>
+#include <asm/kregs.h>
+#include <asm/ptrace.h>
+#include <asm/ustack.h>
+#include <asm/privop.h>
+
+/* Our arch specific arch_init_sched_domain is in arch/ia64/kernel/domain.c */
+#define ARCH_HAS_SCHED_DOMAIN
+
+#define IA64_NUM_DBG_REGS	8
+/*
+ * Limits for PMC and PMD are set to less than maximum architected values
+ * but should be sufficient for a while
+ */
+#define IA64_NUM_PMC_REGS	32
+#define IA64_NUM_PMD_REGS	32
+
+#define DEFAULT_MAP_BASE	__IA64_UL_CONST(0x2000000000000000)
+#define DEFAULT_TASK_SIZE	__IA64_UL_CONST(0xa000000000000000)
+
+/*
+ * TASK_SIZE really is a mis-named.  It really is the maximum user
+ * space address (plus one).  On IA-64, there are five regions of 2TB
+ * each (assuming 8KB page size), for a total of 8TB of user virtual
+ * address space.
+ */
+#define TASK_SIZE		(current->thread.task_size)
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	(current->thread.map_base)
+
+#define IA64_THREAD_FPH_VALID	(__IA64_UL(1) << 0)	/* floating-point high state valid? */
+#define IA64_THREAD_DBG_VALID	(__IA64_UL(1) << 1)	/* debug registers valid? */
+#define IA64_THREAD_PM_VALID	(__IA64_UL(1) << 2)	/* performance registers valid? */
+#define IA64_THREAD_UAC_NOPRINT	(__IA64_UL(1) << 3)	/* don't log unaligned accesses */
+#define IA64_THREAD_UAC_SIGBUS	(__IA64_UL(1) << 4)	/* generate SIGBUS on unaligned acc. */
+							/* bit 5 is currently unused */
+#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6)	/* don't log any fpswa faults */
+#define IA64_THREAD_FPEMU_SIGFPE  (__IA64_UL(1) << 7)	/* send a SIGFPE for fpswa faults */
+
+#define IA64_THREAD_UAC_SHIFT	3
+#define IA64_THREAD_UAC_MASK	(IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS)
+#define IA64_THREAD_FPEMU_SHIFT	6
+#define IA64_THREAD_FPEMU_MASK	(IA64_THREAD_FPEMU_NOPRINT | IA64_THREAD_FPEMU_SIGFPE)
+
+
+/*
+ * This shift should be large enough to be able to represent 1000000000/itc_freq with good
+ * accuracy while being small enough to fit 10*1000000000<<IA64_NSEC_PER_CYC_SHIFT in 64 bits
+ * (this will give enough slack to represent 10 seconds worth of time as a scaled number).
+ */
+#define IA64_NSEC_PER_CYC_SHIFT	30
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/threads.h>
+#include <linux/types.h>
+
+#include <asm/fpu.h>
+#include <asm/page.h>
+#include <asm/percpu.h>
+#include <asm/rse.h>
+#include <asm/unwind.h>
+#include <asm/atomic.h>
+#ifdef CONFIG_NUMA
+#include <asm/nodedata.h>
+#endif
+
+/* like above but expressed as bitfields for more efficient access: */
+struct ia64_psr {
+	__u64 reserved0 : 1;
+	__u64 be : 1;
+	__u64 up : 1;
+	__u64 ac : 1;
+	__u64 mfl : 1;
+	__u64 mfh : 1;
+	__u64 reserved1 : 7;
+	__u64 ic : 1;
+	__u64 i : 1;
+	__u64 pk : 1;
+	__u64 reserved2 : 1;
+	__u64 dt : 1;
+	__u64 dfl : 1;
+	__u64 dfh : 1;
+	__u64 sp : 1;
+	__u64 pp : 1;
+	__u64 di : 1;
+	__u64 si : 1;
+	__u64 db : 1;
+	__u64 lp : 1;
+	__u64 tb : 1;
+	__u64 rt : 1;
+	__u64 reserved3 : 4;
+	__u64 cpl : 2;
+	__u64 is : 1;
+	__u64 mc : 1;
+	__u64 it : 1;
+	__u64 id : 1;
+	__u64 da : 1;
+	__u64 dd : 1;
+	__u64 ss : 1;
+	__u64 ri : 2;
+	__u64 ed : 1;
+	__u64 bn : 1;
+	__u64 reserved4 : 19;
+};
+
+/*
+ * CPU type, hardware bug flags, and per-CPU state.  Frequently used
+ * state comes earlier:
+ */
+struct cpuinfo_ia64 {
+	__u32 softirq_pending;
+	__u64 itm_delta;	/* # of clock cycles between clock ticks */
+	__u64 itm_next;		/* interval timer mask value to use for next clock tick */
+	__u64 nsec_per_cyc;	/* (1000000000<<IA64_NSEC_PER_CYC_SHIFT)/itc_freq */
+	__u64 unimpl_va_mask;	/* mask of unimplemented virtual address bits (from PAL) */
+	__u64 unimpl_pa_mask;	/* mask of unimplemented physical address bits (from PAL) */
+	__u64 itc_freq;		/* frequency of ITC counter */
+	__u64 proc_freq;	/* frequency of processor */
+	__u64 cyc_per_usec;	/* itc_freq/1000000 */
+	__u64 ptce_base;
+	__u32 ptce_count[2];
+	__u32 ptce_stride[2];
+	struct task_struct *ksoftirqd;	/* kernel softirq daemon for this CPU */
+
+#ifdef CONFIG_SMP
+	__u64 loops_per_jiffy;
+	int cpu;
+	__u32 socket_id;	/* physical processor socket id */
+	__u16 core_id;		/* core id */
+	__u16 thread_id;	/* thread id */
+	__u16 num_log;		/* Total number of logical processors on
+				 * this socket that were successfully booted */
+	__u8  cores_per_socket;	/* Cores per processor socket */
+	__u8  threads_per_core;	/* Threads per core */
+#endif
+
+	/* CPUID-derived information: */
+	__u64 ppn;
+	__u64 features;
+	__u8 number;
+	__u8 revision;
+	__u8 model;
+	__u8 family;
+	__u8 archrev;
+	char vendor[16];
+
+#ifdef CONFIG_NUMA
+	struct ia64_node_data *node_data;
+#endif
+};
+
+DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+
+/*
+ * The "local" data variable.  It refers to the per-CPU data of the currently executing
+ * CPU, much like "current" points to the per-task data of the currently executing task.
+ * Do not use the address of local_cpu_data, since it will be different from
+ * cpu_data(smp_processor_id())!
+ */
+#define local_cpu_data		(&__ia64_per_cpu_var(cpu_info))
+#define cpu_data(cpu)		(&per_cpu(cpu_info, cpu))
+
+extern void identify_cpu (struct cpuinfo_ia64 *);
+extern void print_cpu_info (struct cpuinfo_ia64 *);
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+#define SET_UNALIGN_CTL(task,value)								\
+({												\
+	(task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_UAC_MASK)			\
+				| (((value) << IA64_THREAD_UAC_SHIFT) & IA64_THREAD_UAC_MASK));	\
+	0;											\
+})
+#define GET_UNALIGN_CTL(task,addr)								\
+({												\
+	put_user(((task)->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT,	\
+		 (int __user *) (addr));							\
+})
+
+#define SET_FPEMU_CTL(task,value)								\
+({												\
+	(task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_FPEMU_MASK)		\
+			  | (((value) << IA64_THREAD_FPEMU_SHIFT) & IA64_THREAD_FPEMU_MASK));	\
+	0;											\
+})
+#define GET_FPEMU_CTL(task,addr)								\
+({												\
+	put_user(((task)->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT,	\
+		 (int __user *) (addr));							\
+})
+
+#ifdef CONFIG_IA32_SUPPORT
+struct desc_struct {
+	unsigned int a, b;
+};
+
+#define desc_empty(desc)		(!((desc)->a + (desc)->b))
+#define desc_equal(desc1, desc2)	(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
+
+#define GDT_ENTRY_TLS_ENTRIES	3
+#define GDT_ENTRY_TLS_MIN	6
+#define GDT_ENTRY_TLS_MAX 	(GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+struct partial_page_list;
+#endif
+
+struct thread_struct {
+	__u32 flags;			/* various thread flags (see IA64_THREAD_*) */
+	/* writing on_ustack is performance-critical, so it's worth spending 8 bits on it... */
+	__u8 on_ustack;			/* executing on user-stacks? */
+	__u8 pad[3];
+	__u64 ksp;			/* kernel stack pointer */
+	__u64 map_base;			/* base address for get_unmapped_area() */
+	__u64 task_size;		/* limit for task size */
+	__u64 rbs_bot;			/* the base address for the RBS */
+	int last_fph_cpu;		/* CPU that may hold the contents of f32-f127 */
+
+#ifdef CONFIG_IA32_SUPPORT
+	__u64 eflag;			/* IA32 EFLAGS reg */
+	__u64 fsr;			/* IA32 floating pt status reg */
+	__u64 fcr;			/* IA32 floating pt control reg */
+	__u64 fir;			/* IA32 fp except. instr. reg */
+	__u64 fdr;			/* IA32 fp except. data reg */
+	__u64 old_k1;			/* old value of ar.k1 */
+	__u64 old_iob;			/* old IOBase value */
+	struct partial_page_list *ppl;	/* partial page list for 4K page size issue */
+        /* cached TLS descriptors. */
+	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+
+# define INIT_THREAD_IA32	.eflag =	0,			\
+				.fsr =		0,			\
+				.fcr =		0x17800000037fULL,	\
+				.fir =		0,			\
+				.fdr =		0,			\
+				.old_k1 =	0,			\
+				.old_iob =	0,			\
+				.ppl =		NULL,
+#else
+# define INIT_THREAD_IA32
+#endif /* CONFIG_IA32_SUPPORT */
+#ifdef CONFIG_PERFMON
+	__u64 pmcs[IA64_NUM_PMC_REGS];
+	__u64 pmds[IA64_NUM_PMD_REGS];
+	void *pfm_context;		     /* pointer to detailed PMU context */
+	unsigned long pfm_needs_checking;    /* when >0, pending perfmon work on kernel exit */
+# define INIT_THREAD_PM		.pmcs =			{0UL, },  \
+				.pmds =			{0UL, },  \
+				.pfm_context =		NULL,     \
+				.pfm_needs_checking =	0UL,
+#else
+# define INIT_THREAD_PM
+#endif
+	__u64 dbr[IA64_NUM_DBG_REGS];
+	__u64 ibr[IA64_NUM_DBG_REGS];
+	struct ia64_fpreg fph[96];	/* saved/loaded on demand */
+};
+
+#define INIT_THREAD {						\
+	.flags =	0,					\
+	.on_ustack =	0,					\
+	.ksp =		0,					\
+	.map_base =	DEFAULT_MAP_BASE,			\
+	.rbs_bot =	STACK_TOP - DEFAULT_USER_STACK_SIZE,	\
+	.task_size =	DEFAULT_TASK_SIZE,			\
+	.last_fph_cpu =  -1,					\
+	INIT_THREAD_IA32					\
+	INIT_THREAD_PM						\
+	.dbr =		{0, },					\
+	.ibr =		{0, },					\
+	.fph =		{{{{0}}}, }				\
+}
+
+#define start_thread(regs,new_ip,new_sp) do {							\
+	set_fs(USER_DS);									\
+	regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL))		\
+			 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS));		\
+	regs->cr_iip = new_ip;									\
+	regs->ar_rsc = 0xf;		/* eager mode, privilege level 3 */			\
+	regs->ar_rnat = 0;									\
+	regs->ar_bspstore = current->thread.rbs_bot;						\
+	regs->ar_fpsr = FPSR_DEFAULT;								\
+	regs->loadrs = 0;									\
+	regs->r8 = current->mm->dumpable;	/* set "don't zap registers" flag */		\
+	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
+	if (unlikely(!current->mm->dumpable)) {							\
+		/*										\
+		 * Zap scratch regs to avoid leaking bits between processes with different	\
+		 * uid/privileges.								\
+		 */										\
+		regs->ar_pfs = 0; regs->b0 = 0; regs->pr = 0;					\
+		regs->r1 = 0; regs->r9  = 0; regs->r11 = 0; regs->r13 = 0; regs->r15 = 0;	\
+	}											\
+} while (0)
+
+/* Forward declarations, a strange C thing... */
+struct mm_struct;
+struct task_struct;
+
+/*
+ * Free all resources held by a thread. This is called after the
+ * parent of DEAD_TASK has collected the exit status of the task via
+ * wait().
+ */
+#define release_thread(dead_task)
+
+/* Prepare to copy thread state - unlazy all lazy status */
+#define prepare_to_copy(tsk)	do { } while (0)
+
+/*
+ * This is the mechanism for creating a new kernel thread.
+ *
+ * NOTE 1: Only a kernel-only process (ie the swapper or direct
+ * descendants who haven't done an "execve()") should use this: it
+ * will work within a system call from a "real" process, but the
+ * process memory space will not be free'd until both the parent and
+ * the child have exited.
+ *
+ * NOTE 2: This MUST NOT be an inlined function.  Otherwise, we get
+ * into trouble in init/main.c when the child thread returns to
+ * do_basic_setup() and the timing is such that free_initmem() has
+ * been called already.
+ */
+extern pid_t kernel_thread (int (*fn)(void *), void *arg, unsigned long flags);
+
+/* Get wait channel for task P.  */
+extern unsigned long get_wchan (struct task_struct *p);
+
+/* Return instruction pointer of blocked task TSK.  */
+#define KSTK_EIP(tsk)					\
+  ({							\
+	struct pt_regs *_regs = ia64_task_regs(tsk);	\
+	_regs->cr_iip + ia64_psr(_regs)->ri;		\
+  })
+
+/* Return stack pointer of blocked task TSK.  */
+#define KSTK_ESP(tsk)  ((tsk)->thread.ksp)
+
+extern void ia64_getreg_unknown_kr (void);
+extern void ia64_setreg_unknown_kr (void);
+
+#define ia64_get_kr(regnum)					\
+({								\
+	unsigned long r = 0;					\
+								\
+	switch (regnum) {					\
+	    case 0: r = ia64_getreg(_IA64_REG_AR_KR0); break;	\
+	    case 1: r = ia64_getreg(_IA64_REG_AR_KR1); break;	\
+	    case 2: r = ia64_getreg(_IA64_REG_AR_KR2); break;	\
+	    case 3: r = ia64_getreg(_IA64_REG_AR_KR3); break;	\
+	    case 4: r = ia64_getreg(_IA64_REG_AR_KR4); break;	\
+	    case 5: r = ia64_getreg(_IA64_REG_AR_KR5); break;	\
+	    case 6: r = ia64_getreg(_IA64_REG_AR_KR6); break;	\
+	    case 7: r = ia64_getreg(_IA64_REG_AR_KR7); break;	\
+	    default: ia64_getreg_unknown_kr(); break;		\
+	}							\
+	r;							\
+})
+
+#define ia64_set_kr(regnum, r) 					\
+({								\
+	switch (regnum) {					\
+	    case 0: ia64_setreg(_IA64_REG_AR_KR0, r); break;	\
+	    case 1: ia64_setreg(_IA64_REG_AR_KR1, r); break;	\
+	    case 2: ia64_setreg(_IA64_REG_AR_KR2, r); break;	\
+	    case 3: ia64_setreg(_IA64_REG_AR_KR3, r); break;	\
+	    case 4: ia64_setreg(_IA64_REG_AR_KR4, r); break;	\
+	    case 5: ia64_setreg(_IA64_REG_AR_KR5, r); break;	\
+	    case 6: ia64_setreg(_IA64_REG_AR_KR6, r); break;	\
+	    case 7: ia64_setreg(_IA64_REG_AR_KR7, r); break;	\
+	    default: ia64_setreg_unknown_kr(); break;		\
+	}							\
+})
+
+/*
+ * The following three macros can't be inline functions because we don't have struct
+ * task_struct at this point.
+ */
+
+/*
+ * Return TRUE if task T owns the fph partition of the CPU we're running on.
+ * Must be called from code that has preemption disabled.
+ */
+#define ia64_is_local_fpu_owner(t)								\
+({												\
+	struct task_struct *__ia64_islfo_task = (t);						\
+	(__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id()				\
+	 && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER));	\
+})
+
+/*
+ * Mark task T as owning the fph partition of the CPU we're running on.
+ * Must be called from code that has preemption disabled.
+ */
+#define ia64_set_local_fpu_owner(t) do {						\
+	struct task_struct *__ia64_slfo_task = (t);					\
+	__ia64_slfo_task->thread.last_fph_cpu = smp_processor_id();			\
+	ia64_set_kr(IA64_KR_FPU_OWNER, (unsigned long) __ia64_slfo_task);		\
+} while (0)
+
+/* Mark the fph partition of task T as being invalid on all CPUs.  */
+#define ia64_drop_fpu(t)	((t)->thread.last_fph_cpu = -1)
+
+extern void __ia64_init_fpu (void);
+extern void __ia64_save_fpu (struct ia64_fpreg *fph);
+extern void __ia64_load_fpu (struct ia64_fpreg *fph);
+extern void ia64_save_debug_regs (unsigned long *save_area);
+extern void ia64_load_debug_regs (unsigned long *save_area);
+
+#ifdef CONFIG_IA32_SUPPORT
+extern void ia32_save_state (struct task_struct *task);
+extern void ia32_load_state (struct task_struct *task);
+#endif
+
+#define ia64_fph_enable()	do { ia64_rsm(IA64_PSR_DFH); ia64_srlz_d(); } while (0)
+#define ia64_fph_disable()	do { ia64_ssm(IA64_PSR_DFH); ia64_srlz_d(); } while (0)
+
+/* load fp 0.0 into fph */
+static inline void
+ia64_init_fpu (void) {
+	ia64_fph_enable();
+	__ia64_init_fpu();
+	ia64_fph_disable();
+}
+
+/* save f32-f127 at FPH */
+static inline void
+ia64_save_fpu (struct ia64_fpreg *fph) {
+	ia64_fph_enable();
+	__ia64_save_fpu(fph);
+	ia64_fph_disable();
+}
+
+/* load f32-f127 from FPH */
+static inline void
+ia64_load_fpu (struct ia64_fpreg *fph) {
+	ia64_fph_enable();
+	__ia64_load_fpu(fph);
+	ia64_fph_disable();
+}
+
+static inline __u64
+ia64_clear_ic (void)
+{
+	__u64 psr;
+	psr = ia64_getreg(_IA64_REG_PSR);
+	ia64_stop();
+	ia64_rsm(IA64_PSR_I | IA64_PSR_IC);
+	ia64_srlz_i();
+	return psr;
+}
+
+/*
+ * Restore the psr.
+ */
+static inline void
+ia64_set_psr (__u64 psr)
+{
+	ia64_stop();
+	ia64_setreg(_IA64_REG_PSR_L, psr);
+	ia64_srlz_d();
+}
+
+/*
+ * Insert a translation into an instruction and/or data translation
+ * register.
+ */
+static inline void
+ia64_itr (__u64 target_mask, __u64 tr_num,
+	  __u64 vmaddr, __u64 pte,
+	  __u64 log_page_size)
+{
+	ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2));
+	ia64_setreg(_IA64_REG_CR_IFA, vmaddr);
+	ia64_stop();
+	if (target_mask & 0x1)
+		ia64_itri(tr_num, pte);
+	if (target_mask & 0x2)
+		ia64_itrd(tr_num, pte);
+}
+
+/*
+ * Insert a translation into the instruction and/or data translation
+ * cache.
+ */
+static inline void
+ia64_itc (__u64 target_mask, __u64 vmaddr, __u64 pte,
+	  __u64 log_page_size)
+{
+	ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2));
+	ia64_setreg(_IA64_REG_CR_IFA, vmaddr);
+	ia64_stop();
+	/* as per EAS2.6, itc must be the last instruction in an instruction group */
+	if (target_mask & 0x1)
+		ia64_itci(pte);
+	if (target_mask & 0x2)
+		ia64_itcd(pte);
+}
+
+/*
+ * Purge a range of addresses from instruction and/or data translation
+ * register(s).
+ */
+static inline void
+ia64_ptr (__u64 target_mask, __u64 vmaddr, __u64 log_size)
+{
+	if (target_mask & 0x1)
+		ia64_ptri(vmaddr, (log_size << 2));
+	if (target_mask & 0x2)
+		ia64_ptrd(vmaddr, (log_size << 2));
+}
+
+/* Set the interrupt vector address.  The address must be suitably aligned (32KB).  */
+static inline void
+ia64_set_iva (void *ivt_addr)
+{
+	ia64_setreg(_IA64_REG_CR_IVA, (__u64) ivt_addr);
+	ia64_srlz_i();
+}
+
+/* Set the page table address and control bits.  */
+static inline void
+ia64_set_pta (__u64 pta)
+{
+	/* Note: srlz.i implies srlz.d */
+	ia64_setreg(_IA64_REG_CR_PTA, pta);
+	ia64_srlz_i();
+}
+
+static inline void
+ia64_eoi (void)
+{
+	ia64_setreg(_IA64_REG_CR_EOI, 0);
+	ia64_srlz_d();
+}
+
+#define cpu_relax()	ia64_hint(ia64_hint_pause)
+
+static inline void
+ia64_set_lrr0 (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_LRR0, val);
+	ia64_srlz_d();
+}
+
+static inline void
+ia64_set_lrr1 (unsigned long val)
+{
+	ia64_setreg(_IA64_REG_CR_LRR1, val);
+	ia64_srlz_d();
+}
+
+
+/*
+ * Given the address to which a spill occurred, return the unat bit
+ * number that corresponds to this address.
+ */
+static inline __u64
+ia64_unat_pos (void *spill_addr)
+{
+	return ((__u64) spill_addr >> 3) & 0x3f;
+}
+
+/*
+ * Set the NaT bit of an integer register which was spilled at address
+ * SPILL_ADDR.  UNAT is the mask to be updated.
+ */
+static inline void
+ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
+{
+	__u64 bit = ia64_unat_pos(spill_addr);
+	__u64 mask = 1UL << bit;
+
+	*unat = (*unat & ~mask) | (nat << bit);
+}
+
+/*
+ * Return saved PC of a blocked thread.
+ * Note that the only way T can block is through a call to schedule() -> switch_to().
+ */
+static inline unsigned long
+thread_saved_pc (struct task_struct *t)
+{
+	struct unw_frame_info info;
+	unsigned long ip;
+
+	unw_init_from_blocked_task(&info, t);
+	if (unw_unwind(&info) < 0)
+		return 0;
+	unw_get_ip(&info, &ip);
+	return ip;
+}
+
+/*
+ * Get the current instruction/program counter value.
+ */
+#define current_text_addr() \
+	({ void *_pc; _pc = (void *)ia64_getreg(_IA64_REG_IP); _pc; })
+
+static inline __u64
+ia64_get_ivr (void)
+{
+	__u64 r;
+	ia64_srlz_d();
+	r = ia64_getreg(_IA64_REG_CR_IVR);
+	ia64_srlz_d();
+	return r;
+}
+
+static inline void
+ia64_set_dbr (__u64 regnum, __u64 value)
+{
+	__ia64_set_dbr(regnum, value);
+#ifdef CONFIG_ITANIUM
+	ia64_srlz_d();
+#endif
+}
+
+static inline __u64
+ia64_get_dbr (__u64 regnum)
+{
+	__u64 retval;
+
+	retval = __ia64_get_dbr(regnum);
+#ifdef CONFIG_ITANIUM
+	ia64_srlz_d();
+#endif
+	return retval;
+}
+
+static inline __u64
+ia64_rotr (__u64 w, __u64 n)
+{
+	return (w >> n) | (w << (64 - n));
+}
+
+#define ia64_rotl(w,n)	ia64_rotr((w), (64) - (n))
+
+/*
+ * Take a mapped kernel address and return the equivalent address
+ * in the region 7 identity mapped virtual area.
+ */
+static inline void *
+ia64_imva (void *addr)
+{
+	void *result;
+	result = (void *) ia64_tpa(addr);
+	return __va(result);
+}
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define PREFETCH_STRIDE			L1_CACHE_BYTES
+
+static inline void
+prefetch (const void *x)
+{
+	 ia64_lfetch(ia64_lfhint_none, x);
+}
+
+static inline void
+prefetchw (const void *x)
+{
+	ia64_lfetch_excl(ia64_lfhint_none, x);
+}
+
+#define spin_lock_prefetch(x)	prefetchw(x)
+
+extern unsigned long boot_option_idle_override;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_IA64_PROCESSOR_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/system.h b/linux-2.6-xen-sparse/include/asm-ia64/system.h
new file mode 100644
index 0000000000..9c3da05b86
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/system.h
@@ -0,0 +1,295 @@
+#ifndef _ASM_IA64_SYSTEM_H
+#define _ASM_IA64_SYSTEM_H
+
+/*
+ * System defines. Note that this is included both from .c and .S
+ * files, so it does only defines, not any C code.  This is based
+ * on information published in the Processor Abstraction Layer
+ * and the System Abstraction Layer manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#include <linux/config.h>
+
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/percpu.h>
+
+#define GATE_ADDR		__IA64_UL_CONST(0xa000000000000000)
+/*
+ * 0xa000000000000000+2*PERCPU_PAGE_SIZE
+ * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page)
+ */
+#define KERNEL_START		 __IA64_UL_CONST(0xa000000100000000)
+#define PERCPU_ADDR		(-PERCPU_PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct pci_vector_struct {
+	__u16 segment;	/* PCI Segment number */
+	__u16 bus;	/* PCI Bus number */
+	__u32 pci_id;	/* ACPI split 16 bits device, 16 bits function (see section 6.1.1) */
+	__u8 pin;	/* PCI PIN (0 = A, 1 = B, 2 = C, 3 = D) */
+	__u32 irq;	/* IRQ assigned */
+};
+
+extern struct ia64_boot_param {
+	__u64 command_line;		/* physical address of command line arguments */
+	__u64 efi_systab;		/* physical address of EFI system table */
+	__u64 efi_memmap;		/* physical address of EFI memory map */
+	__u64 efi_memmap_size;		/* size of EFI memory map */
+	__u64 efi_memdesc_size;		/* size of an EFI memory map descriptor */
+	__u32 efi_memdesc_version;	/* memory descriptor version */
+	struct {
+		__u16 num_cols;	/* number of columns on console output device */
+		__u16 num_rows;	/* number of rows on console output device */
+		__u16 orig_x;	/* cursor's x position */
+		__u16 orig_y;	/* cursor's y position */
+	} console_info;
+	__u64 fpswa;		/* physical address of the fpswa interface */
+	__u64 initrd_start;
+	__u64 initrd_size;
+} *ia64_boot_param;
+
+/*
+ * Macros to force memory ordering.  In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ *   wmb():	Guarantees that all preceding stores to memory-
+ *		like regions are visible before any subsequent
+ *		stores and that all following stores will be
+ *		visible only after all previous stores.
+ *   rmb():	Like wmb(), but for reads.
+ *   mb():	wmb()/rmb() combo, i.e., all previous memory
+ *		accesses are visible before all subsequent
+ *		accesses and vice versa.  This is also known as
+ *		a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers.  For that, mf.a needs to
+ * be used.  However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+#define mb()	ia64_mf()
+#define rmb()	mb()
+#define wmb()	mb()
+#define read_barrier_depends()	do { } while(0)
+
+#ifdef CONFIG_SMP
+# define smp_mb()	mb()
+# define smp_rmb()	rmb()
+# define smp_wmb()	wmb()
+# define smp_read_barrier_depends()	read_barrier_depends()
+#else
+# define smp_mb()	barrier()
+# define smp_rmb()	barrier()
+# define smp_wmb()	barrier()
+# define smp_read_barrier_depends()	do { } while(0)
+#endif
+
+/*
+ * XXX check on these---I suspect what Linus really wants here is
+ * acquire vs release semantics but we can't discuss this stuff with
+ * Linus just yet.  Grrr...
+ */
+#define set_mb(var, value)	do { (var) = (value); mb(); } while (0)
+#define set_wmb(var, value)	do { (var) = (value); mb(); } while (0)
+
+#define safe_halt()         ia64_pal_halt_light()    /* PAL_HALT_LIGHT */
+
+/*
+ * The group barrier in front of the rsm & ssm are necessary to ensure
+ * that none of the previous instructions in the same group are
+ * affected by the rsm/ssm.
+ */
+/* For spinlocks etc */
+
+/*
+ * - clearing psr.i is implicitly serialized (visible by next insn)
+ * - setting psr.i requires data serialization
+ * - we need a stop-bit before reading PSR because we sometimes
+ *   write a floating-point register right before reading the PSR
+ *   and that writes to PSR.mfl
+ */
+#define __local_irq_save(x)			\
+do {						\
+	ia64_stop();				\
+	(x) = ia64_get_psr_i();			\
+	ia64_stop();				\
+	ia64_rsm(IA64_PSR_I);			\
+} while (0)
+
+#define __local_irq_disable()			\
+do {						\
+	ia64_stop();				\
+	ia64_rsm(IA64_PSR_I);			\
+} while (0)
+
+#define __local_irq_restore(x)	ia64_intrin_local_irq_restore((x) & IA64_PSR_I)
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+
+  extern unsigned long last_cli_ip;
+
+# define __save_ip()		last_cli_ip = ia64_getreg(_IA64_REG_IP)
+
+# define local_irq_save(x)					\
+do {								\
+	unsigned long psr;					\
+								\
+	__local_irq_save(psr);					\
+	if (psr & IA64_PSR_I)					\
+		__save_ip();					\
+	(x) = psr;						\
+} while (0)
+
+# define local_irq_disable()	do { unsigned long x; local_irq_save(x); } while (0)
+
+# define local_irq_restore(x)					\
+do {								\
+	unsigned long old_psr, psr = (x);			\
+								\
+	local_save_flags(old_psr);				\
+	__local_irq_restore(psr);				\
+	if ((old_psr & IA64_PSR_I) && !(psr & IA64_PSR_I))	\
+		__save_ip();					\
+} while (0)
+
+#else /* !CONFIG_IA64_DEBUG_IRQ */
+# define local_irq_save(x)	__local_irq_save(x)
+# define local_irq_disable()	__local_irq_disable()
+# define local_irq_restore(x)	__local_irq_restore(x)
+#endif /* !CONFIG_IA64_DEBUG_IRQ */
+
+#define local_irq_enable()	({ ia64_stop(); ia64_ssm(IA64_PSR_I); ia64_srlz_d(); })
+#define local_save_flags(flags)	({ ia64_stop(); (flags) = ia64_get_psr_i(); })
+
+#define irqs_disabled()				\
+({						\
+	unsigned long __ia64_id_flags;		\
+	local_save_flags(__ia64_id_flags);	\
+	(__ia64_id_flags & IA64_PSR_I) == 0;	\
+})
+
+#ifdef __KERNEL__
+
+#define prepare_to_switch()    do { } while(0)
+
+#ifdef CONFIG_IA32_SUPPORT
+# define IS_IA32_PROCESS(regs)	(ia64_psr(regs)->is != 0)
+#else
+# define IS_IA32_PROCESS(regs)		0
+struct task_struct;
+static inline void ia32_save_state(struct task_struct *t __attribute__((unused))){}
+static inline void ia32_load_state(struct task_struct *t __attribute__((unused))){}
+#endif
+
+/*
+ * Context switch from one thread to another.  If the two threads have
+ * different address spaces, schedule() has already taken care of
+ * switching to the new address space by calling switch_mm().
+ *
+ * Disabling access to the fph partition and the debug-register
+ * context switch MUST be done before calling ia64_switch_to() since a
+ * newly created thread returns directly to
+ * ia64_ret_from_syscall_clear_r8.
+ */
+extern struct task_struct *ia64_switch_to (void *next_task);
+
+struct task_struct;
+
+extern void ia64_save_extra (struct task_struct *task);
+extern void ia64_load_extra (struct task_struct *task);
+
+#ifdef CONFIG_PERFMON
+  DECLARE_PER_CPU(unsigned long, pfm_syst_info);
+# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
+#else
+# define PERFMON_IS_SYSWIDE() (0)
+#endif
+
+#define IA64_HAS_EXTRA_STATE(t)							\
+	((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)	\
+	 || IS_IA32_PROCESS(ia64_task_regs(t)) || PERFMON_IS_SYSWIDE())
+
+#define __switch_to(prev,next,last) do {							 \
+	if (IA64_HAS_EXTRA_STATE(prev))								 \
+		ia64_save_extra(prev);								 \
+	if (IA64_HAS_EXTRA_STATE(next))								 \
+		ia64_load_extra(next);								 \
+	ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);			 \
+	(last) = ia64_switch_to((next));							 \
+} while (0)
+
+#ifdef CONFIG_SMP
+/*
+ * In the SMP case, we save the fph state when context-switching away from a thread that
+ * modified fph.  This way, when the thread gets scheduled on another CPU, the CPU can
+ * pick up the state from task->thread.fph, avoiding the complication of having to fetch
+ * the latest fph state from another CPU.  In other words: eager save, lazy restore.
+ */
+# define switch_to(prev,next,last) do {						\
+	if (ia64_psr(ia64_task_regs(prev))->mfh && ia64_is_local_fpu_owner(prev)) {				\
+		ia64_psr(ia64_task_regs(prev))->mfh = 0;			\
+		(prev)->thread.flags |= IA64_THREAD_FPH_VALID;			\
+		__ia64_save_fpu((prev)->thread.fph);				\
+	}									\
+	__switch_to(prev, next, last);						\
+} while (0)
+#else
+# define switch_to(prev,next,last)	__switch_to(prev, next, last)
+#endif
+
+/*
+ * On IA-64, we don't want to hold the runqueue's lock during the low-level context-switch,
+ * because that could cause a deadlock.  Here is an example by Erich Focht:
+ *
+ * Example:
+ * CPU#0:
+ * schedule()
+ *    -> spin_lock_irq(&rq->lock)
+ *    -> context_switch()
+ *       -> wrap_mmu_context()
+ *          -> read_lock(&tasklist_lock)
+ *
+ * CPU#1:
+ * sys_wait4() or release_task() or forget_original_parent()
+ *    -> write_lock(&tasklist_lock)
+ *    -> do_notify_parent()
+ *       -> wake_up_parent()
+ *          -> try_to_wake_up()
+ *             -> spin_lock_irq(&parent_rq->lock)
+ *
+ * If the parent's rq happens to be on CPU#0, we'll wait for the rq->lock
+ * of that CPU which will not be released, because there we wait for the
+ * tasklist_lock to become available.
+ */
+#define prepare_arch_switch(rq, next)		\
+do {						\
+	spin_lock(&(next)->switch_lock);	\
+	spin_unlock(&(rq)->lock);		\
+} while (0)
+#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
+#define task_running(rq, p) 		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+
+#define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
+
+void cpu_idle_wait(void);
+
+#define arch_align_stack(x) (x)
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IA64_SYSTEM_H */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
new file mode 100644
index 0000000000..d4a44490e0
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
@@ -0,0 +1,274 @@
+#ifndef _ASM_IA64_XEN_PRIVOP_H
+#define _ASM_IA64_XEN_PRIVOP_H
+
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co
+ *	Dan Magenheimer <dan.magenheimer@hp.com>
+ *
+ * Paravirtualizations of privileged operations for Xen/ia64
+ *
+ */
+
+
+#include <asm/xen/asm-xsi-offsets.h>
+
+#define IA64_PARAVIRTUALIZED
+
+#ifdef __ASSEMBLY__
+#define	XEN_HYPER_RFI			break 0x1
+#define	XEN_HYPER_RSM_PSR_DT		break 0x2
+#define	XEN_HYPER_SSM_PSR_DT		break 0x3
+#define	XEN_HYPER_COVER			break 0x4
+#define	XEN_HYPER_ITC_D			break 0x5
+#define	XEN_HYPER_ITC_I			break 0x6
+#define	XEN_HYPER_SSM_I			break 0x7
+#define	XEN_HYPER_GET_IVR		break 0x8
+#define	XEN_HYPER_GET_TPR		break 0x9
+#define	XEN_HYPER_SET_TPR		break 0xa
+#define	XEN_HYPER_EOI			break 0xb
+#define	XEN_HYPER_SET_ITM		break 0xc
+#define	XEN_HYPER_THASH			break 0xd
+#define	XEN_HYPER_PTC_GA		break 0xe
+#define	XEN_HYPER_ITR_D			break 0xf
+#define	XEN_HYPER_GET_RR		break 0x10
+#define	XEN_HYPER_SET_RR		break 0x11
+#endif
+
+#ifndef __ASSEMBLY__
+extern int running_on_xen;
+
+#define	XEN_HYPER_SSM_I			asm("break 0x7");
+#define	XEN_HYPER_GET_IVR		asm("break 0x8");
+
+/************************************************/
+/* Instructions paravirtualized for correctness */
+/************************************************/
+
+/* "fc" and "thash" are privilege-sensitive instructions, meaning they
+ *  may have different semantics depending on whether they are executed
+ *  at PL0 vs PL!=0.  When paravirtualized, these instructions mustn't
+ *  be allowed to execute directly, lest incorrect semantics result. */
+extern unsigned long xen_fc(unsigned long addr);
+#define ia64_fc(addr)			xen_fc((unsigned long)(addr))
+extern unsigned long xen_thash(unsigned long addr);
+#define ia64_thash(addr)		xen_thash((unsigned long)(addr))
+/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag"
+ * is not currently used (though it may be in a long-format VHPT system!)
+ * and the semantics of cover only change if psr.ic is off which is very
+ * rare (and currently non-existent outside of assembly code */
+
+/* There are also privilege-sensitive registers.  These registers are
+ * readable at any privilege level but only writable at PL0. */
+extern unsigned long xen_get_cpuid(int index);
+#define	ia64_get_cpuid(i)		xen_get_cpuid(i)
+extern unsigned long xen_get_pmd(int index);
+#define	ia64_get_pmd(i)			xen_get_pmd(i)
+extern unsigned long xen_get_eflag(void);	/* see xen_ia64_getreg */
+extern void xen_set_eflag(unsigned long);	/* see xen_ia64_setreg */
+
+/************************************************/
+/* Instructions paravirtualized for performance */
+/************************************************/
+
+/* Xen uses memory-mapped virtual privileged registers for access to many
+ * performance-sensitive privileged registers.  Some, like the processor
+ * status register (psr), are broken up into multiple memory locations.
+ * Others, like "pend", are abstractions based on privileged registers.
+ * "Pend" is guaranteed to be set if reading cr.ivr would return a
+ * (non-spurious) interrupt. */
+#define xen_get_virtual_psr_i()		(*(int *)(XSI_PSR_I))
+#define xen_set_virtual_psr_i(_val)	({ *(int *)(XSI_PSR_I) = _val ? 1:0; })
+#define xen_set_virtual_psr_ic(_val)	({ *(int *)(XSI_PSR_IC) = _val ? 1:0; })
+#define xen_get_virtual_pend()		(*(int *)(XSI_PEND))
+
+/* Hyperprivops are "break" instructions with a well-defined API.
+ * In particular, the virtual psr.ic bit must be off; in this way
+ * it is guaranteed to never conflict with a linux break instruction.
+ * Normally, this is done in a xen stub but this one is frequent enough
+ * that we inline it */
+#define xen_hyper_ssm_i()						\
+({									\
+	xen_set_virtual_psr_i(0);					\
+	xen_set_virtual_psr_ic(0);					\
+	XEN_HYPER_SSM_I;						\
+})
+
+/* kernel register paravirtualization may soon go away */
+#define xen_get_kr(regnum) (((unsigned long *)(XSI_KR0))[regnum])
+#define xen_set_kr(regnum,val) ((((unsigned long *)(XSI_KR0))[regnum]) = val)
+
+/* turning off interrupts can be paravirtualized simply by writing
+ * to a memory-mapped virtual psr.i bit (implemented as a 16-bit bool) */
+#define xen_rsm_i()	xen_set_virtual_psr_i(0)
+
+/* turning on interrupts is a bit more complicated.. write to the
+ * memory-mapped virtual psr.i bit first (to avoid race condition),
+ * then if any interrupts were pending, we have to execute a hyperprivop
+ * to ensure the pending interrupt gets delivered; else we're done! */
+#define xen_ssm_i()							\
+({									\
+	int old = xen_get_virtual_psr_i();				\
+	xen_set_virtual_psr_i(1);					\
+	if (!old && xen_get_virtual_pend()) xen_hyper_ssm_i();		\
+})
+
+#define xen_ia64_intrin_local_irq_restore(x)				\
+{									\
+     if (running_on_xen) {						\
+	if ((x) & IA64_PSR_I) { xen_ssm_i(); }				\
+	else { xen_rsm_i(); }						\
+    }									\
+    else __ia64_intrin_local_irq_restore((x));				\
+}
+
+#define	xen_get_psr_i()							\
+(									\
+	(running_on_xen) ?						\
+		(xen_get_virtual_psr_i() ? IA64_PSR_I : 0)		\
+		: __ia64_get_psr_i()					\
+)
+
+#define xen_ia64_ssm(mask)						\
+{									\
+	if ((mask)==IA64_PSR_I) {					\
+		if (running_on_xen) { xen_ssm_i(); }			\
+		else { __ia64_ssm(mask); }				\
+	}								\
+	else { __ia64_ssm(mask); }					\
+}
+
+#define xen_ia64_rsm(mask)						\
+{									\
+	if ((mask)==IA64_PSR_I) {					\
+		if (running_on_xen) { xen_rsm_i(); }			\
+		else { __ia64_rsm(mask); }				\
+	}								\
+	else { __ia64_rsm(mask); }					\
+}
+
+
+/* Although all privileged operations can be left to trap and will
+ * be properly handled by Xen, some are frequent enough that we use
+ * hyperprivops for performance. */
+
+extern unsigned long xen_get_ivr(void);
+extern unsigned long xen_get_tpr(void);
+extern void xen_set_itm(unsigned long);
+extern void xen_set_tpr(unsigned long);
+extern void xen_eoi(void);
+extern void xen_set_rr(unsigned long index, unsigned long val);
+extern unsigned long xen_get_rr(unsigned long index);
+
+/* Note: It may look wrong to test for running_on_xen in each case.
+ * However regnum is always a constant so, as written, the compiler
+ * eliminates the switch statement, whereas running_on_xen must be
+ * tested dynamically. */
+#define xen_ia64_getreg(regnum)						\
+({									\
+	__u64 ia64_intri_res;						\
+									\
+	switch(regnum) {						\
+	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_KR7:			\
+		ia64_intri_res = (running_on_xen) ?			\
+			xen_get_kr((regnum-_IA64_REG_AR_KR0)) :		\
+			__ia64_getreg(regnum);				\
+		break;							\
+	case _IA64_REG_CR_IVR:						\
+		ia64_intri_res = (running_on_xen) ?			\
+			xen_get_ivr() :					\
+			__ia64_getreg(regnum);				\
+		break;							\
+	case _IA64_REG_CR_TPR:						\
+		ia64_intri_res = (running_on_xen) ?			\
+			xen_get_tpr() :					\
+			__ia64_getreg(regnum);				\
+		break;							\
+	case _IA64_REG_AR_EFLAG:					\
+		ia64_intri_res = (running_on_xen) ?			\
+			xen_get_eflag() :				\
+			__ia64_getreg(regnum);				\
+		break;							\
+	default:							\
+		ia64_intri_res = __ia64_getreg(regnum);			\
+		break;							\
+	}								\
+	ia64_intri_res;							\
+})
+
+#define xen_ia64_setreg(regnum,val)					\
+({									\
+	switch(regnum) {						\
+	case _IA64_REG_AR_KR0 ... _IA64_REG_AR_KR7:			\
+		(running_on_xen) ?					\
+			xen_set_kr((regnum-_IA64_REG_AR_KR0), val) :	\
+			__ia64_setreg(regnum,val);			\
+		break;							\
+	case _IA64_REG_CR_ITM:						\
+		(running_on_xen) ?					\
+			xen_set_itm(val) :				\
+			__ia64_setreg(regnum,val);			\
+		break;							\
+	case _IA64_REG_CR_TPR:						\
+		(running_on_xen) ?					\
+			xen_set_tpr(val) :				\
+			__ia64_setreg(regnum,val);			\
+		break;							\
+	case _IA64_REG_CR_EOI:						\
+		(running_on_xen) ?					\
+			xen_eoi() :					\
+			__ia64_setreg(regnum,val);			\
+		break;							\
+	case _IA64_REG_AR_EFLAG:					\
+		(running_on_xen) ?					\
+			xen_set_eflag(val) :				\
+			__ia64_setreg(regnum,val);			\
+		break;							\
+	default:							\
+		__ia64_setreg(regnum,val);				\
+		break;							\
+	}								\
+})
+
+#define ia64_ssm			xen_ia64_ssm
+#define ia64_rsm			xen_ia64_rsm
+#define ia64_intrin_local_irq_restore	xen_ia64_intrin_local_irq_restore
+#define	ia64_ptcga			xen_ptcga
+#define	ia64_set_rr(index,val)		xen_set_rr(index,val)
+#define	ia64_get_rr(index)		xen_get_rr(index)
+#define ia64_getreg			xen_ia64_getreg
+#define ia64_setreg			xen_ia64_setreg
+#define	ia64_get_psr_i			xen_get_psr_i
+
+/* the remainder of these are not performance-sensitive so its
+ * OK to not paravirtualize and just take a privop trap and emulate */
+#define ia64_hint			__ia64_hint
+#define ia64_set_pmd			__ia64_set_pmd
+#define ia64_itci			__ia64_itci
+#define ia64_itcd			__ia64_itcd
+#define ia64_itri			__ia64_itri
+#define ia64_itrd			__ia64_itrd
+#define ia64_tpa			__ia64_tpa
+#define ia64_set_ibr			__ia64_set_ibr
+#define ia64_set_pkr			__ia64_set_pkr
+#define ia64_set_pmc			__ia64_set_pmc
+#define ia64_get_ibr			__ia64_get_ibr
+#define ia64_get_pkr			__ia64_get_pkr
+#define ia64_get_pmc			__ia64_get_pmc
+#define ia64_ptce			__ia64_ptce
+#define ia64_ptcl			__ia64_ptcl
+#define ia64_ptri			__ia64_ptri
+#define ia64_ptrd			__ia64_ptrd
+
+#endif /* !__ASSEMBLY__ */
+
+/* these routines utilize privilege-sensitive or performance-sensitive
+ * privileged instructions so the code must be replaced with
+ * paravirtualized versions */
+#define ia64_pal_halt_light		xen_pal_halt_light
+#define	ia64_leave_kernel		xen_leave_kernel
+#define	ia64_leave_syscall		xen_leave_syscall
+#define	ia64_trace_syscall		xen_trace_syscall
+#define	ia64_switch_to			xen_switch_to
+#define	ia64_pal_call_static		xen_pal_call_static
+
+#endif /* _ASM_IA64_XEN_PRIVOP_H */
diff --git a/tools/examples/xen-backend.agent b/tools/examples/xen-backend.agent
index 6aefea1399..91a5d0eb32 100755
--- a/tools/examples/xen-backend.agent
+++ b/tools/examples/xen-backend.agent
@@ -1,7 +1,3 @@
-
-copyrev: 0000000000000000000000000000000000000000
-copy: tools/examples/backend.hotplug
-
 #! /bin/sh
 
 #ACTION=add
author	djm@kirby.fc.hp.com <djm@kirby.fc.hp.com>	2005-09-21 09:06:30 -0600
committer	djm@kirby.fc.hp.com <djm@kirby.fc.hp.com>	2005-09-21 09:06:30 -0600
commit	fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5 (patch)
tree	f27e2f3e1d2dcb8bdd9edfc533c42647d10361f7
parent	3c0d086543c9433d8f2fc33f1afd001fa31d3878 (diff)
download	xen-fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5.tar.gz xen-fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5.tar.bz2 xen-fa4a47057d8cc325a0328ba6f7e99eaaa525e9c5.zip