43 files changed, 0 insertions, 28857 deletions
diff --git a/linux-2.6-xen-sparse/arch/ia64/Kconfig b/linux-2.6-xen-sparse/arch/ia64/Kconfig
deleted file mode 100644
index 4991dd4a2b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig
+++ /dev/null
@@ -1,587 +0,0 @@
-#
-# For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
-#
-
-mainmenu "IA-64 Linux Kernel Configuration"
-
-source "init/Kconfig"
-
-menu "Processor type and features"
-
-config IA64
-	bool
-	default y
-	help
-	  The Itanium Processor Family is Intel's 64-bit successor to
-	  the 32-bit X86 line.  The IA-64 Linux project has a home
-	  page at <http://www.linuxia64.org/> and a mailing list at
-	  <linux-ia64@vger.kernel.org>.
-
-config 64BIT
-	bool
-	default y
-
-config MMU
-	bool
-	default y
-
-config SWIOTLB
-       bool
-       default y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
-config GENERIC_FIND_NEXT_BIT
-	bool
-	default y
-
-config GENERIC_CALIBRATE_DELAY
-	bool
-	default y
-
-config TIME_INTERPOLATION
-	bool
-	default y
-
-config DMI
-	bool
-	default y
-
-config EFI
-	bool
-	default y
-
-config GENERIC_IOMAP
-	bool
-	default y
-
-config XEN
-	bool "Xen hypervisor support"
-	default y
-	help
-	  Enable Xen hypervisor support.  Resulting kernel runs
-	  both as a guest OS on Xen and natively on hardware.
-
-config XEN_IA64_VDSO_PARAVIRT
-	bool
-	depends on XEN && !ITANIUM
-	default y
-	help
-	  vDSO paravirtualization
-
-config XEN_IA64_EXPOSE_P2M
-	bool "Xen/IA64 exposure p2m table"
-	depends on XEN
-	default y
-	help
-	  expose p2m from xen
-
-config XEN_IA64_EXPOSE_P2M_USE_DTR
-	bool "Xen/IA64 map p2m table with dtr"
-	depends on XEN_IA64_EXPOSE_P2M
-	default y
-	help
-	  use dtr to map the exposed p2m table
-
-config SCHED_NO_NO_OMIT_FRAME_POINTER
-	bool
-	default y
-
-config IA64_UNCACHED_ALLOCATOR
-	bool
-	select GENERIC_ALLOCATOR
-
-config DMA_IS_DMA32
-	bool
-	default y
-
-config DMA_IS_NORMAL
-	bool
-	depends on IA64_SGI_SN2
-	default y
-
-config AUDIT_ARCH
-	bool
-	default y
-
-choice
-	prompt "System type"
-	default IA64_GENERIC
-
-config IA64_GENERIC
-	bool "generic"
-	select ACPI
-	select PCI
-	select NUMA
-	select ACPI_NUMA
-	help
-	  This selects the system type of your hardware.  A "generic" kernel
-	  will run on any supported IA-64 system.  However, if you configure
-	  a kernel for your specific system, it will be faster and smaller.
-
-	  generic		For any supported IA-64 system
-	  DIG-compliant		For DIG ("Developer's Interface Guide") compliant systems
-	  HP-zx1/sx1000		For HP systems
-	  HP-zx1/sx1000+swiotlb	For HP systems with (broken) DMA-constrained devices.
-	  SGI-SN2		For SGI Altix systems
-	  Ski-simulator		For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
-
-	  If you don't know what to do, choose "generic".
-
-config IA64_DIG
-	bool "DIG-compliant"
-
-config IA64_HP_ZX1
-	bool "HP-zx1/sx1000"
-	help
-	  Build a kernel that runs on HP zx1 and sx1000 systems.  This adds
-	  support for the HP I/O MMU.
-
-config IA64_HP_ZX1_SWIOTLB
-	bool "HP-zx1/sx1000 with software I/O TLB"
-	help
-	  Build a kernel that runs on HP zx1 and sx1000 systems even when they
-	  have broken PCI devices which cannot DMA to full 32 bits.  Apart
-	  from support for the HP I/O MMU, this includes support for the software
-	  I/O TLB, which allows supporting the broken devices at the expense of
-	  wasting some kernel memory (about 2MB by default).
-
-config IA64_SGI_SN2
-	bool "SGI-SN2"
-	help
-	  Selecting this option will optimize the kernel for use on sn2 based
-	  systems, but the resulting kernel binary will not run on other
-	  types of ia64 systems.  If you have an SGI Altix system, it's safe
-	  to select this option.  If in doubt, select ia64 generic support
-	  instead.
-
-config IA64_HP_SIM
-	bool "Ski-simulator"
-
-config IA64_XEN
-	bool "Xen guest"
-	depends on XEN
-
-endchoice
-
-choice
-	prompt "Processor type"
-	default ITANIUM
-
-config ITANIUM
-	bool "Itanium"
-	help
-	  Select your IA-64 processor type.  The default is Itanium.
-	  This choice is safe for all IA-64 systems, but may not perform
-	  optimally on systems with, say, Itanium 2 or newer processors.
-
-config MCKINLEY
-	bool "Itanium 2"
-	help
-	  Select this to configure for an Itanium 2 (McKinley) processor.
-
-endchoice
-
-choice
-	prompt "Kernel page size"
-	default IA64_PAGE_SIZE_16KB
-
-config IA64_PAGE_SIZE_4KB
-	bool "4KB"
-	help
-	  This lets you select the page size of the kernel.  For best IA-64
-	  performance, a page size of 8KB or 16KB is recommended.  For best
-	  IA-32 compatibility, a page size of 4KB should be selected (the vast
-	  majority of IA-32 binaries work perfectly fine with a larger page
-	  size).  For Itanium 2 or newer systems, a page size of 64KB can also
-	  be selected.
-
-	  4KB                For best IA-32 compatibility
-	  8KB                For best IA-64 performance
-	  16KB               For best IA-64 performance
-	  64KB               Requires Itanium 2 or newer processor.
-
-	  If you don't know what to do, choose 16KB.
-
-config IA64_PAGE_SIZE_8KB
-	bool "8KB"
-
-config IA64_PAGE_SIZE_16KB
-	bool "16KB"
-
-config IA64_PAGE_SIZE_64KB
-	depends on !ITANIUM
-	bool "64KB"
-
-endchoice
-
-choice
-	prompt "Page Table Levels"
-	default PGTABLE_3
-
-config PGTABLE_3
-	bool "3 Levels"
-
-config PGTABLE_4
-	depends on !IA64_PAGE_SIZE_64KB
-	bool "4 Levels"
-
-endchoice
-
-source kernel/Kconfig.hz
-
-config IA64_BRL_EMU
-	bool
-	depends on ITANIUM
-	default y
-
-# align cache-sensitive data to 128 bytes
-config IA64_L1_CACHE_SHIFT
-	int
-	default "7" if MCKINLEY
-	default "6" if ITANIUM
-
-config IA64_CYCLONE
-	bool "Cyclone (EXA) Time Source support"
-	help
-	  Say Y here to enable support for IBM EXA Cyclone time source.
-	  If you're unsure, answer N.
-
-config IOSAPIC
-	bool
-	depends on !IA64_HP_SIM
-	default y
-
-config IA64_SGI_SN_XP
-	tristate "Support communication between SGI SSIs"
-	depends on IA64_GENERIC || IA64_SGI_SN2
-	select IA64_UNCACHED_ALLOCATOR
-	help
-	  An SGI machine can be divided into multiple Single System
-	  Images which act independently of each other and have
-	  hardware based memory protection from the others.  Enabling
-	  this feature will allow for direct communication between SSIs
-	  based on a network adapter and DMA messaging.
-
-config FORCE_MAX_ZONEORDER
-	int "MAX_ORDER (11 - 17)"  if !HUGETLB_PAGE
-	range 11 17  if !HUGETLB_PAGE
-	default "17" if HUGETLB_PAGE
-	default "11"
-
-config SMP
-	bool "Symmetric multi-processing support"
-	help
-	  This enables support for systems with more than one CPU. If you have
-	  a system with only one CPU, say N.  If you have a system with more
-	  than one CPU, say Y.
-
-	  If you say N here, the kernel will run on single and multiprocessor
-	  systems, but will use only one CPU of a multiprocessor system.  If
-	  you say Y here, the kernel will run on many, but not all,
-	  single processor systems.  On a single processor system, the kernel
-	  will run faster if you say N here.
-
-	  See also the <file:Documentation/smp.txt> and the SMP-HOWTO
-	  available at <http://www.tldp.org/docs.html#howto>.
-
-	  If you don't know what to do here, say N.
-
-config NR_CPUS
-	int "Maximum number of CPUs (2-1024)"
-	range 2 1024
-	depends on SMP
-	default "1024"
-	help
-	  You should set this to the number of CPUs in your system, but
-	  keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
-	  only use 2 CPUs on a >2 CPU system.  Setting this to a value larger
-	  than 64 will cause the use of a CPU mask array, causing a small
-	  performance hit.
-
-config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
-	depends on SMP && EXPERIMENTAL
-	select HOTPLUG
-	default n
-	---help---
-	  Say Y here to experiment with turning CPUs off and on.  CPUs
-	  can be controlled through /sys/devices/system/cpu/cpu#.
-	  Say N if you want to disable CPU hotplug.
-
-config ARCH_ENABLE_MEMORY_HOTPLUG
-	def_bool y
-
-config SCHED_SMT
-	bool "SMT scheduler support"
-	depends on SMP
-	help
-	  Improves the CPU scheduler's decision making when dealing with
-	  Intel IA64 chips with MultiThreading at a cost of slightly increased
-	  overhead in some places. If unsure say N here.
-
-config PERMIT_BSP_REMOVE
-	bool "Support removal of Bootstrap Processor"
-	depends on HOTPLUG_CPU
-	default n
-	---help---
-	Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU
-	support. 
-
-config FORCE_CPEI_RETARGET
-	bool "Force assumption that CPEI can be re-targetted"
-	depends on PERMIT_BSP_REMOVE
-	default n
-	---help---
-	Say Y if you need to force the assumption that CPEI can be re-targetted to
-	any cpu in the system. This hint is available via ACPI 3.0 specifications.
-	Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
-	This option it useful to enable this feature on older BIOS's as well.
-	You can also enable this by using boot command line option force_cpei=1.
-
-config PREEMPT
-	bool "Preemptible Kernel"
-        help
-          This option reduces the latency of the kernel when reacting to
-          real-time or interactive events by allowing a low priority process to
-          be preempted even if it is in kernel mode executing a system call.
-          This allows applications to run more reliably even when the system is
-          under load.
-
-          Say Y here if you are building a kernel for a desktop, embedded
-          or real-time system.  Say N if you are unsure.
-
-source "mm/Kconfig"
-
-config ARCH_SELECT_MEMORY_MODEL
-	def_bool y
-
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool y
-	help
-	  Say Y to support efficient handling of discontiguous physical memory,
-	  for architectures which are either NUMA (Non-Uniform Memory Access)
-	  or have huge holes in the physical address space for other reasons.
- 	  See <file:Documentation/vm/numa> for more.
-
-config ARCH_FLATMEM_ENABLE
-	def_bool y
-
-config ARCH_SPARSEMEM_ENABLE
-	def_bool y
-	depends on ARCH_DISCONTIGMEM_ENABLE
-
-config ARCH_DISCONTIGMEM_DEFAULT
-	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB)
-	depends on ARCH_DISCONTIGMEM_ENABLE
-
-config NUMA
-	bool "NUMA support"
-	depends on !IA64_HP_SIM && !FLATMEM
-	default y if IA64_SGI_SN2
-	help
-	  Say Y to compile the kernel to support NUMA (Non-Uniform Memory
-	  Access).  This option is for configuring high-end multiprocessor
-	  server systems.  If in doubt, say N.
-
-config NODES_SHIFT
-	int "Max num nodes shift(3-10)"
-	range 3 10
-	default "10"
-	depends on NEED_MULTIPLE_NODES
-	help
-	  This option specifies the maximum number of nodes in your SSI system.
-	  MAX_NUMNODES will be 2^(This value).
-	  If in doubt, use the default.
-
-# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
-# VIRTUAL_MEM_MAP has been retained for historical reasons.
-config VIRTUAL_MEM_MAP
-	bool "Virtual mem map"
-	depends on !SPARSEMEM
-	default y if !IA64_HP_SIM
-	help
-	  Say Y to compile the kernel with support for a virtual mem map.
-	  This code also only takes effect if a memory hole of greater than
-	  1 Gb is found during boot.  You must turn this option on if you
-	  require the DISCONTIGMEM option for your machine. If you are
-	  unsure, say Y.
-
-config HOLES_IN_ZONE
-	bool
-	default y if VIRTUAL_MEM_MAP
-
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	def_bool y
-	depends on NEED_MULTIPLE_NODES
-
-config HAVE_ARCH_NODEDATA_EXTENSION
-	def_bool y
-	depends on NUMA
-
-config IA32_SUPPORT
-	bool "Support for Linux/x86 binaries"
-	help
-	  IA-64 processors can execute IA-32 (X86) instructions.  By
-	  saying Y here, the kernel will include IA-32 system call
-	  emulation support which makes it possible to transparently
-	  run IA-32 Linux binaries on an IA-64 Linux system.
-	  If in doubt, say Y.
-
-config COMPAT
-	bool
-	depends on IA32_SUPPORT
-	default y
-
-config IA64_MCA_RECOVERY
-	tristate "MCA recovery from errors other than TLB."
-
-config PERFMON
-	bool "Performance monitor support"
-	help
-	  Selects whether support for the IA-64 performance monitor hardware
-	  is included in the kernel.  This makes some kernel data-structures a
-	  little bigger and slows down execution a bit, but it is generally
-	  a good idea to turn this on.  If you're unsure, say Y.
-
-config IA64_PALINFO
-	tristate "/proc/pal support"
-	help
-	  If you say Y here, you are able to get PAL (Processor Abstraction
-	  Layer) information in /proc/pal.  This contains useful information
-	  about the processors in your systems, such as cache and TLB sizes
-	  and the PAL firmware version in use.
-
-	  To use this option, you have to ensure that the "/proc file system
-	  support" (CONFIG_PROC_FS) is enabled, too.
-
-config SGI_SN
-	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
-
-source "drivers/sn/Kconfig"
-
-source "drivers/firmware/Kconfig"
-
-source "fs/Kconfig.binfmt"
-
-endmenu
-
-menu "Power management and ACPI"
-
-source "kernel/power/Kconfig"
-
-source "drivers/acpi/Kconfig"
-
-if PM
-
-source "arch/ia64/kernel/cpufreq/Kconfig"
-
-endif
-
-endmenu
-
-if !IA64_HP_SIM
-
-menu "Bus options (PCI, PCMCIA)"
-
-config PCI
-	bool "PCI support"
-	help
-	  Real IA-64 machines all have PCI/PCI-X/PCI Express busses.  Say Y
-	  here unless you are using a simulator without PCI support.
-
-config PCI_DOMAINS
-	bool
-	default PCI
-
-config XEN_PCIDEV_FRONTEND
-	bool "Xen PCI Frontend"
-	depends on PCI && XEN
-	default y
-	help
-	  The PCI device frontend driver allows the kernel to import arbitrary
-	  PCI devices from a PCI backend to support PCI driver domains.
-
-config XEN_PCIDEV_FE_DEBUG
-	bool "Xen PCI Frontend Debugging"
-	depends on XEN_PCIDEV_FRONTEND
-	default n
-	help
-	  Enables some debug statements within the PCI Frontend.
-
-source "drivers/pci/pcie/Kconfig"
-
-source "drivers/pci/Kconfig"
-
-source "drivers/pci/hotplug/Kconfig"
-
-source "drivers/pcmcia/Kconfig"
-
-endmenu
-
-endif
-
-source "net/Kconfig"
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "lib/Kconfig"
-
-#
-# Use the generic interrupt handling code in kernel/irq/:
-#
-config GENERIC_HARDIRQS
-	bool
-	default y
-
-config GENERIC_IRQ_PROBE
-	bool
-	default y
-
-config GENERIC_PENDING_IRQ
-	bool
-	depends on GENERIC_HARDIRQS && SMP
-	default y
-
-config IRQ_PER_CPU
-	bool
-	default y
-
-source "arch/ia64/hp/sim/Kconfig"
-
-menu "Instrumentation Support"
-        depends on EXPERIMENTAL
-
-source "arch/ia64/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
-
-source "arch/ia64/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-#
-# override default values of drivers/xen/Kconfig
-#
-if XEN
-config XEN_SMPBOOT
-	default n
-endif
-
-source "drivers/xen/Kconfig"
diff --git a/linux-2.6-xen-sparse/arch/ia64/Makefile b/linux-2.6-xen-sparse/arch/ia64/Makefile
deleted file mode 100644
index 9c7c05626b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/Makefile
+++ /dev/null
@@ -1,106 +0,0 @@
-#
-# ia64/Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies.
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
-#
-
-NM := $(CROSS_COMPILE)nm -B
-READELF := $(CROSS_COMPILE)readelf
-
-export AWK
-
-CHECKFLAGS	+= -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
-
-OBJCOPYFLAGS	:= --strip-all
-LDFLAGS_vmlinux	:= -static
-LDFLAGS_MODULE	+= -T $(srctree)/arch/ia64/module.lds
-AFLAGS_KERNEL	:= -mconstant-gp
-EXTRA		:=
-
-cflags-y	:= -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
-		   -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
-CFLAGS_KERNEL	:= -mconstant-gp
-
-GAS_STATUS	= $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
-CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
-
-ifeq ($(GAS_STATUS),buggy)
-$(error Sorry, you need a newer version of the assember, one that is built from	\
-	a source-tree that post-dates 18-Dec-2002.  You can find a pre-compiled	\
-	static binary of such an assembler at:					\
-										\
-		ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
-endif
-
-ifeq ($(call cc-version),0304)
-	cflags-$(CONFIG_ITANIUM)	+= -mtune=merced
-	cflags-$(CONFIG_MCKINLEY)	+= -mtune=mckinley
-endif
-
-CFLAGS += $(cflags-y)
-
-cppflags-$(CONFIG_XEN) += \
-	-D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
-
-CPPFLAGS += $(cppflags-y)
-
-head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
-
-libs-y				+= arch/ia64/lib/
-core-y				+= arch/ia64/kernel/ arch/ia64/mm/
-core-$(CONFIG_IA32_SUPPORT)	+= arch/ia64/ia32/
-core-$(CONFIG_IA64_DIG) 	+= arch/ia64/dig/
-core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
-core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
-core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
-core-$(CONFIG_IA64_XEN)		+= arch/ia64/dig/
-core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
-core-$(CONFIG_XEN)		+= arch/ia64/xen/
-
-drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
-drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
-drivers-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/
-drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
-drivers-$(CONFIG_IA64_GENERIC)	+= arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
-drivers-$(CONFIG_OPROFILE)	+= arch/ia64/oprofile/
-
-boot := arch/ia64/hp/sim/boot
-
-PHONY += boot compressed check
-
-all: compressed unwcheck
-
-compressed: vmlinux.gz
-
-vmlinuz: vmlinux.gz
-
-vmlinux.gz: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) $@
-
-unwcheck: vmlinux
-	-$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $<
-
-archclean:
-	$(Q)$(MAKE) $(clean)=$(boot)
-
-CLEAN_FILES += vmlinux.gz bootloader
-
-boot:	lib/lib.a vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) $@
-
-install:
-	-yes | sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)"
-
-define archhelp
-  echo '* compressed	- Build compressed kernel image'
-  echo '  install	- Install compressed kernel image'
-  echo '  boot		- Build vmlinux and bootloader for Ski simulator'
-  echo '* unwcheck	- Check vmlinux for invalid unwind info'
-endef
diff --git a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c b/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c
deleted file mode 100644
index c0f6eac819..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/hp/common/sba_iommu.c
+++ /dev/null
@@ -1,2160 +0,0 @@
-/*
-**  IA64 System Bus Adapter (SBA) I/O MMU manager
-**
-**	(c) Copyright 2002-2005 Alex Williamson
-**	(c) Copyright 2002-2003 Grant Grundler
-**	(c) Copyright 2002-2005 Hewlett-Packard Company
-**
-**	Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
-**	Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
-**
-**	This program is free software; you can redistribute it and/or modify
-**	it under the terms of the GNU General Public License as published by
-**      the Free Software Foundation; either version 2 of the License, or
-**      (at your option) any later version.
-**
-**
-** This module initializes the IOC (I/O Controller) found on HP
-** McKinley machines and their successors.
-**
-*/
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/acpi.h>
-#include <linux/efi.h>
-#include <linux/nodemask.h>
-#include <linux/bitops.h>         /* hweight64() */
-
-#include <asm/delay.h>		/* ia64_get_itc() */
-#include <asm/io.h>
-#include <asm/page.h>		/* PAGE_OFFSET */
-#include <asm/dma.h>
-#include <asm/system.h>		/* wmb() */
-
-#include <asm/acpi-ext.h>
-
-#define PFX "IOC: "
-
-/*
-** Enabling timing search of the pdir resource map.  Output in /proc.
-** Disabled by default to optimize performance.
-*/
-#undef PDIR_SEARCH_TIMING
-
-/*
-** This option allows cards capable of 64bit DMA to bypass the IOMMU.  If
-** not defined, all DMA will be 32bit and go through the TLB.
-** There's potentially a conflict in the bio merge code with us
-** advertising an iommu, but then bypassing it.  Since I/O MMU bypassing
-** appears to give more performance than bio-level virtual merging, we'll
-** do the former for now.  NOTE: BYPASS_SG also needs to be undef'd to
-** completely restrict DMA to the IOMMU.
-*/
-#define ALLOW_IOV_BYPASS
-
-/*
-** This option specifically allows/disallows bypassing scatterlists with
-** multiple entries.  Coalescing these entries can allow better DMA streaming
-** and in some cases shows better performance than entirely bypassing the
-** IOMMU.  Performance increase on the order of 1-2% sequential output/input
-** using bonnie++ on a RAID0 MD device (sym2 & mpt).
-*/
-#undef ALLOW_IOV_BYPASS_SG
-
-/*
-** If a device prefetches beyond the end of a valid pdir entry, it will cause
-** a hard failure, ie. MCA.  Version 3.0 and later of the zx1 LBA should
-** disconnect on 4k boundaries and prevent such issues.  If the device is
-** particularly agressive, this option will keep the entire pdir valid such
-** that prefetching will hit a valid address.  This could severely impact
-** error containment, and is therefore off by default.  The page that is
-** used for spill-over is poisoned, so that should help debugging somewhat.
-*/
-#undef FULL_VALID_PDIR
-
-#define ENABLE_MARK_CLEAN
-
-/*
-** The number of debug flags is a clue - this code is fragile.  NOTE: since
-** tightening the use of res_lock the resource bitmap and actual pdir are no
-** longer guaranteed to stay in sync.  The sanity checking code isn't going to
-** like that.
-*/
-#undef DEBUG_SBA_INIT
-#undef DEBUG_SBA_RUN
-#undef DEBUG_SBA_RUN_SG
-#undef DEBUG_SBA_RESOURCE
-#undef ASSERT_PDIR_SANITY
-#undef DEBUG_LARGE_SG_ENTRIES
-#undef DEBUG_BYPASS
-
-#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
-#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
-#endif
-
-#define SBA_INLINE	__inline__
-/* #define SBA_INLINE */
-
-#ifdef DEBUG_SBA_INIT
-#define DBG_INIT(x...)	printk(x)
-#else
-#define DBG_INIT(x...)
-#endif
-
-#ifdef DEBUG_SBA_RUN
-#define DBG_RUN(x...)	printk(x)
-#else
-#define DBG_RUN(x...)
-#endif
-
-#ifdef DEBUG_SBA_RUN_SG
-#define DBG_RUN_SG(x...)	printk(x)
-#else
-#define DBG_RUN_SG(x...)
-#endif
-
-
-#ifdef DEBUG_SBA_RESOURCE
-#define DBG_RES(x...)	printk(x)
-#else
-#define DBG_RES(x...)
-#endif
-
-#ifdef DEBUG_BYPASS
-#define DBG_BYPASS(x...)	printk(x)
-#else
-#define DBG_BYPASS(x...)
-#endif
-
-#ifdef ASSERT_PDIR_SANITY
-#define ASSERT(expr) \
-        if(!(expr)) { \
-                printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
-                panic(#expr); \
-        }
-#else
-#define ASSERT(expr)
-#endif
-
-/*
-** The number of pdir entries to "free" before issuing
-** a read to PCOM register to flush out PCOM writes.
-** Interacts with allocation granularity (ie 4 or 8 entries
-** allocated and free'd/purged at a time might make this
-** less interesting).
-*/
-#define DELAYED_RESOURCE_CNT	64
-
-#define PCI_DEVICE_ID_HP_SX2000_IOC	0x12ec
-
-#define ZX1_IOC_ID	((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
-#define ZX2_IOC_ID	((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
-#define REO_IOC_ID	((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
-#define SX1000_IOC_ID	((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
-#define SX2000_IOC_ID	((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)
-
-#define ZX1_IOC_OFFSET	0x1000	/* ACPI reports SBA, we want IOC */
-
-#define IOC_FUNC_ID	0x000
-#define IOC_FCLASS	0x008	/* function class, bist, header, rev... */
-#define IOC_IBASE	0x300	/* IO TLB */
-#define IOC_IMASK	0x308
-#define IOC_PCOM	0x310
-#define IOC_TCNFG	0x318
-#define IOC_PDIR_BASE	0x320
-
-#define IOC_ROPE0_CFG	0x500
-#define   IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */
-
-
-/* AGP GART driver looks for this */
-#define ZX1_SBA_IOMMU_COOKIE	0x0000badbadc0ffeeUL
-
-/*
-** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
-**
-** Some IOCs (sx1000) can run at the above pages sizes, but are
-** really only supported using the IOC at a 4k page size.
-**
-** iovp_size could only be greater than PAGE_SIZE if we are
-** confident the drivers really only touch the next physical
-** page iff that driver instance owns it.
-*/
-static unsigned long iovp_size;
-static unsigned long iovp_shift;
-static unsigned long iovp_mask;
-
-struct ioc {
-	void __iomem	*ioc_hpa;	/* I/O MMU base address */
-	char		*res_map;	/* resource map, bit == pdir entry */
-	u64		*pdir_base;	/* physical base address */
-	unsigned long	ibase;		/* pdir IOV Space base */
-	unsigned long	imask;		/* pdir IOV Space mask */
-
-	unsigned long	*res_hint;	/* next avail IOVP - circular search */
-	unsigned long	dma_mask;
-	spinlock_t	res_lock;	/* protects the resource bitmap, but must be held when */
-					/* clearing pdir to prevent races with allocations. */
-	unsigned int	res_bitshift;	/* from the RIGHT! */
-	unsigned int	res_size;	/* size of resource map in bytes */
-#ifdef CONFIG_NUMA
-	unsigned int	node;		/* node where this IOC lives */
-#endif
-#if DELAYED_RESOURCE_CNT > 0
-	spinlock_t	saved_lock;	/* may want to try to get this on a separate cacheline */
-					/* than res_lock for bigger systems. */
-	int		saved_cnt;
-	struct sba_dma_pair {
-		dma_addr_t	iova;
-		size_t		size;
-	} saved[DELAYED_RESOURCE_CNT];
-#endif
-
-#ifdef PDIR_SEARCH_TIMING
-#define SBA_SEARCH_SAMPLE	0x100
-	unsigned long avg_search[SBA_SEARCH_SAMPLE];
-	unsigned long avg_idx;	/* current index into avg_search */
-#endif
-
-	/* Stuff we don't need in performance path */
-	struct ioc	*next;		/* list of IOC's in system */
-	acpi_handle	handle;		/* for multiple IOC's */
-	const char 	*name;
-	unsigned int	func_id;
-	unsigned int	rev;		/* HW revision of chip */
-	u32		iov_size;
-	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
-	struct pci_dev	*sac_only_dev;
-};
-
-static struct ioc *ioc_list;
-static int reserve_sba_gart = 1;
-
-static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
-static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);
-
-#define sba_sg_address(sg)	(page_address((sg)->page) + (sg)->offset)
-
-#ifdef FULL_VALID_PDIR
-static u64 prefetch_spill_page;
-#endif
-
-#ifdef CONFIG_PCI
-# define GET_IOC(dev)	(((dev)->bus == &pci_bus_type)						\
-			 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
-#else
-# define GET_IOC(dev)	NULL
-#endif
-
-/*
-** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
-** (or rather not merge) DMA's into managable chunks.
-** On parisc, this is more of the software/tuning constraint
-** rather than the HW. I/O MMU allocation alogorithms can be
-** faster with smaller size is (to some degree).
-*/
-#define DMA_CHUNK_SIZE  (BITS_PER_LONG*iovp_size)
-
-#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))
-
-/************************************
-** SBA register read and write support
-**
-** BE WARNED: register writes are posted.
-**  (ie follow writes which must reach HW with a read)
-**
-*/
-#define READ_REG(addr)       __raw_readq(addr)
-#define WRITE_REG(val, addr) __raw_writeq(val, addr)
-
-#ifdef DEBUG_SBA_INIT
-
-/**
- * sba_dump_tlb - debugging only - print IOMMU operating parameters
- * @hpa: base address of the IOMMU
- *
- * Print the size/location of the IO MMU PDIR.
- */
-static void
-sba_dump_tlb(char *hpa)
-{
-	DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
-	DBG_INIT("IOC_IBASE    : %016lx\n", READ_REG(hpa+IOC_IBASE));
-	DBG_INIT("IOC_IMASK    : %016lx\n", READ_REG(hpa+IOC_IMASK));
-	DBG_INIT("IOC_TCNFG    : %016lx\n", READ_REG(hpa+IOC_TCNFG));
-	DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
-	DBG_INIT("\n");
-}
-#endif
-
-
-#ifdef ASSERT_PDIR_SANITY
-
-/**
- * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @msg: text to print ont the output line.
- * @pide: pdir index.
- *
- * Print one entry of the IO MMU PDIR in human readable form.
- */
-static void
-sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
-{
-	/* start printing from lowest pde in rval */
-	u64 *ptr = &ioc->pdir_base[pide  & ~(BITS_PER_LONG - 1)];
-	unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)];
-	uint rcnt;
-
-	printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
-		 msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);
-
-	rcnt = 0;
-	while (rcnt < BITS_PER_LONG) {
-		printk(KERN_DEBUG "%s %2d %p %016Lx\n",
-		       (rcnt == (pide & (BITS_PER_LONG - 1)))
-		       ? "    -->" : "       ",
-		       rcnt, ptr, (unsigned long long) *ptr );
-		rcnt++;
-		ptr++;
-	}
-	printk(KERN_DEBUG "%s", msg);
-}
-
-
-/**
- * sba_check_pdir - debugging only - consistency checker
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @msg: text to print ont the output line.
- *
- * Verify the resource map and pdir state is consistent
- */
-static int
-sba_check_pdir(struct ioc *ioc, char *msg)
-{
-	u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
-	u64 *rptr = (u64 *) ioc->res_map;	/* resource map ptr */
-	u64 *pptr = ioc->pdir_base;	/* pdir ptr */
-	uint pide = 0;
-
-	while (rptr < rptr_end) {
-		u64 rval;
-		int rcnt; /* number of bits we might check */
-
-		rval = *rptr;
-		rcnt = 64;
-
-		while (rcnt) {
-			/* Get last byte and highest bit from that */
-			u32 pde = ((u32)((*pptr >> (63)) & 0x1));
-			if ((rval & 0x1) ^ pde)
-			{
-				/*
-				** BUMMER!  -- res_map != pdir --
-				** Dump rval and matching pdir entries
-				*/
-				sba_dump_pdir_entry(ioc, msg, pide);
-				return(1);
-			}
-			rcnt--;
-			rval >>= 1;	/* try the next bit */
-			pptr++;
-			pide++;
-		}
-		rptr++;	/* look at next word of res_map */
-	}
-	/* It'd be nice if we always got here :^) */
-	return 0;
-}
-
-
-/**
- * sba_dump_sg - debugging only - print Scatter-Gather list
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @startsg: head of the SG list
- * @nents: number of entries in SG list
- *
- * print the SG list so we can verify it's correct by hand.
- */
-static void
-sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
-{
-	while (nents-- > 0) {
-		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
-		       startsg->dma_address, startsg->dma_length,
-		       sba_sg_address(startsg));
-		startsg++;
-	}
-}
-
-static void
-sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
-{
-	struct scatterlist *the_sg = startsg;
-	int the_nents = nents;
-
-	while (the_nents-- > 0) {
-		if (sba_sg_address(the_sg) == 0x0UL)
-			sba_dump_sg(NULL, startsg, nents);
-		the_sg++;
-	}
-}
-
-#endif /* ASSERT_PDIR_SANITY */
-
-
-
-
-/**************************************************************
-*
-*   I/O Pdir Resource Management
-*
-*   Bits set in the resource map are in use.
-*   Each bit can represent a number of pages.
-*   LSbs represent lower addresses (IOVA's).
-*
-***************************************************************/
-#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */
-
-/* Convert from IOVP to IOVA and vice versa. */
-#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset))
-#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase))
-
-#define PDIR_ENTRY_SIZE	sizeof(u64)
-
-#define PDIR_INDEX(iovp)   ((iovp)>>iovp_shift)
-
-#define RESMAP_MASK(n)    ~(~0UL << (n))
-#define RESMAP_IDX_MASK   (sizeof(unsigned long) - 1)
-
-
-/**
- * For most cases the normal get_order is sufficient, however it limits us
- * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
- * It only incurs about 1 clock cycle to use this one with the static variable
- * and makes the code more intuitive.
- */
-static SBA_INLINE int
-get_iovp_order (unsigned long size)
-{
-	long double d = size - 1;
-	long order;
-
-	order = ia64_getf_exp(d);
-	order = order - iovp_shift - 0xffff + 1;
-	if (order < 0)
-		order = 0;
-	return order;
-}
-
-/**
- * sba_search_bitmap - find free space in IO PDIR resource bitmap
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @bits_wanted: number of entries we need.
- * @use_hint: use res_hint to indicate where to start looking
- *
- * Find consecutive free bits in resource bitmap.
- * Each bit represents one entry in the IO Pdir.
- * Cool perf optimization: search for log2(size) bits at a time.
- */
-static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
-{
-	unsigned long *res_ptr;
-	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
-	unsigned long flags, pide = ~0UL;
-
-	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
-	ASSERT(res_ptr < res_end);
-
-	spin_lock_irqsave(&ioc->res_lock, flags);
-
-	/* Allow caller to force a search through the entire resource space */
-	if (likely(use_hint)) {
-		res_ptr = ioc->res_hint;
-	} else {
-		res_ptr = (ulong *)ioc->res_map;
-		ioc->res_bitshift = 0;
-	}
-
-	/*
-	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
-	 * if a TLB entry is purged while in use.  sba_mark_invalid()
-	 * purges IOTLB entries in power-of-two sizes, so we also
-	 * allocate IOVA space in power-of-two sizes.
-	 */
-	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);
-
-	if (likely(bits_wanted == 1)) {
-		unsigned int bitshiftcnt;
-		for(; res_ptr < res_end ; res_ptr++) {
-			if (likely(*res_ptr != ~0UL)) {
-				bitshiftcnt = ffz(*res_ptr);
-				*res_ptr |= (1UL << bitshiftcnt);
-				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-				pide <<= 3;	/* convert to bit address */
-				pide += bitshiftcnt;
-				ioc->res_bitshift = bitshiftcnt + bits_wanted;
-				goto found_it;
-			}
-		}
-		goto not_found;
-
-	}
-	
-	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
-		/*
-		** Search the resource bit map on well-aligned values.
-		** "o" is the alignment.
-		** We need the alignment to invalidate I/O TLB using
-		** SBA HW features in the unmap path.
-		*/
-		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
-		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
-		unsigned long mask, base_mask;
-
-		base_mask = RESMAP_MASK(bits_wanted);
-		mask = base_mask << bitshiftcnt;
-
-		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
-		for(; res_ptr < res_end ; res_ptr++)
-		{ 
-			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
-			ASSERT(0 != mask);
-			for (; mask ; mask <<= o, bitshiftcnt += o) {
-				if(0 == ((*res_ptr) & mask)) {
-					*res_ptr |= mask;     /* mark resources busy! */
-					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-					pide <<= 3;	/* convert to bit address */
-					pide += bitshiftcnt;
-					ioc->res_bitshift = bitshiftcnt + bits_wanted;
-					goto found_it;
-				}
-			}
-
-			bitshiftcnt = 0;
-			mask = base_mask;
-
-		}
-
-	} else {
-		int qwords, bits, i;
-		unsigned long *end;
-
-		qwords = bits_wanted >> 6; /* /64 */
-		bits = bits_wanted - (qwords * BITS_PER_LONG);
-
-		end = res_end - qwords;
-
-		for (; res_ptr < end; res_ptr++) {
-			for (i = 0 ; i < qwords ; i++) {
-				if (res_ptr[i] != 0)
-					goto next_ptr;
-			}
-			if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
-				continue;
-
-			/* Found it, mark it */
-			for (i = 0 ; i < qwords ; i++)
-				res_ptr[i] = ~0UL;
-			res_ptr[i] |= RESMAP_MASK(bits);
-
-			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-			pide <<= 3;	/* convert to bit address */
-			res_ptr += qwords;
-			ioc->res_bitshift = bits;
-			goto found_it;
-next_ptr:
-			;
-		}
-	}
-
-not_found:
-	prefetch(ioc->res_map);
-	ioc->res_hint = (unsigned long *) ioc->res_map;
-	ioc->res_bitshift = 0;
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-	return (pide);
-
-found_it:
-	ioc->res_hint = res_ptr;
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-	return (pide);
-}
-
-
-/**
- * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @size: number of bytes to create a mapping for
- *
- * Given a size, find consecutive unmarked and then mark those bits in the
- * resource bit map.
- */
-static int
-sba_alloc_range(struct ioc *ioc, size_t size)
-{
-	unsigned int pages_needed = size >> iovp_shift;
-#ifdef PDIR_SEARCH_TIMING
-	unsigned long itc_start;
-#endif
-	unsigned long pide;
-
-	ASSERT(pages_needed);
-	ASSERT(0 == (size & ~iovp_mask));
-
-#ifdef PDIR_SEARCH_TIMING
-	itc_start = ia64_get_itc();
-#endif
-	/*
-	** "seek and ye shall find"...praying never hurts either...
-	*/
-	pide = sba_search_bitmap(ioc, pages_needed, 1);
-	if (unlikely(pide >= (ioc->res_size << 3))) {
-		pide = sba_search_bitmap(ioc, pages_needed, 0);
-		if (unlikely(pide >= (ioc->res_size << 3))) {
-#if DELAYED_RESOURCE_CNT > 0
-			unsigned long flags;
-
-			/*
-			** With delayed resource freeing, we can give this one more shot.  We're
-			** getting close to being in trouble here, so do what we can to make this
-			** one count.
-			*/
-			spin_lock_irqsave(&ioc->saved_lock, flags);
-			if (ioc->saved_cnt > 0) {
-				struct sba_dma_pair *d;
-				int cnt = ioc->saved_cnt;
-
-				d = &(ioc->saved[ioc->saved_cnt - 1]);
-
-				spin_lock(&ioc->res_lock);
-				while (cnt--) {
-					sba_mark_invalid(ioc, d->iova, d->size);
-					sba_free_range(ioc, d->iova, d->size);
-					d--;
-				}
-				ioc->saved_cnt = 0;
-				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
-				spin_unlock(&ioc->res_lock);
-			}
-			spin_unlock_irqrestore(&ioc->saved_lock, flags);
-
-			pide = sba_search_bitmap(ioc, pages_needed, 0);
-			if (unlikely(pide >= (ioc->res_size << 3)))
-				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
-				      ioc->ioc_hpa);
-#else
-			panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
-			      ioc->ioc_hpa);
-#endif
-		}
-	}
-
-#ifdef PDIR_SEARCH_TIMING
-	ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
-	ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
-#endif
-
-	prefetchw(&(ioc->pdir_base[pide]));
-
-#ifdef ASSERT_PDIR_SANITY
-	/* verify the first enable bit is clear */
-	if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
-		sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
-	}
-#endif
-
-	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
-		__FUNCTION__, size, pages_needed, pide,
-		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
-		ioc->res_bitshift );
-
-	return (pide);
-}
-
-
-/**
- * sba_free_range - unmark bits in IO PDIR resource bitmap
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @iova: IO virtual address which was previously allocated.
- * @size: number of bytes to create a mapping for
- *
- * clear bits in the ioc's resource map
- */
-static SBA_INLINE void
-sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
-{
-	unsigned long iovp = SBA_IOVP(ioc, iova);
-	unsigned int pide = PDIR_INDEX(iovp);
-	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
-	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
-	int bits_not_wanted = size >> iovp_shift;
-	unsigned long m;
-
-	/* Round up to power-of-two size: see AR2305 note above */
-	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
-	for (; bits_not_wanted > 0 ; res_ptr++) {
-		
-		if (unlikely(bits_not_wanted > BITS_PER_LONG)) {
-
-			/* these mappings start 64bit aligned */
-			*res_ptr = 0UL;
-			bits_not_wanted -= BITS_PER_LONG;
-			pide += BITS_PER_LONG;
-
-		} else {
-
-			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
-			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
-			bits_not_wanted = 0;
-
-			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size,
-		        	bits_not_wanted, m, pide, res_ptr, *res_ptr);
-
-			ASSERT(m != 0);
-			ASSERT(bits_not_wanted);
-			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
-			*res_ptr &= ~m;
-		}
-	}
-}
-
-
-/**************************************************************
-*
-*   "Dynamic DMA Mapping" support (aka "Coherent I/O")
-*
-***************************************************************/
-
-/**
- * sba_io_pdir_entry - fill in one IO PDIR entry
- * @pdir_ptr:  pointer to IO PDIR entry
- * @vba: Virtual CPU address of buffer to map
- *
- * SBA Mapping Routine
- *
- * Given a virtual address (vba, arg1) sba_io_pdir_entry()
- * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
- * Each IO Pdir entry consists of 8 bytes as shown below
- * (LSB == bit 0):
- *
- *  63                    40                                 11    7        0
- * +-+---------------------+----------------------------------+----+--------+
- * |V|        U            |            PPN[39:12]            | U  |   FF   |
- * +-+---------------------+----------------------------------+----+--------+
- *
- *  V  == Valid Bit
- *  U  == Unused
- * PPN == Physical Page Number
- *
- * The physical address fields are filled with the results of virt_to_phys()
- * on the vba.
- */
-
-#if 1
-#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr =	\
-	((virt_to_bus((void *)vba) & ~0xFFFULL) | 0x8000000000000000ULL)
-#else
-void SBA_INLINE
-sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
-{
-	*pdir_ptr = ((virt_to_bus((void *)vba) & ~0xFFFULL) |
-		    0x80000000000000FFULL);
-}
-#endif
-
-#ifdef ENABLE_MARK_CLEAN
-/**
- * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
- * flush them when they get mapped into an executable vm-area.
- */
-static void
-mark_clean (void *addr, size_t size)
-{
-	unsigned long pg_addr, end;
-
-#ifdef CONFIG_XEN
-	/* XXX: Bad things happen starting domUs when this is enabled. */
-	if (is_running_on_xen())
-		return;
-#endif
-
-	pg_addr = PAGE_ALIGN((unsigned long) addr);
-	end = (unsigned long) addr + size;
-	while (pg_addr + PAGE_SIZE <= end) {
-		struct page *page = virt_to_page((void *)pg_addr);
-		set_bit(PG_arch_1, &page->flags);
-		pg_addr += PAGE_SIZE;
-	}
-}
-#endif
-
-/**
- * sba_mark_invalid - invalidate one or more IO PDIR entries
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @iova:  IO Virtual Address mapped earlier
- * @byte_cnt:  number of bytes this mapping covers.
- *
- * Marking the IO PDIR entry(ies) as Invalid and invalidate
- * corresponding IO TLB entry. The PCOM (Purge Command Register)
- * is to purge stale entries in the IO TLB when unmapping entries.
- *
- * The PCOM register supports purging of multiple pages, with a minium
- * of 1 page and a maximum of 2GB. Hardware requires the address be
- * aligned to the size of the range being purged. The size of the range
- * must be a power of 2. The "Cool perf optimization" in the
- * allocation routine helps keep that true.
- */
-static SBA_INLINE void
-sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
-{
-	u32 iovp = (u32) SBA_IOVP(ioc,iova);
-
-	int off = PDIR_INDEX(iovp);
-
-	/* Must be non-zero and rounded up */
-	ASSERT(byte_cnt > 0);
-	ASSERT(0 == (byte_cnt & ~iovp_mask));
-
-#ifdef ASSERT_PDIR_SANITY
-	/* Assert first pdir entry is set */
-	if (!(ioc->pdir_base[off] >> 60)) {
-		sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
-	}
-#endif
-
-	if (byte_cnt <= iovp_size)
-	{
-		ASSERT(off < ioc->pdir_size);
-
-		iovp |= iovp_shift;     /* set "size" field for PCOM */
-
-#ifndef FULL_VALID_PDIR
-		/*
-		** clear I/O PDIR entry "valid" bit
-		** Do NOT clear the rest - save it for debugging.
-		** We should only clear bits that have previously
-		** been enabled.
-		*/
-		ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
-#else
-		/*
-  		** If we want to maintain the PDIR as valid, put in
-		** the spill page so devices prefetching won't
-		** cause a hard fail.
-		*/
-		ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
-#endif
-	} else {
-		u32 t = get_iovp_order(byte_cnt) + iovp_shift;
-
-		iovp |= t;
-		ASSERT(t <= 31);   /* 2GB! Max value of "size" field */
-
-		do {
-			/* verify this pdir entry is enabled */
-			ASSERT(ioc->pdir_base[off]  >> 63);
-#ifndef FULL_VALID_PDIR
-			/* clear I/O Pdir entry "valid" bit first */
-			ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
-#else
-			ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
-#endif
-			off++;
-			byte_cnt -= iovp_size;
-		} while (byte_cnt > 0);
-	}
-
-	WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
-}
-
-/**
- * sba_map_single - map one buffer and return IOVA for DMA
- * @dev: instance of PCI owned by the driver that's asking.
- * @addr:  driver buffer to map.
- * @size:  number of bytes to map in driver buffer.
- * @dir:  R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-dma_addr_t
-sba_map_single(struct device *dev, void *addr, size_t size, int dir)
-{
-	struct ioc *ioc;
-	dma_addr_t iovp;
-	dma_addr_t offset;
-	u64 *pdir_start;
-	int pide;
-#ifdef ASSERT_PDIR_SANITY
-	unsigned long flags;
-#endif
-#ifdef ALLOW_IOV_BYPASS
-	unsigned long pci_addr = virt_to_bus(addr);
-
-	ASSERT(to_pci_dev(dev)->dma_mask);
-	/*
- 	** Check if the PCI device can DMA to ptr... if so, just return ptr
- 	*/
-	if (likely(pci_addr & ~to_pci_dev(dev)->dma_mask) == 0 &&
-		   !range_straddles_page_boundary(addr, size)) {
-		/*
- 		** Device is bit capable of DMA'ing to the buffer...
-		** just return the PCI address of ptr
- 		*/
-		DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
-		           to_pci_dev(dev)->dma_mask, pci_addr);
-		return pci_addr;
-	}
-#endif
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
-
-	prefetch(ioc->res_hint);
-
-	ASSERT(size > 0);
-	ASSERT(size <= DMA_CHUNK_SIZE);
-
-	/* save offset bits */
-	offset = ((dma_addr_t) (long) addr) & ~iovp_mask;
-
-	/* round up to nearest iovp_size */
-	size = (size + offset + ~iovp_mask) & iovp_mask;
-
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check before sba_map_single()"))
-		panic("Sanity check failed");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-	pide = sba_alloc_range(ioc, size);
-
-	iovp = (dma_addr_t) pide << iovp_shift;
-
-	DBG_RUN("%s() 0x%p -> 0x%lx\n",
-		__FUNCTION__, addr, (long) iovp | offset);
-
-	pdir_start = &(ioc->pdir_base[pide]);
-
-	while (size > 0) {
-		ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
-		sba_io_pdir_entry(pdir_start, (unsigned long) addr);
-
-		DBG_RUN("     pdir 0x%p %lx\n", pdir_start, *pdir_start);
-
-		addr += iovp_size;
-		size -= iovp_size;
-		pdir_start++;
-	}
-	/* force pdir update */
-	wmb();
-
-	/* form complete address */
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_map_single()");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-	return SBA_IOVA(ioc, iovp, offset);
-}
-
-#ifdef ENABLE_MARK_CLEAN
-static SBA_INLINE void
-sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
-{
-	u32	iovp = (u32) SBA_IOVP(ioc,iova);
-	int	off = PDIR_INDEX(iovp);
-	void	*addr;
-
-	if (size <= iovp_size) {
-		addr = bus_to_virt(ioc->pdir_base[off] &
-				   ~0xE000000000000FFFULL);
-		mark_clean(addr, size);
-	} else {
-		do {
-			addr = bus_to_virt(ioc->pdir_base[off] &
-					   ~0xE000000000000FFFULL);
-			mark_clean(addr, min(size, iovp_size));
-			off++;
-			size -= iovp_size;
-		} while (size > 0);
-	}
-}
-#endif
-
-/**
- * sba_unmap_single - unmap one IOVA and free resources
- * @dev: instance of PCI owned by the driver that's asking.
- * @iova:  IOVA of driver buffer previously mapped.
- * @size:  number of bytes mapped in driver buffer.
- * @dir:  R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
-{
-	struct ioc *ioc;
-#if DELAYED_RESOURCE_CNT > 0
-	struct sba_dma_pair *d;
-#endif
-	unsigned long flags;
-	dma_addr_t offset;
-
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
-
-#ifdef ALLOW_IOV_BYPASS
-	if (likely((iova & ioc->imask) != ioc->ibase)) {
-		/*
-		** Address does not fall w/in IOVA, must be bypassing
-		*/
-		DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
-
-#ifdef ENABLE_MARK_CLEAN
-		if (dir == DMA_FROM_DEVICE) {
-			mark_clean(bus_to_virt(iova), size);
-		}
-#endif
-		return;
-	}
-#endif
-	offset = iova & ~iovp_mask;
-
-	DBG_RUN("%s() iovp 0x%lx/%x\n",
-		__FUNCTION__, (long) iova, size);
-
-	iova ^= offset;        /* clear offset bits */
-	size += offset;
-	size = ROUNDUP(size, iovp_size);
-
-#ifdef ENABLE_MARK_CLEAN
-	if (dir == DMA_FROM_DEVICE)
-		sba_mark_clean(ioc, iova, size);
-#endif
-
-#if DELAYED_RESOURCE_CNT > 0
-	spin_lock_irqsave(&ioc->saved_lock, flags);
-	d = &(ioc->saved[ioc->saved_cnt]);
-	d->iova = iova;
-	d->size = size;
-	if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
-		int cnt = ioc->saved_cnt;
-		spin_lock(&ioc->res_lock);
-		while (cnt--) {
-			sba_mark_invalid(ioc, d->iova, d->size);
-			sba_free_range(ioc, d->iova, d->size);
-			d--;
-		}
-		ioc->saved_cnt = 0;
-		READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
-		spin_unlock(&ioc->res_lock);
-	}
-	spin_unlock_irqrestore(&ioc->saved_lock, flags);
-#else /* DELAYED_RESOURCE_CNT == 0 */
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_mark_invalid(ioc, iova, size);
-	sba_free_range(ioc, iova, size);
-	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif /* DELAYED_RESOURCE_CNT == 0 */
-}
-
-
-/**
- * sba_alloc_coherent - allocate/map shared mem for DMA
- * @dev: instance of PCI owned by the driver that's asking.
- * @size:  number of bytes mapped in driver buffer.
- * @dma_handle:  IOVA of new buffer.
- *
- * See Documentation/DMA-mapping.txt
- */
-void *
-sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
-{
-	struct ioc *ioc;
-	void *addr;
-
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
-
-#ifdef CONFIG_NUMA
-	{
-		struct page *page;
-		page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
-		                        numa_node_id() : ioc->node, flags,
-		                        get_order(size));
-
-		if (unlikely(!page))
-			return NULL;
-
-		addr = page_address(page);
-	}
-#else
-	addr = (void *) __get_free_pages(flags, get_order(size));
-#endif
-	if (unlikely(!addr))
-		return NULL;
-
-	memset(addr, 0, size);
-
-#ifdef ALLOW_IOV_BYPASS
-#ifdef CONFIG_XEN
-	if (xen_create_contiguous_region((unsigned long)addr, get_order(size),
-					 fls64(dev->coherent_dma_mask)))
-		goto iommu_map;
-#endif
-	*dma_handle = virt_to_bus(addr);
-	ASSERT(dev->coherent_dma_mask);
-	/*
- 	** Check if the PCI device can DMA to ptr... if so, just return ptr
- 	*/
-	if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
-		DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
-		           dev->coherent_dma_mask, *dma_handle);
-
-		return addr;
-	}
-#ifdef CONFIG_XEN
-iommu_map:
-#endif
-#endif
-
-	/*
-	 * If device can't bypass or bypass is disabled, pass the 32bit fake
-	 * device to map single to get an iova mapping.
-	 */
-	*dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
-
-	return addr;
-}
-
-
-/**
- * sba_free_coherent - free/unmap shared mem for DMA
- * @dev: instance of PCI owned by the driver that's asking.
- * @size:  number of bytes mapped in driver buffer.
- * @vaddr:  virtual address IOVA of "consistent" buffer.
- * @dma_handler:  IO virtual address of "consistent" buffer.
- *
- * See Documentation/DMA-mapping.txt
- */
-void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
-{
-#if defined(ALLOW_IOV_BYPASS) && defined(CONFIG_XEN)
-	struct ioc *ioc = GET_IOC(dev);
-
-	if (likely((dma_handle & ioc->imask) != ioc->ibase))
-		xen_destroy_contiguous_region((unsigned long)vaddr,
-					      get_order(size));
-#endif
-	sba_unmap_single(dev, dma_handle, size, 0);
-	free_pages((unsigned long) vaddr, get_order(size));
-}
-
-
-/*
-** Since 0 is a valid pdir_base index value, can't use that
-** to determine if a value is valid or not. Use a flag to indicate
-** the SG list entry contains a valid pdir index.
-*/
-#define PIDE_FLAG 0x1UL
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
-int dump_run_sg = 0;
-#endif
-
-
-/**
- * sba_fill_pdir - write allocated SG entries into IO PDIR
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @startsg:  list of IOVA/size pairs
- * @nents: number of entries in startsg list
- *
- * Take preprocessed SG list and write corresponding entries
- * in the IO PDIR.
- */
-
-static SBA_INLINE int
-sba_fill_pdir(
-	struct ioc *ioc,
-	struct scatterlist *startsg,
-	int nents)
-{
-	struct scatterlist *dma_sg = startsg;	/* pointer to current DMA */
-	int n_mappings = 0;
-	u64 *pdirp = NULL;
-	unsigned long dma_offset = 0;
-
-	dma_sg--;
-	while (nents-- > 0) {
-		int     cnt = startsg->dma_length;
-		startsg->dma_length = 0;
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
-		if (dump_run_sg)
-			printk(" %2d : %08lx/%05x %p\n",
-				nents, startsg->dma_address, cnt,
-				sba_sg_address(startsg));
-#else
-		DBG_RUN_SG(" %d : %08lx/%05x %p\n",
-				nents, startsg->dma_address, cnt,
-				sba_sg_address(startsg));
-#endif
-		/*
-		** Look for the start of a new DMA stream
-		*/
-		if (startsg->dma_address & PIDE_FLAG) {
-			u32 pide = startsg->dma_address & ~PIDE_FLAG;
-			dma_offset = (unsigned long) pide & ~iovp_mask;
-			startsg->dma_address = 0;
-			dma_sg++;
-			dma_sg->dma_address = pide | ioc->ibase;
-			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
-			n_mappings++;
-		}
-
-		/*
-		** Look for a VCONTIG chunk
-		*/
-		if (cnt) {
-			unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
-			ASSERT(pdirp);
-
-			/* Since multiple Vcontig blocks could make up
-			** one DMA stream, *add* cnt to dma_len.
-			*/
-			dma_sg->dma_length += cnt;
-			cnt += dma_offset;
-			dma_offset=0;	/* only want offset on first chunk */
-			cnt = ROUNDUP(cnt, iovp_size);
-			do {
-				sba_io_pdir_entry(pdirp, vaddr);
-				vaddr += iovp_size;
-				cnt -= iovp_size;
-				pdirp++;
-			} while (cnt > 0);
-		}
-		startsg++;
-	}
-	/* force pdir update */
-	wmb();
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
-	dump_run_sg = 0;
-#endif
-	return(n_mappings);
-}
-
-
-/*
-** Two address ranges are DMA contiguous *iff* "end of prev" and
-** "start of next" are both on an IOV page boundary.
-**
-** (shift left is a quick trick to mask off upper bits)
-*/
-#define DMA_CONTIG(__X, __Y) \
-	(((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)
-
-
-/**
- * sba_coalesce_chunks - preprocess the SG list
- * @ioc: IO MMU structure which owns the pdir we are interested in.
- * @startsg:  list of IOVA/size pairs
- * @nents: number of entries in startsg list
- *
- * First pass is to walk the SG list and determine where the breaks are
- * in the DMA stream. Allocates PDIR entries but does not fill them.
- * Returns the number of DMA chunks.
- *
- * Doing the fill separate from the coalescing/allocation keeps the
- * code simpler. Future enhancement could make one pass through
- * the sglist do both.
- */
-static SBA_INLINE int
-sba_coalesce_chunks( struct ioc *ioc,
-	struct scatterlist *startsg,
-	int nents)
-{
-	struct scatterlist *vcontig_sg;    /* VCONTIG chunk head */
-	unsigned long vcontig_len;         /* len of VCONTIG chunk */
-	unsigned long vcontig_end;
-	struct scatterlist *dma_sg;        /* next DMA stream head */
-	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
-	int n_mappings = 0;
-
-	while (nents > 0) {
-		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
-
-		/*
-		** Prepare for first/next DMA stream
-		*/
-		dma_sg = vcontig_sg = startsg;
-		dma_len = vcontig_len = vcontig_end = startsg->length;
-		vcontig_end +=  vaddr;
-		dma_offset = vaddr & ~iovp_mask;
-
-		/* PARANOID: clear entries */
-		startsg->dma_address = startsg->dma_length = 0;
-
-		/*
-		** This loop terminates one iteration "early" since
-		** it's always looking one "ahead".
-		*/
-		while (--nents > 0) {
-			unsigned long vaddr;	/* tmp */
-
-			startsg++;
-
-			/* PARANOID */
-			startsg->dma_address = startsg->dma_length = 0;
-
-			/* catch brokenness in SCSI layer */
-			ASSERT(startsg->length <= DMA_CHUNK_SIZE);
-
-			/*
-			** First make sure current dma stream won't
-			** exceed DMA_CHUNK_SIZE if we coalesce the
-			** next entry.
-			*/
-			if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
-			    > DMA_CHUNK_SIZE)
-				break;
-
-			/*
-			** Then look for virtually contiguous blocks.
-			**
-			** append the next transaction?
-			*/
-			vaddr = (unsigned long) sba_sg_address(startsg);
-			if  (vcontig_end == vaddr)
-			{
-				vcontig_len += startsg->length;
-				vcontig_end += startsg->length;
-				dma_len     += startsg->length;
-				continue;
-			}
-
-#ifdef DEBUG_LARGE_SG_ENTRIES
-			dump_run_sg = (vcontig_len > iovp_size);
-#endif
-
-			/*
-			** Not virtually contigous.
-			** Terminate prev chunk.
-			** Start a new chunk.
-			**
-			** Once we start a new VCONTIG chunk, dma_offset
-			** can't change. And we need the offset from the first
-			** chunk - not the last one. Ergo Successive chunks
-			** must start on page boundaries and dove tail
-			** with it's predecessor.
-			*/
-			vcontig_sg->dma_length = vcontig_len;
-
-			vcontig_sg = startsg;
-			vcontig_len = startsg->length;
-
-			/*
-			** 3) do the entries end/start on page boundaries?
-			**    Don't update vcontig_end until we've checked.
-			*/
-			if (DMA_CONTIG(vcontig_end, vaddr))
-			{
-				vcontig_end = vcontig_len + vaddr;
-				dma_len += vcontig_len;
-				continue;
-			} else {
-				break;
-			}
-		}
-
-		/*
-		** End of DMA Stream
-		** Terminate last VCONTIG block.
-		** Allocate space for DMA stream.
-		*/
-		vcontig_sg->dma_length = vcontig_len;
-		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
-		ASSERT(dma_len <= DMA_CHUNK_SIZE);
-		dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
-			| (sba_alloc_range(ioc, dma_len) << iovp_shift)
-			| dma_offset);
-		n_mappings++;
-	}
-
-	return n_mappings;
-}
-
-
-/**
- * sba_map_sg - map Scatter/Gather list
- * @dev: instance of PCI owned by the driver that's asking.
- * @sglist:  array of buffer/length pairs
- * @nents:  number of entries in list
- * @dir:  R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
-{
-	struct ioc *ioc;
-	int coalesced, filled = 0;
-#ifdef ASSERT_PDIR_SANITY
-	unsigned long flags;
-#endif
-#ifdef ALLOW_IOV_BYPASS_SG
-	struct scatterlist *sg;
-#endif
-
-	DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
-
-#ifdef ALLOW_IOV_BYPASS_SG
-	ASSERT(to_pci_dev(dev)->dma_mask);
-	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
-		for (sg = sglist ; filled < nents ; filled++, sg++){
-			sg->dma_length = sg->length;
-			sg->dma_address = virt_to_bus(sba_sg_address(sg));
-		}
-		return filled;
-	}
-#endif
-	/* Fast path single entry scatterlists. */
-	if (nents == 1) {
-		sglist->dma_length = sglist->length;
-		sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
-		return 1;
-	}
-
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
-	{
-		sba_dump_sg(ioc, sglist, nents);
-		panic("Check before sba_map_sg()");
-	}
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-	prefetch(ioc->res_hint);
-
-	/*
-	** First coalesce the chunks and allocate I/O pdir space
-	**
-	** If this is one DMA stream, we can properly map using the
-	** correct virtual address associated with each DMA page.
-	** w/o this association, we wouldn't have coherent DMA!
-	** Access to the virtual address is what forces a two pass algorithm.
-	*/
-	coalesced = sba_coalesce_chunks(ioc, sglist, nents);
-
-	/*
-	** Program the I/O Pdir
-	**
-	** map the virtual addresses to the I/O Pdir
-	** o dma_address will contain the pdir index
-	** o dma_len will contain the number of bytes to map
-	** o address contains the virtual address.
-	*/
-	filled = sba_fill_pdir(ioc, sglist, nents);
-
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
-	{
-		sba_dump_sg(ioc, sglist, nents);
-		panic("Check after sba_map_sg()\n");
-	}
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-	ASSERT(coalesced == filled);
-	DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);
-
-	return filled;
-}
-
-
-/**
- * sba_unmap_sg - unmap Scatter/Gather list
- * @dev: instance of PCI owned by the driver that's asking.
- * @sglist:  array of buffer/length pairs
- * @nents:  number of entries in list
- * @dir:  R/W or both.
- *
- * See Documentation/DMA-mapping.txt
- */
-void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
-{
-#ifdef ASSERT_PDIR_SANITY
-	struct ioc *ioc;
-	unsigned long flags;
-#endif
-
-	DBG_RUN_SG("%s() START %d entries,  %p,%x\n",
-		__FUNCTION__, nents, sba_sg_address(sglist), sglist->length);
-
-#ifdef ASSERT_PDIR_SANITY
-	ioc = GET_IOC(dev);
-	ASSERT(ioc);
-
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check before sba_unmap_sg()");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-	while (nents && sglist->dma_length) {
-
-		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
-		sglist++;
-		nents--;
-	}
-
-	DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__,  nents);
-
-#ifdef ASSERT_PDIR_SANITY
-	spin_lock_irqsave(&ioc->res_lock, flags);
-	sba_check_pdir(ioc,"Check after sba_unmap_sg()");
-	spin_unlock_irqrestore(&ioc->res_lock, flags);
-#endif
-
-}
-
-/**************************************************************
-*
-*   Initialization and claim
-*
-***************************************************************/
-
-static void __init
-ioc_iova_init(struct ioc *ioc)
-{
-	int tcnfg;
-	int agp_found = 0;
-	struct pci_dev *device = NULL;
-#ifdef FULL_VALID_PDIR
-	unsigned long index;
-#endif
-
-	/*
-	** Firmware programs the base and size of a "safe IOVA space"
-	** (one that doesn't overlap memory or LMMIO space) in the
-	** IBASE and IMASK registers.
-	*/
-	ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
-	ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;
-
-	ioc->iov_size = ~ioc->imask + 1;
-
-	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
-		__FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
-		ioc->iov_size >> 20);
-
-	switch (iovp_size) {
-		case  4*1024: tcnfg = 0; break;
-		case  8*1024: tcnfg = 1; break;
-		case 16*1024: tcnfg = 2; break;
-		case 64*1024: tcnfg = 3; break;
-		default:
-			panic(PFX "Unsupported IOTLB page size %ldK",
-				iovp_size >> 10);
-			break;
-	}
-	WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);
-
-	ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
-	ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
-						   get_order(ioc->pdir_size));
-	if (!ioc->pdir_base)
-		panic(PFX "Couldn't allocate I/O Page Table\n");
-
-#ifdef CONFIG_XEN
-	/* The page table needs to be pinned in Xen memory */
-	if (xen_create_contiguous_region((unsigned long)ioc->pdir_base,
-					 get_order(ioc->pdir_size), 0))
-		panic(PFX "Couldn't contiguously map I/O Page Table\n");
-#endif
-	memset(ioc->pdir_base, 0, ioc->pdir_size);
-
-	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__,
-		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);
-
-	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
-	WRITE_REG(virt_to_bus(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);
-
-	/*
-	** If an AGP device is present, only use half of the IOV space
-	** for PCI DMA.  Unfortunately we can't know ahead of time
-	** whether GART support will actually be used, for now we
-	** can just key on an AGP device found in the system.
-	** We program the next pdir index after we stop w/ a key for
-	** the GART code to handshake on.
-	*/
-	for_each_pci_dev(device)	
-		agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);
-
-	if (agp_found && reserve_sba_gart) {
-		printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
-		      ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
-		ioc->pdir_size /= 2;
-		((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
-	}
-#ifdef FULL_VALID_PDIR
-	/*
-  	** Check to see if the spill page has been allocated, we don't need more than
-	** one across multiple SBAs.
-	*/
-	if (!prefetch_spill_page) {
-		char *spill_poison = "SBAIOMMU POISON";
-		int poison_size = 16;
-		void *poison_addr, *addr;
-
-		addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
-		if (!addr)
-			panic(PFX "Couldn't allocate PDIR spill page\n");
-
-		poison_addr = addr;
-		for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size)
-			memcpy(poison_addr, spill_poison, poison_size);
-
-		prefetch_spill_page = virt_to_bus(addr);
-
-		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page);
-	}
-	/*
-  	** Set all the PDIR entries valid w/ the spill page as the target
-	*/
-	for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
-		((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
-#endif
-
-	/* Clear I/O TLB of any possible entries */
-	WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
-	READ_REG(ioc->ioc_hpa + IOC_PCOM);
-
-	/* Enable IOVA translation */
-	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
-	READ_REG(ioc->ioc_hpa + IOC_IBASE);
-}
-
-static void __init
-ioc_resource_init(struct ioc *ioc)
-{
-	spin_lock_init(&ioc->res_lock);
-#if DELAYED_RESOURCE_CNT > 0
-	spin_lock_init(&ioc->saved_lock);
-#endif
-
-	/* resource map size dictated by pdir_size */
-	ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
-	ioc->res_size >>= 3;  /* convert bit count to byte count */
-	DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size);
-
-	ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
-						 get_order(ioc->res_size));
-	if (!ioc->res_map)
-		panic(PFX "Couldn't allocate resource map\n");
-
-	memset(ioc->res_map, 0, ioc->res_size);
-	/* next available IOVP - circular search */
-	ioc->res_hint = (unsigned long *) ioc->res_map;
-
-#ifdef ASSERT_PDIR_SANITY
-	/* Mark first bit busy - ie no IOVA 0 */
-	ioc->res_map[0] = 0x1;
-	ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
-#endif
-#ifdef FULL_VALID_PDIR
-	/* Mark the last resource used so we don't prefetch beyond IOVA space */
-	ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */
-	ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
-							      | prefetch_spill_page);
-#endif
-
-	DBG_INIT("%s() res_map %x %p\n", __FUNCTION__,
-		 ioc->res_size, (void *) ioc->res_map);
-}
-
-static void __init
-ioc_sac_init(struct ioc *ioc)
-{
-	struct pci_dev *sac = NULL;
-	struct pci_controller *controller = NULL;
-
-	/*
-	 * pci_alloc_coherent() must return a DMA address which is
-	 * SAC (single address cycle) addressable, so allocate a
-	 * pseudo-device to enforce that.
-	 */
-	sac = kmalloc(sizeof(*sac), GFP_KERNEL);
-	if (!sac)
-		panic(PFX "Couldn't allocate struct pci_dev");
-	memset(sac, 0, sizeof(*sac));
-
-	controller = kmalloc(sizeof(*controller), GFP_KERNEL);
-	if (!controller)
-		panic(PFX "Couldn't allocate struct pci_controller");
-	memset(controller, 0, sizeof(*controller));
-
-	controller->iommu = ioc;
-	sac->sysdata = controller;
-	sac->dma_mask = 0xFFFFFFFFUL;
-#ifdef CONFIG_PCI
-	sac->dev.bus = &pci_bus_type;
-#endif
-	ioc->sac_only_dev = sac;
-}
-
-static void __init
-ioc_zx1_init(struct ioc *ioc)
-{
-	unsigned long rope_config;
-	unsigned int i;
-
-	if (ioc->rev < 0x20)
-		panic(PFX "IOC 2.0 or later required for IOMMU support\n");
-
-	/* 38 bit memory controller + extra bit for range displaced by MMIO */
-	ioc->dma_mask = (0x1UL << 39) - 1;
-
-	/*
-	** Clear ROPE(N)_CONFIG AO bit.
-	** Disables "NT Ordering" (~= !"Relaxed Ordering")
-	** Overrides bit 1 in DMA Hint Sets.
-	** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
-	*/
-	for (i=0; i<(8*8); i+=8) {
-		rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i);
-		rope_config &= ~IOC_ROPE_AO;
-		WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i);
-	}
-}
-
-typedef void (initfunc)(struct ioc *);
-
-struct ioc_iommu {
-	u32 func_id;
-	char *name;
-	initfunc *init;
-};
-
-static struct ioc_iommu ioc_iommu_info[] __initdata = {
-	{ ZX1_IOC_ID, "zx1", ioc_zx1_init },
-	{ ZX2_IOC_ID, "zx2", NULL },
-	{ SX1000_IOC_ID, "sx1000", NULL },
-	{ SX2000_IOC_ID, "sx2000", NULL },
-};
-
-static struct ioc * __init
-ioc_init(u64 hpa, void *handle)
-{
-	struct ioc *ioc;
-	struct ioc_iommu *info;
-
-	ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
-	if (!ioc)
-		return NULL;
-
-	memset(ioc, 0, sizeof(*ioc));
-
-	ioc->next = ioc_list;
-	ioc_list = ioc;
-
-	ioc->handle = handle;
-	ioc->ioc_hpa = ioremap(hpa, 0x1000);
-
-	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
-	ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
-	ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL;	/* conservative */
-
-	for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
-		if (ioc->func_id == info->func_id) {
-			ioc->name = info->name;
-			if (info->init)
-				(info->init)(ioc);
-		}
-	}
-
-	iovp_size = (1 << iovp_shift);
-	iovp_mask = ~(iovp_size - 1);
-
-	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__,
-		PAGE_SIZE >> 10, iovp_size >> 10);
-
-	if (!ioc->name) {
-		ioc->name = kmalloc(24, GFP_KERNEL);
-		if (ioc->name)
-			sprintf((char *) ioc->name, "Unknown (%04x:%04x)",
-				ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
-		else
-			ioc->name = "Unknown";
-	}
-
-	ioc_iova_init(ioc);
-	ioc_resource_init(ioc);
-	ioc_sac_init(ioc);
-
-	if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
-		ia64_max_iommu_merge_mask = ~iovp_mask;
-
-	printk(KERN_INFO PFX
-		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
-		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
-		hpa, ioc->iov_size >> 20, ioc->ibase);
-
-	return ioc;
-}
-
-
-
-/**************************************************************************
-**
-**   SBA initialization code (HW and SW)
-**
-**   o identify SBA chip itself
-**   o FIXME: initialize DMA hints for reasonable defaults
-**
-**************************************************************************/
-
-#ifdef CONFIG_PROC_FS
-static void *
-ioc_start(struct seq_file *s, loff_t *pos)
-{
-	struct ioc *ioc;
-	loff_t n = *pos;
-
-	for (ioc = ioc_list; ioc; ioc = ioc->next)
-		if (!n--)
-			return ioc;
-
-	return NULL;
-}
-
-static void *
-ioc_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	struct ioc *ioc = v;
-
-	++*pos;
-	return ioc->next;
-}
-
-static void
-ioc_stop(struct seq_file *s, void *v)
-{
-}
-
-static int
-ioc_show(struct seq_file *s, void *v)
-{
-	struct ioc *ioc = v;
-	unsigned long *res_ptr = (unsigned long *)ioc->res_map;
-	int i, used = 0;
-
-	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
-		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
-#ifdef CONFIG_NUMA
-	if (ioc->node != MAX_NUMNODES)
-		seq_printf(s, "NUMA node       : %d\n", ioc->node);
-#endif
-	seq_printf(s, "IOVA size       : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
-	seq_printf(s, "IOVA page size  : %ld kb\n", iovp_size/1024);
-
-	for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
-		used += hweight64(*res_ptr);
-
-	seq_printf(s, "PDIR size       : %d entries\n", ioc->pdir_size >> 3);
-	seq_printf(s, "PDIR used       : %d entries\n", used);
-
-#ifdef PDIR_SEARCH_TIMING
-	{
-		unsigned long i = 0, avg = 0, min, max;
-		min = max = ioc->avg_search[0];
-		for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
-			avg += ioc->avg_search[i];
-			if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
-			if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
-		}
-		avg /= SBA_SEARCH_SAMPLE;
-		seq_printf(s, "Bitmap search   : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
-		           min, avg, max);
-	}
-#endif
-#ifndef ALLOW_IOV_BYPASS
-	 seq_printf(s, "IOVA bypass disabled\n");
-#endif
-	return 0;
-}
-
-static struct seq_operations ioc_seq_ops = {
-	.start = ioc_start,
-	.next  = ioc_next,
-	.stop  = ioc_stop,
-	.show  = ioc_show
-};
-
-static int
-ioc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ioc_seq_ops);
-}
-
-static struct file_operations ioc_fops = {
-	.open    = ioc_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release
-};
-
-static void __init
-ioc_proc_init(void)
-{
-	struct proc_dir_entry *dir, *entry;
-
-	dir = proc_mkdir("bus/mckinley", NULL);
-	if (!dir)
-		return;
-
-	entry = create_proc_entry(ioc_list->name, 0, dir);
-	if (entry)
-		entry->proc_fops = &ioc_fops;
-}
-#endif
-
-static void
-sba_connect_bus(struct pci_bus *bus)
-{
-	acpi_handle handle, parent;
-	acpi_status status;
-	struct ioc *ioc;
-
-	if (!PCI_CONTROLLER(bus))
-		panic(PFX "no sysdata on bus %d!\n", bus->number);
-
-	if (PCI_CONTROLLER(bus)->iommu)
-		return;
-
-	handle = PCI_CONTROLLER(bus)->acpi_handle;
-	if (!handle)
-		return;
-
-	/*
-	 * The IOC scope encloses PCI root bridges in the ACPI
-	 * namespace, so work our way out until we find an IOC we
-	 * claimed previously.
-	 */
-	do {
-		for (ioc = ioc_list; ioc; ioc = ioc->next)
-			if (ioc->handle == handle) {
-				PCI_CONTROLLER(bus)->iommu = ioc;
-				return;
-			}
-
-		status = acpi_get_parent(handle, &parent);
-		handle = parent;
-	} while (ACPI_SUCCESS(status));
-
-	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
-}
-
-#ifdef CONFIG_NUMA
-static void __init
-sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
-{
-	unsigned int node;
-	int pxm;
-
-	ioc->node = MAX_NUMNODES;
-
-	pxm = acpi_get_pxm(handle);
-
-	if (pxm < 0)
-		return;
-
-	node = pxm_to_node(pxm);
-
-	if (node >= MAX_NUMNODES || !node_online(node))
-		return;
-
-	ioc->node = node;
-	return;
-}
-#else
-#define sba_map_ioc_to_node(ioc, handle)
-#endif
-
-static int __init
-acpi_sba_ioc_add(struct acpi_device *device)
-{
-	struct ioc *ioc;
-	acpi_status status;
-	u64 hpa, length;
-	struct acpi_buffer buffer;
-	struct acpi_device_info *dev_info;
-
-	status = hp_acpi_csr_space(device->handle, &hpa, &length);
-	if (ACPI_FAILURE(status))
-		return 1;
-
-	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
-	status = acpi_get_object_info(device->handle, &buffer);
-	if (ACPI_FAILURE(status))
-		return 1;
-	dev_info = buffer.pointer;
-
-	/*
-	 * For HWP0001, only SBA appears in ACPI namespace.  It encloses the PCI
-	 * root bridges, and its CSR space includes the IOC function.
-	 */
-	if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
-		hpa += ZX1_IOC_OFFSET;
-		/* zx1 based systems default to kernel page size iommu pages */
-		if (!iovp_shift)
-			iovp_shift = min(PAGE_SHIFT, 16);
-	}
-	kfree(dev_info);
-
-	/*
-	 * default anything not caught above or specified on cmdline to 4k
-	 * iommu page size
-	 */
-	if (!iovp_shift)
-		iovp_shift = 12;
-
-	ioc = ioc_init(hpa, device->handle);
-	if (!ioc)
-		return 1;
-
-	/* setup NUMA node association */
-	sba_map_ioc_to_node(ioc, device->handle);
-	return 0;
-}
-
-static struct acpi_driver acpi_sba_ioc_driver = {
-	.name		= "IOC IOMMU Driver",
-	.ids		= "HWP0001,HWP0004",
-	.ops		= {
-		.add	= acpi_sba_ioc_add,
-	},
-};
-
-static int __init
-sba_init(void)
-{
-	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
-		return 0;
-
-	acpi_bus_register_driver(&acpi_sba_ioc_driver);
-	if (!ioc_list) {
-#ifdef CONFIG_IA64_GENERIC
-		extern int swiotlb_late_init_with_default_size (size_t size);
-
-		/*
-		 * If we didn't find something sba_iommu can claim, we
-		 * need to setup the swiotlb and switch to the dig machvec.
-		 */
-		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
-			panic("Unable to find SBA IOMMU or initialize "
-			      "software I/O TLB: Try machvec=dig boot option");
-		machvec_init("dig");
-#else
-		panic("Unable to find SBA IOMMU: Try a generic or DIG kernel");
-#endif
-		return 0;
-	}
-
-#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
-	/*
-	 * hpzx1_swiotlb needs to have a fairly small swiotlb bounce
-	 * buffer setup to support devices with smaller DMA masks than
-	 * sba_iommu can handle.
-	 */
-	if (ia64_platform_is("hpzx1_swiotlb")) {
-		extern void hwsw_init(void);
-
-		hwsw_init();
-	}
-#endif
-
-#ifdef CONFIG_PCI
-	{
-		struct pci_bus *b = NULL;
-		while ((b = pci_find_next_bus(b)) != NULL)
-			sba_connect_bus(b);
-	}
-#endif
-
-#ifdef CONFIG_PROC_FS
-	ioc_proc_init();
-#endif
-	return 0;
-}
-
-subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */
-
-static int __init
-nosbagart(char *str)
-{
-	reserve_sba_gart = 0;
-	return 1;
-}
-
-int
-sba_dma_supported (struct device *dev, u64 mask)
-{
-	/* make sure it's at least 32bit capable */
-	return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
-}
-
-int
-sba_dma_mapping_error (dma_addr_t dma_addr)
-{
-	return 0;
-}
-
-__setup("nosbagart", nosbagart);
-
-static int __init
-sba_page_override(char *str)
-{
-	unsigned long page_size;
-
-	page_size = memparse(str, &str);
-	switch (page_size) {
-		case 4096:
-		case 8192:
-		case 16384:
-		case 65536:
-			iovp_shift = ffs(page_size) - 1;
-			break;
-		default:
-			printk("%s: unknown/unsupported iommu page size %ld\n",
-			       __FUNCTION__, page_size);
-	}
-
-	return 1;
-}
-
-__setup("sbapagesize=",sba_page_override);
-
-EXPORT_SYMBOL(sba_dma_mapping_error);
-EXPORT_SYMBOL(sba_map_single);
-EXPORT_SYMBOL(sba_unmap_single);
-EXPORT_SYMBOL(sba_map_sg);
-EXPORT_SYMBOL(sba_unmap_sg);
-EXPORT_SYMBOL(sba_dma_supported);
-EXPORT_SYMBOL(sba_alloc_coherent);
-EXPORT_SYMBOL(sba_free_coherent);
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile b/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile
deleted file mode 100644
index ad8215a3c5..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile
+++ /dev/null
@@ -1,63 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-extra-y	:= head.o init_task.o vmlinux.lds
-
-obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
-	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
-	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
-	 unwind.o mca.o mca_asm.o topology.o
-
-obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
-obj-$(CONFIG_IA64_GENERIC)	+= acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1)	+= acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y				+= acpi-processor.o
-endif
-
-obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
-obj-$(CONFIG_IOSAPIC)		+= iosapic.o
-obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_SMP)		+= smp.o smpboot.o
-obj-$(CONFIG_NUMA)		+= numa.o
-obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
-obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
-obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
-obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
-obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
-obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
-obj-$(CONFIG_AUDIT)		+= audit.o
-mca_recovery-y			+= mca_drv.o mca_drv_asm.o
-
-# The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o
-
-extra-y += gate.so gate-syms.o gate.lds gate.o
-
-# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
-CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
-
-CPPFLAGS_gate.lds := -P -C -U$(ARCH)
-
-quiet_cmd_gate = GATE $@
-      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
-		     $(call ld-option, -Wl$(comma)--hash-style=sysv)
-$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
-
-$(obj)/built-in.o: $(obj)/gate-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
-
-GATECFLAGS_gate-syms.o = -r
-$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
-
-# gate-data.o contains the gate DSO image as data in section .data.gate.
-# We must build gate.so before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c b/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c
deleted file mode 100644
index 54e2fb8ab7..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/acpi.c
+++ /dev/null
@@ -1,1010 +0,0 @@
-/*
- *  acpi.c - Architecture-Specific Low-Level ACPI Support
- *
- *  Copyright (C) 1999 VA Linux Systems
- *  Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
- *  Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *  Copyright (C) 2000 Intel Corp.
- *  Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
- *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *  Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
- *  Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
- *  Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
- *  Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/irq.h>
-#include <linux/acpi.h>
-#include <linux/efi.h>
-#include <linux/mmzone.h>
-#include <linux/nodemask.h>
-#include <asm/io.h>
-#include <asm/iosapic.h>
-#include <asm/machvec.h>
-#include <asm/page.h>
-#include <asm/system.h>
-#include <asm/numa.h>
-#include <asm/sal.h>
-#include <asm/cyclone.h>
-
-#define BAD_MADT_ENTRY(entry, end) (                                        \
-		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
-		((acpi_table_entry_header *)entry)->length < sizeof(*entry))
-
-#define PREFIX			"ACPI: "
-
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
-void (*pm_power_off) (void);
-EXPORT_SYMBOL(pm_power_off);
-
-unsigned char acpi_kbd_controller_present = 1;
-unsigned char acpi_legacy_devices;
-
-unsigned int acpi_cpei_override;
-unsigned int acpi_cpei_phys_cpuid;
-
-#define MAX_SAPICS 256
-u16 ia64_acpiid_to_sapicid[MAX_SAPICS] = {[0 ... MAX_SAPICS - 1] = -1 };
-
-EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
-
-const char *acpi_get_sysname(void)
-{
-#ifdef CONFIG_IA64_GENERIC
-	unsigned long rsdp_phys;
-	struct acpi20_table_rsdp *rsdp;
-	struct acpi_table_xsdt *xsdt;
-	struct acpi_table_header *hdr;
-
-	rsdp_phys = acpi_find_rsdp();
-	if (!rsdp_phys) {
-		printk(KERN_ERR
-		       "ACPI 2.0 RSDP not found, default to \"dig\"\n");
-		return "dig";
-	}
-
-	rsdp = (struct acpi20_table_rsdp *)__va(rsdp_phys);
-	if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
-		printk(KERN_ERR
-		       "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
-		return "dig";
-	}
-
-	xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address);
-	hdr = &xsdt->header;
-	if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
-		printk(KERN_ERR
-		       "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
-		return "dig";
-	}
-
-	if (!strcmp(hdr->oem_id, "HP")) {
-		return "hpzx1";
-	} else if (!strcmp(hdr->oem_id, "SGI")) {
-		return "sn2";
-#ifdef CONFIG_XEN
-	} else if (is_running_on_xen() && !strcmp(hdr->oem_id, "XEN")) {
-		return "xen";
-#endif
-	}
-
-	return "dig";
-#else
-# if defined (CONFIG_IA64_HP_SIM)
-	return "hpsim";
-# elif defined (CONFIG_IA64_HP_ZX1)
-	return "hpzx1";
-# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
-	return "hpzx1_swiotlb";
-# elif defined (CONFIG_IA64_SGI_SN2)
-	return "sn2";
-# elif defined (CONFIG_IA64_DIG)
-	return "dig";
-# elif defined (CONFIG_IA64_XEN)
-	return "xen";
-# else
-#	error Unknown platform.  Fix acpi.c.
-# endif
-#endif
-}
-
-#ifdef CONFIG_ACPI
-
-#define ACPI_MAX_PLATFORM_INTERRUPTS	256
-
-/* Array to record platform interrupt vectors for generic interrupt routing. */
-int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
-	[0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
-};
-
-enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
-
-/*
- * Interrupt routing API for device drivers.  Provides interrupt vector for
- * a generic platform event.  Currently only CPEI is implemented.
- */
-int acpi_request_vector(u32 int_type)
-{
-	int vector = -1;
-
-	if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
-		/* corrected platform error interrupt */
-		vector = platform_intr_list[int_type];
-	} else
-		printk(KERN_ERR
-		       "acpi_request_vector(): invalid interrupt type\n");
-	return vector;
-}
-
-char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
-	return __va(phys_addr);
-}
-
-/* --------------------------------------------------------------------------
-                            Boot-time Table Parsing
-   -------------------------------------------------------------------------- */
-
-static int total_cpus __initdata;
-static int available_cpus __initdata;
-struct acpi_table_madt *acpi_madt __initdata;
-static u8 has_8259;
-
-static int __init
-acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
-			  const unsigned long end)
-{
-	struct acpi_table_lapic_addr_ovr *lapic;
-
-	lapic = (struct acpi_table_lapic_addr_ovr *)header;
-
-	if (BAD_MADT_ENTRY(lapic, end))
-		return -EINVAL;
-
-	if (lapic->address) {
-		iounmap(ipi_base_addr);
-		ipi_base_addr = ioremap(lapic->address, 0);
-	}
-	return 0;
-}
-
-static int __init
-acpi_parse_lsapic(acpi_table_entry_header * header, const unsigned long end)
-{
-	struct acpi_table_lsapic *lsapic;
-
-	lsapic = (struct acpi_table_lsapic *)header;
-
-	if (BAD_MADT_ENTRY(lsapic, end))
-		return -EINVAL;
-
-	if (lsapic->flags.enabled) {
-#ifdef CONFIG_SMP
-		smp_boot_data.cpu_phys_id[available_cpus] =
-		    (lsapic->id << 8) | lsapic->eid;
-#endif
-		ia64_acpiid_to_sapicid[lsapic->acpi_id] =
-		    (lsapic->id << 8) | lsapic->eid;
-		++available_cpus;
-	}
-
-	total_cpus++;
-	return 0;
-}
-
-static int __init
-acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
-{
-	struct acpi_table_lapic_nmi *lacpi_nmi;
-
-	lacpi_nmi = (struct acpi_table_lapic_nmi *)header;
-
-	if (BAD_MADT_ENTRY(lacpi_nmi, end))
-		return -EINVAL;
-
-	/* TBD: Support lapic_nmi entries */
-	return 0;
-}
-
-static int __init
-acpi_parse_iosapic(acpi_table_entry_header * header, const unsigned long end)
-{
-	struct acpi_table_iosapic *iosapic;
-
-	iosapic = (struct acpi_table_iosapic *)header;
-
-	if (BAD_MADT_ENTRY(iosapic, end))
-		return -EINVAL;
-
-	return iosapic_init(iosapic->address, iosapic->global_irq_base);
-}
-
-static unsigned int __initdata acpi_madt_rev;
-
-static int __init
-acpi_parse_plat_int_src(acpi_table_entry_header * header,
-			const unsigned long end)
-{
-	struct acpi_table_plat_int_src *plintsrc;
-	int vector;
-
-	plintsrc = (struct acpi_table_plat_int_src *)header;
-
-	if (BAD_MADT_ENTRY(plintsrc, end))
-		return -EINVAL;
-
-	/*
-	 * Get vector assignment for this interrupt, set attributes,
-	 * and program the IOSAPIC routing table.
-	 */
-	vector = iosapic_register_platform_intr(plintsrc->type,
-						plintsrc->global_irq,
-						plintsrc->iosapic_vector,
-						plintsrc->eid,
-						plintsrc->id,
-						(plintsrc->flags.polarity ==
-						 1) ? IOSAPIC_POL_HIGH :
-						IOSAPIC_POL_LOW,
-						(plintsrc->flags.trigger ==
-						 1) ? IOSAPIC_EDGE :
-						IOSAPIC_LEVEL);
-
-	platform_intr_list[plintsrc->type] = vector;
-	if (acpi_madt_rev > 1) {
-		acpi_cpei_override = plintsrc->plint_flags.cpei_override_flag;
-	}
-
-	/*
-	 * Save the physical id, so we can check when its being removed
-	 */
-	acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff;
-
-	return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-unsigned int can_cpei_retarget(void)
-{
-	extern int cpe_vector;
-	extern unsigned int force_cpei_retarget;
-
-	/*
-	 * Only if CPEI is supported and the override flag
-	 * is present, otherwise return that its re-targettable
-	 * if we are in polling mode.
-	 */
-	if (cpe_vector > 0) {
-		if (acpi_cpei_override || force_cpei_retarget)
-			return 1;
-		else
-			return 0;
-	}
-	return 1;
-}
-
-unsigned int is_cpu_cpei_target(unsigned int cpu)
-{
-	unsigned int logical_id;
-
-	logical_id = cpu_logical_id(acpi_cpei_phys_cpuid);
-
-	if (logical_id == cpu)
-		return 1;
-	else
-		return 0;
-}
-
-void set_cpei_target_cpu(unsigned int cpu)
-{
-	acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
-}
-#endif
-
-unsigned int get_cpei_target_cpu(void)
-{
-	return acpi_cpei_phys_cpuid;
-}
-
-static int __init
-acpi_parse_int_src_ovr(acpi_table_entry_header * header,
-		       const unsigned long end)
-{
-	struct acpi_table_int_src_ovr *p;
-
-	p = (struct acpi_table_int_src_ovr *)header;
-
-	if (BAD_MADT_ENTRY(p, end))
-		return -EINVAL;
-
-	iosapic_override_isa_irq(p->bus_irq, p->global_irq,
-				 (p->flags.polarity ==
-				  1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
-				 (p->flags.trigger ==
-				  1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
-	return 0;
-}
-
-static int __init
-acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
-{
-	struct acpi_table_nmi_src *nmi_src;
-
-	nmi_src = (struct acpi_table_nmi_src *)header;
-
-	if (BAD_MADT_ENTRY(nmi_src, end))
-		return -EINVAL;
-
-	/* TBD: Support nimsrc entries */
-	return 0;
-}
-
-static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-	if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) {
-
-		/*
-		 * Unfortunately ITC_DRIFT is not yet part of the
-		 * official SAL spec, so the ITC_DRIFT bit is not
-		 * set by the BIOS on this hardware.
-		 */
-		sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
-
-		cyclone_setup();
-	}
-}
-
-static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
-{
-	if (!phys_addr || !size)
-		return -EINVAL;
-
-	acpi_madt = (struct acpi_table_madt *)__va(phys_addr);
-
-	acpi_madt_rev = acpi_madt->header.revision;
-
-	/* remember the value for reference after free_initmem() */
-#ifdef CONFIG_ITANIUM
-	has_8259 = 1;		/* Firmware on old Itanium systems is broken */
-#else
-	has_8259 = acpi_madt->flags.pcat_compat;
-#endif
-	iosapic_system_init(has_8259);
-
-	/* Get base address of IPI Message Block */
-
-	if (acpi_madt->lapic_address)
-		ipi_base_addr = ioremap(acpi_madt->lapic_address, 0);
-
-	printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
-
-	acpi_madt_oem_check(acpi_madt->header.oem_id,
-			    acpi_madt->header.oem_table_id);
-
-	return 0;
-}
-
-#ifdef CONFIG_ACPI_NUMA
-
-#undef SLIT_DEBUG
-
-#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
-
-static int __initdata srat_num_cpus;	/* number of cpus */
-static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
-#define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
-#define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
-static struct acpi_table_slit __initdata *slit_table;
-
-static int get_processor_proximity_domain(struct acpi_table_processor_affinity *pa)
-{
-	int pxm;
-
-	pxm = pa->proximity_domain;
-	if (ia64_platform_is("sn2"))
-		pxm += pa->reserved[0] << 8;
-	return pxm;
-}
-
-static int get_memory_proximity_domain(struct acpi_table_memory_affinity *ma)
-{
-	int pxm;
-
-	pxm = ma->proximity_domain;
-	if (ia64_platform_is("sn2"))
-		pxm += ma->reserved1[0] << 8;
-	return pxm;
-}
-
-/*
- * ACPI 2.0 SLIT (System Locality Information Table)
- * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
- */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
-	u32 len;
-
-	len = sizeof(struct acpi_table_header) + 8
-	    + slit->localities * slit->localities;
-	if (slit->header.length != len) {
-		printk(KERN_ERR
-		       "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
-		       len, slit->header.length);
-		memset(numa_slit, 10, sizeof(numa_slit));
-		return;
-	}
-	slit_table = slit;
-}
-
-void __init
-acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
-{
-	int pxm;
-
-	if (!pa->flags.enabled)
-		return;
-
-	pxm = get_processor_proximity_domain(pa);
-
-	/* record this node in proximity bitmap */
-	pxm_bit_set(pxm);
-
-	node_cpuid[srat_num_cpus].phys_id =
-	    (pa->apic_id << 8) | (pa->lsapic_eid);
-	/* nid should be overridden as logical node id later */
-	node_cpuid[srat_num_cpus].nid = pxm;
-	srat_num_cpus++;
-}
-
-void __init
-acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
-{
-	unsigned long paddr, size;
-	int pxm;
-	struct node_memblk_s *p, *q, *pend;
-
-	pxm = get_memory_proximity_domain(ma);
-
-	/* fill node memory chunk structure */
-	paddr = ma->base_addr_hi;
-	paddr = (paddr << 32) | ma->base_addr_lo;
-	size = ma->length_hi;
-	size = (size << 32) | ma->length_lo;
-
-	/* Ignore disabled entries */
-	if (!ma->flags.enabled)
-		return;
-
-	/* record this node in proximity bitmap */
-	pxm_bit_set(pxm);
-
-	/* Insertion sort based on base address */
-	pend = &node_memblk[num_node_memblks];
-	for (p = &node_memblk[0]; p < pend; p++) {
-		if (paddr < p->start_paddr)
-			break;
-	}
-	if (p < pend) {
-		for (q = pend - 1; q >= p; q--)
-			*(q + 1) = *q;
-	}
-	p->start_paddr = paddr;
-	p->size = size;
-	p->nid = pxm;
-	num_node_memblks++;
-}
-
-void __init acpi_numa_arch_fixup(void)
-{
-	int i, j, node_from, node_to;
-
-	/* If there's no SRAT, fix the phys_id and mark node 0 online */
-	if (srat_num_cpus == 0) {
-		node_set_online(0);
-		node_cpuid[0].phys_id = hard_smp_processor_id();
-		return;
-	}
-
-	/*
-	 * MCD - This can probably be dropped now.  No need for pxm ID to node ID
-	 * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
-	 */
-	nodes_clear(node_online_map);
-	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
-		if (pxm_bit_test(i)) {
-			int nid = acpi_map_pxm_to_node(i);
-			node_set_online(nid);
-		}
-	}
-
-	/* set logical node id in memory chunk structure */
-	for (i = 0; i < num_node_memblks; i++)
-		node_memblk[i].nid = pxm_to_node(node_memblk[i].nid);
-
-	/* assign memory bank numbers for each chunk on each node */
-	for_each_online_node(i) {
-		int bank;
-
-		bank = 0;
-		for (j = 0; j < num_node_memblks; j++)
-			if (node_memblk[j].nid == i)
-				node_memblk[j].bank = bank++;
-	}
-
-	/* set logical node id in cpu structure */
-	for (i = 0; i < srat_num_cpus; i++)
-		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
-
-	printk(KERN_INFO "Number of logical nodes in system = %d\n",
-	       num_online_nodes());
-	printk(KERN_INFO "Number of memory chunks in system = %d\n",
-	       num_node_memblks);
-
-	if (!slit_table)
-		return;
-	memset(numa_slit, -1, sizeof(numa_slit));
-	for (i = 0; i < slit_table->localities; i++) {
-		if (!pxm_bit_test(i))
-			continue;
-		node_from = pxm_to_node(i);
-		for (j = 0; j < slit_table->localities; j++) {
-			if (!pxm_bit_test(j))
-				continue;
-			node_to = pxm_to_node(j);
-			node_distance(node_from, node_to) =
-			    slit_table->entry[i * slit_table->localities + j];
-		}
-	}
-
-#ifdef SLIT_DEBUG
-	printk("ACPI 2.0 SLIT locality table:\n");
-	for_each_online_node(i) {
-		for_each_online_node(j)
-		    printk("%03d ", node_distance(i, j));
-		printk("\n");
-	}
-#endif
-}
-#endif				/* CONFIG_ACPI_NUMA */
-
-/*
- * success: return IRQ number (>=0)
- * failure: return < 0
- */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
-{
-	if (has_8259 && gsi < 16)
-		return isa_irq_to_vector(gsi);
-
-	return iosapic_register_intr(gsi,
-				     (polarity ==
-				      ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH :
-				     IOSAPIC_POL_LOW,
-				     (triggering ==
-				      ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE :
-				     IOSAPIC_LEVEL);
-}
-
-EXPORT_SYMBOL(acpi_register_gsi);
-
-void acpi_unregister_gsi(u32 gsi)
-{
-	iosapic_unregister_intr(gsi);
-}
-
-EXPORT_SYMBOL(acpi_unregister_gsi);
-
-static int __init acpi_parse_fadt(unsigned long phys_addr, unsigned long size)
-{
-	struct acpi_table_header *fadt_header;
-	struct fadt_descriptor *fadt;
-
-	if (!phys_addr || !size)
-		return -EINVAL;
-
-	fadt_header = (struct acpi_table_header *)__va(phys_addr);
-	if (fadt_header->revision != 3)
-		return -ENODEV;	/* Only deal with ACPI 2.0 FADT */
-
-	fadt = (struct fadt_descriptor *)fadt_header;
-
-	if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
-		acpi_kbd_controller_present = 0;
-
-	if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
-		acpi_legacy_devices = 1;
-
-	acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
-	return 0;
-}
-
-unsigned long __init acpi_find_rsdp(void)
-{
-	unsigned long rsdp_phys = 0;
-
-	if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
-		rsdp_phys = efi.acpi20;
-	else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
-		printk(KERN_WARNING PREFIX
-		       "v1.0/r0.71 tables no longer supported\n");
-	return rsdp_phys;
-}
-
-int __init acpi_boot_init(void)
-{
-
-	/*
-	 * MADT
-	 * ----
-	 * Parse the Multiple APIC Description Table (MADT), if exists.
-	 * Note that this table provides platform SMP configuration
-	 * information -- the successor to MPS tables.
-	 */
-
-	if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
-		printk(KERN_ERR PREFIX "Can't find MADT\n");
-		goto skip_madt;
-	}
-
-	/* Local APIC */
-
-	if (acpi_table_parse_madt
-	    (ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
-		printk(KERN_ERR PREFIX
-		       "Error parsing LAPIC address override entry\n");
-
-	if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS)
-	    < 1)
-		printk(KERN_ERR PREFIX
-		       "Error parsing MADT - no LAPIC entries\n");
-
-	if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0)
-	    < 0)
-		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
-
-	/* I/O APIC */
-
-	if (acpi_table_parse_madt
-	    (ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
-		printk(KERN_ERR PREFIX
-		       "Error parsing MADT - no IOSAPIC entries\n");
-
-	/* System-Level Interrupt Routing */
-
-	if (acpi_table_parse_madt
-	    (ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src,
-	     ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
-		printk(KERN_ERR PREFIX
-		       "Error parsing platform interrupt source entry\n");
-
-	if (acpi_table_parse_madt
-	    (ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
-		printk(KERN_ERR PREFIX
-		       "Error parsing interrupt source overrides entry\n");
-
-	if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
-		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
-      skip_madt:
-
-	/*
-	 * FADT says whether a legacy keyboard controller is present.
-	 * The FADT also contains an SCI_INT line, by which the system
-	 * gets interrupts such as power and sleep buttons.  If it's not
-	 * on a Legacy interrupt, it needs to be setup.
-	 */
-	if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
-		printk(KERN_ERR PREFIX "Can't find FADT\n");
-
-#ifdef CONFIG_SMP
-	if (available_cpus == 0) {
-		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
-		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
-		smp_boot_data.cpu_phys_id[available_cpus] =
-		    hard_smp_processor_id();
-		available_cpus = 1;	/* We've got at least one of these, no? */
-	}
-	smp_boot_data.cpu_count = available_cpus;
-
-	smp_build_cpu_map();
-# ifdef CONFIG_ACPI_NUMA
-	if (srat_num_cpus == 0) {
-		int cpu, i = 1;
-		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
-			if (smp_boot_data.cpu_phys_id[cpu] !=
-			    hard_smp_processor_id())
-				node_cpuid[i++].phys_id =
-				    smp_boot_data.cpu_phys_id[cpu];
-	}
-# endif
-#endif
-#ifdef CONFIG_ACPI_NUMA
-	build_cpu_to_node_map();
-#endif
-	/* Make boot-up look pretty */
-	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
-	       total_cpus);
-	return 0;
-}
-
-int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
-{
-	int vector;
-
-	if (has_8259 && gsi < 16)
-		*irq = isa_irq_to_vector(gsi);
-	else {
-		vector = gsi_to_vector(gsi);
-		if (vector == -1)
-			return -1;
-
-		*irq = vector;
-	}
-	return 0;
-}
-
-/*
- *  ACPI based hotplug CPU support
- */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static
-int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
-{
-#ifdef CONFIG_ACPI_NUMA
-	int pxm_id;
-
-	pxm_id = acpi_get_pxm(handle);
-
-	/*
-	 * Assuming that the container driver would have set the proximity
-	 * domain and would have initialized pxm_to_node(pxm_id) && pxm_flag
-	 */
-	node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id);
-
-	node_cpuid[cpu].phys_id = physid;
-#endif
-	return (0);
-}
-
-int additional_cpus __initdata = -1;
-
-static __init int setup_additional_cpus(char *s)
-{
-	if (s)
-		additional_cpus = simple_strtol(s, NULL, 0);
-
-	return 0;
-}
-
-early_param("additional_cpus", setup_additional_cpus);
-
-/*
- * cpu_possible_map should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and dont expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- */
-__init void prefill_possible_map(void)
-{
-	int i;
-	int possible, disabled_cpus;
-
-	disabled_cpus = total_cpus - available_cpus;
-
- 	if (additional_cpus == -1) {
- 		if (disabled_cpus > 0)
-			additional_cpus = disabled_cpus;
- 		else
-			additional_cpus = 0;
- 	}
-
-	possible = available_cpus + additional_cpus;
-
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
-
-	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
-		possible, max((possible - available_cpus), 0));
-
-	for (i = 0; i < possible; i++)
-		cpu_set(i, cpu_possible_map);
-}
-
-int acpi_map_lsapic(acpi_handle handle, int *pcpu)
-{
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	struct acpi_table_lsapic *lsapic;
-	cpumask_t tmp_map;
-	long physid;
-	int cpu;
-
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-		return -EINVAL;
-
-	if (!buffer.length || !buffer.pointer)
-		return -EINVAL;
-
-	obj = buffer.pointer;
-	if (obj->type != ACPI_TYPE_BUFFER ||
-	    obj->buffer.length < sizeof(*lsapic)) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer;
-
-	if ((lsapic->header.type != ACPI_MADT_LSAPIC) ||
-	    (!lsapic->flags.enabled)) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	physid = ((lsapic->id << 8) | (lsapic->eid));
-
-	kfree(buffer.pointer);
-	buffer.length = ACPI_ALLOCATE_BUFFER;
-	buffer.pointer = NULL;
-
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-	if (cpu >= NR_CPUS)
-		return -EINVAL;
-
-	acpi_map_cpu2node(handle, cpu, physid);
-
-	cpu_set(cpu, cpu_present_map);
-	ia64_cpu_to_sapicid[cpu] = physid;
-	ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu];
-
-	*pcpu = cpu;
-	return (0);
-}
-
-EXPORT_SYMBOL(acpi_map_lsapic);
-
-int acpi_unmap_lsapic(int cpu)
-{
-	int i;
-
-	for (i = 0; i < MAX_SAPICS; i++) {
-		if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
-			ia64_acpiid_to_sapicid[i] = -1;
-			break;
-		}
-	}
-	ia64_cpu_to_sapicid[cpu] = -1;
-	cpu_clear(cpu, cpu_present_map);
-
-#ifdef CONFIG_ACPI_NUMA
-	/* NUMA specific cleanup's */
-#endif
-
-	return (0);
-}
-
-EXPORT_SYMBOL(acpi_unmap_lsapic);
-#endif				/* CONFIG_ACPI_HOTPLUG_CPU */
-
-#ifdef CONFIG_ACPI_NUMA
-static acpi_status __devinit
-acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
-{
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	struct acpi_table_iosapic *iosapic;
-	unsigned int gsi_base;
-	int pxm, node;
-
-	/* Only care about objects w/ a method that returns the MADT */
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-		return AE_OK;
-
-	if (!buffer.length || !buffer.pointer)
-		return AE_OK;
-
-	obj = buffer.pointer;
-	if (obj->type != ACPI_TYPE_BUFFER ||
-	    obj->buffer.length < sizeof(*iosapic)) {
-		kfree(buffer.pointer);
-		return AE_OK;
-	}
-
-	iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
-
-	if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
-		kfree(buffer.pointer);
-		return AE_OK;
-	}
-
-	gsi_base = iosapic->global_irq_base;
-
-	kfree(buffer.pointer);
-
-	/*
-	 * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
-	 * us which node to associate this with.
-	 */
-	pxm = acpi_get_pxm(handle);
-	if (pxm < 0)
-		return AE_OK;
-
-	node = pxm_to_node(pxm);
-
-	if (node >= MAX_NUMNODES || !node_online(node) ||
-	    cpus_empty(node_to_cpumask(node)))
-		return AE_OK;
-
-	/* We know a gsi to node mapping! */
-	map_iosapic_to_node(gsi_base, node);
-	return AE_OK;
-}
-
-static int __init
-acpi_map_iosapics (void)
-{
-	acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
-	return 0;
-}
-
-fs_initcall(acpi_map_iosapics);
-#endif				/* CONFIG_ACPI_NUMA */
-
-int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
-{
-	int err;
-
-	if ((err = iosapic_init(phys_addr, gsi_base)))
-		return err;
-
-#ifdef CONFIG_ACPI_NUMA
-	acpi_map_iosapic(handle, 0, NULL, NULL);
-#endif				/* CONFIG_ACPI_NUMA */
-
-	return 0;
-}
-
-EXPORT_SYMBOL(acpi_register_ioapic);
-
-int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
-{
-	return iosapic_remove(gsi_base);
-}
-
-EXPORT_SYMBOL(acpi_unregister_ioapic);
-
-#endif				/* CONFIG_ACPI */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
deleted file mode 100644
index 2aa8c101aa..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#define ASM_OFFSETS_C 1
-
-#include <linux/sched.h>
-
-#include <asm-ia64/processor.h>
-#include <asm-ia64/ptrace.h>
-#include <asm-ia64/siginfo.h>
-#include <asm-ia64/sigcontext.h>
-#include <asm-ia64/mca.h>
-
-#include "../kernel/sigframe.h"
-
-#define DEFINE(sym, val) \
-        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-void foo(void)
-{
-	DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
-	DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
-	DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
-	DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
-	DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
-	DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
-	DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
-	DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
-
-	BLANK();
-
-	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
-	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-
-	BLANK();
-
-	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
-	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
-	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
-	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
-	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
-	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
-	DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
-	DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
-	DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
-	DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
-	DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
-
-	BLANK();
-
-	DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
-
-	BLANK();
-
-	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
-							     group_stop_count));
-	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
-
-	BLANK();
-
-	DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
-	DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
-	DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
-	DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
-	DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
-	DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
-	DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
-	DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
-	DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
-	DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
-	DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
-	DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
-	DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
-	DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
-	DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
-
-	DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
-	DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
-	DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
-	DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
-	DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
-	DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
-	DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
-	DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
-	DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
-	DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
-	DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
-	DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
-	DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
-	DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
-	DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
-	DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
-	DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
-	DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
-	DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
-	DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
-	DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
-	DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
-	DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
-	DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
-	DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
-	DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
-	DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
-	DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
-	DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
-	DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
-	DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
-	DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
-	DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
-	DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
-	DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
-
-	BLANK();
-
-	DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
-	DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
-	DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
-	DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
-	DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
-	DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
-	DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
-	DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
-	DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
-	DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
-	DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
-	DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
-	DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
-	DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
-	DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
-	DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
-	DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
-	DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
-	DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
-	DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
-	DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
-	DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
-	DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
-	DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
-	DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
-	DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
-	DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
-	DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
-	DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
-	DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
-	DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
-	DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
-	DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
-	DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
-	DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
-	DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
-	DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
-	DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
-	DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
-	DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
-	DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
-	DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
-
-	BLANK();
-
-	DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
-	DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
-	DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
-	DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
-	DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
-	DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
-	DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
-	DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
-	DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
-	DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
-	DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
-	DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
-	DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
-
-	BLANK();
-
-	DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
-
-	BLANK();
-
-	DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
-	DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
-	DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
-	DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
-	DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
-	BLANK();
-    /* for assembly files which can't include sched.h: */
-	DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
-	DEFINE(IA64_CLONE_VM, CLONE_VM);
-
-	BLANK();
-	DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
-	       offsetof (struct cpuinfo_ia64, nsec_per_cyc));
-	DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
-	       offsetof (struct cpuinfo_ia64, ptce_base));
-	DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
-	       offsetof (struct cpuinfo_ia64, ptce_count));
-	DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
-	       offsetof (struct cpuinfo_ia64, ptce_stride));
-	BLANK();
-	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
-	       offsetof (struct timespec, tv_nsec));
-
-	DEFINE(CLONE_SETTLS_BIT, 19);
-#if CLONE_SETTLS != (1<<19)
-# error "CLONE_SETTLS_BIT incorrect, please fix"
-#endif
-
-	BLANK();
-	DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET,
-	       offsetof (struct ia64_mca_cpu, mca_stack));
-	DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
-	       offsetof (struct ia64_mca_cpu, init_stack));
-	BLANK();
-	DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
-	       offsetof (struct ia64_sal_os_state, os_gp));
-	DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
-	       offsetof (struct ia64_sal_os_state, proc_state_param));
-	DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET,
-	       offsetof (struct ia64_sal_os_state, sal_ra));
-	DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET,
-	       offsetof (struct ia64_sal_os_state, sal_gp));
-	DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
-	       offsetof (struct ia64_sal_os_state, pal_min_state));
-	DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET,
-	       offsetof (struct ia64_sal_os_state, os_status));
-	DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET,
-	       offsetof (struct ia64_sal_os_state, context));
-	DEFINE(IA64_SAL_OS_STATE_SIZE,
-	       sizeof (struct ia64_sal_os_state));
-	BLANK();
-
-	DEFINE(IA64_PMSA_GR_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_gr));
-	DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
-	DEFINE(IA64_PMSA_PR_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_pr));
-	DEFINE(IA64_PMSA_BR0_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_br0));
-	DEFINE(IA64_PMSA_RSC_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_rsc));
-	DEFINE(IA64_PMSA_IIP_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_iip));
-	DEFINE(IA64_PMSA_IPSR_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_ipsr));
-	DEFINE(IA64_PMSA_IFS_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_ifs));
-	DEFINE(IA64_PMSA_XIP_OFFSET,
-	       offsetof (struct pal_min_state_area_s, pmsa_xip));
-	BLANK();
-
-	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
-	DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
-	DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
-	DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
-	DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
-	DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
-	DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
-	DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
-	DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
-	DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
-	DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
-	DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
-	DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
-	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
-
-#ifdef CONFIG_XEN
-	BLANK();
-
-#define DEFINE_MAPPED_REG_OFS(sym, field) \
-	DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(mapped_regs_t, field)))
-
-	DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr);
-	DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr);
-	DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip);
-	DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs);
-	DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs);
-	DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr);
-	DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa);
-	DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa);
-	DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim);
-	DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
-	DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
-	DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
-	DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
-	DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
-	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
-	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
-	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);    
-#endif /* CONFIG_XEN */
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S b/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
deleted file mode 100644
index f46bdcf401..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/entry.S
+++ /dev/null
@@ -1,1620 +0,0 @@
-/*
- * ia64/kernel/entry.S
- *
- * Kernel entry points.
- *
- * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 1999, 2002-2003
- *	Asit Mallick <Asit.K.Mallick@intel.com>
- * 	Don Dugger <Don.Dugger@intel.com>
- *	Suresh Siddha <suresh.b.siddha@intel.com>
- *	Fenghua Yu <fenghua.yu@intel.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- */
-/*
- * ia64_switch_to now places correct virtual mapping in TR2 for
- * kernel stack. This allows us to handle interrupts without changing
- * to physical mode.
- *
- * Jonathan Nicklin	<nicklin@missioncriticallinux.com>
- * Patrick O'Rourke	<orourke@missioncriticallinux.com>
- * 11/07/2000
- */
-/*
- * Global (preserved) predicate usage on syscall entry/exit path:
- *
- *	pKStk:		See entry.h.
- *	pUStk:		See entry.h.
- *	pSys:		See entry.h.
- *	pNonSys:	!pSys
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/kregs.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
-#include <asm/percpu.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-
-#include "minstate.h"
-
-	/*
-	 * execve() is special because in case of success, we need to
-	 * setup a null register window frame.
-	 *
-	 * in0-in2 (filename, argv, envp) plus sp+16 (regs) are handed to
-	 * sys_execve().  If the 32-bit result in r8 is negative it is simply
-	 * returned, sign-extended.  Otherwise ar.pfs is cleared and r4-r7,
-	 * f2-f5, f12-f31, b1-b5, ar.unat and ar.lc are reset before returning.
-	 */
-ENTRY(ia64_execve)
-	/*
-	 * Allocate 8 input registers since ptrace() may clobber them
-	 */
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,4,0
-	mov loc0=rp
-	.body
-	mov out0=in0			// filename
-	;;				// stop bit between alloc and call
-	mov out1=in1			// argv
-	mov out2=in2			// envp
-	add out3=16,sp			// regs
-	br.call.sptk.many rp=sys_execve
-.ret0:
-#ifdef CONFIG_IA32_SUPPORT
-	/*
-	 * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
-	 * from pt_regs.
-	 */
-	adds r16=PT(CR_IPSR)+16,sp
-	;;
-	ld8 r16=[r16]
-#endif
-	cmp4.ge p6,p7=r8,r0		// p6 <- 32-bit result >= 0, p7 <- result < 0
-	mov ar.pfs=loc1			// restore ar.pfs
-	sxt4 r8=r8			// return 64-bit result
-	;;
-	stf.spill [sp]=f0		// spill f0 at [sp]; reloaded by the ldf.fill's below
-(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
-	mov rp=loc0
-(p6)	mov ar.pfs=r0			// clear ar.pfs on success
-(p7)	br.ret.sptk.many rp		// failure: return right away, nothing is scrubbed
-
-	/*
-	 * In theory, we'd have to zap this state only to prevent leaking of
-	 * security sensitive state (e.g., if current->mm->dumpable is zero).  However,
-	 * this executes in less than 20 cycles even on Itanium, so it's not worth
-	 * optimizing for...).
-	 */
-	mov ar.unat=0; 		mov ar.lc=0
-	mov r4=0;		mov f2=f0;		mov b1=r0
-	mov r5=0;		mov f3=f0;		mov b2=r0
-	mov r6=0;		mov f4=f0;		mov b3=r0
-	mov r7=0;		mov f5=f0;		mov b4=r0
-	ldf.fill f12=[sp];	mov f13=f0;		mov b5=r0
-	ldf.fill f14=[sp];	ldf.fill f15=[sp];	mov f16=f0
-	ldf.fill f17=[sp];	ldf.fill f18=[sp];	mov f19=f0
-	ldf.fill f20=[sp];	ldf.fill f21=[sp];	mov f22=f0
-	ldf.fill f23=[sp];	ldf.fill f24=[sp];	mov f25=f0
-	ldf.fill f26=[sp];	ldf.fill f27=[sp];	mov f28=f0
-	ldf.fill f29=[sp];	ldf.fill f30=[sp];	mov f31=f0
-#ifdef CONFIG_IA32_SUPPORT
-	tbit.nz p6,p0=r16, IA64_PSR_IS_BIT	// p6 <- IS bit set in the ipsr value loaded above
-	movl loc0=ia64_ret_from_ia32_execve
-	;;
-(p6)	mov rp=loc0			// ia32 case: return via ia64_ret_from_ia32_execve instead
-#endif
-	br.ret.sptk.many rp
-END(ia64_execve)
-
-/*
- * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
- *	      u64 tls)
- *
- * Saves a switch_stack (DO_SAVE_SWITCH_STACK) and then calls
- * do_fork(flags, ustack_base, &regs, ustack_size, parent_tidptr, child_tidptr).
- * When CLONE_SETTLS is set in flags, tls is first stored into the pt_regs r16 slot.
- * The switch_stack is popped again before returning.
- */
-GLOBAL_ENTRY(sys_clone2)
-	/*
-	 * Allocate 8 input registers since ptrace() may clobber them
-	 */
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc r16=ar.pfs,8,2,6,0
-	DO_SAVE_SWITCH_STACK
-	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp	// r2 = address of the pt_regs r16 slot
-	mov loc0=rp
-	mov loc1=r16				// save ar.pfs across do_fork
-	.body
-	mov out1=in1				// out1 = ustack_base
-	mov out3=in2				// out3 = ustack_size
-	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT	// p6 <- CLONE_SETTLS requested
-	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
-	;;
-(p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
-	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
-	mov out0=in0				// out0 = clone_flags
-	br.call.sptk.many rp=do_fork
-.ret1:	.restore sp
-	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
-	mov ar.pfs=loc1
-	mov rp=loc0
-	br.ret.sptk.many rp
-END(sys_clone2)
-
-/*
- * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
- *	Deprecated.  Use sys_clone2() instead.
- *
- * Same sequence as sys_clone2(), except that there is no ustack_size argument:
- * do_fork() is called as do_fork(flags, ustack_base, &regs, 16, parent_tidptr,
- * child_tidptr), and tls comes from in4 rather than in5.
- */
-GLOBAL_ENTRY(sys_clone)
-	/*
-	 * Allocate 8 input registers since ptrace() may clobber them
-	 */
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc r16=ar.pfs,8,2,6,0
-	DO_SAVE_SWITCH_STACK
-	adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp	// r2 = address of the pt_regs r16 slot
-	mov loc0=rp
-	mov loc1=r16				// save ar.pfs across do_fork
-	.body
-	mov out1=in1				// out1 = ustack_base
-	mov out3=16				// stacksize (compensates for 16-byte scratch area)
-	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT	// p6 <- CLONE_SETTLS requested
-	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
-	;;
-(p6)	st8 [r2]=in4				// store TLS in the pt_regs r16 slot (r2 = PT(R16) address, as in sys_clone2)
-	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
-	mov out0=in0				// out0 = clone_flags
-	br.call.sptk.many rp=do_fork
-.ret2:	.restore sp
-	adds sp=IA64_SWITCH_STACK_SIZE,sp	// pop the switch stack
-	mov ar.pfs=loc1
-	mov rp=loc0
-	br.ret.sptk.many rp
-END(sys_clone)
-
-/*
- * prev_task <- ia64_switch_to(struct task_struct *next)
- *	With Ingo's new scheduler, interrupts are disabled when this routine gets
- *	called.  The code starting at .map relies on this.  The rest of the code
- *	doesn't care about the interrupt masking status.
- *
- *	in0 is "next"; the outgoing task pointer (r13) is returned in r8.  The old sp
- *	is stored in the outgoing task and the new sp is loaded from "next".  Unless
- *	next is init_task or its granule number equals the value kept in
- *	IA64_KR(CURRENT_STACK), the code at .map first inserts a translation for it
- *	into dtr[IA64_TR_CURRENT_STACK] and then rejoins at .done.
- */
-GLOBAL_ENTRY(__ia64_switch_to)
-	.prologue
-	alloc r16=ar.pfs,1,0,0,0
-	DO_SAVE_SWITCH_STACK
-	.body
-
-	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13	// r22 = where the old task's sp is saved
-	movl r25=init_task
-	mov r27=IA64_KR(CURRENT_STACK)			// r27 = granule number recorded by the last .map
-	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0	// r21 = where next's sp is saved
-	dep r20=0,in0,61,3		// physical address of "next"
-	;;
-	st8 [r22]=sp			// save kernel stack pointer of old task
-	shr.u r26=r20,IA64_GRANULE_SHIFT	// r26 = granule number of "next"
-	cmp.eq p7,p6=r25,in0		// p7 <- next == init_task, p6 <- otherwise
-	;;
-	/*
-	 * If we've already mapped this task's page, we can skip doing it again.
-	 */
-(p6)	cmp.eq p7,p6=r26,r27		// p6 stays set only if the granule differs from the recorded one
-(p6)	br.cond.dpnt .map
-	;;
-.done:
-	ld8 sp=[r21]			// load kernel stack pointer of new task
-	mov IA64_KR(CURRENT)=in0	// update "current" application register
-	mov r8=r13			// return pointer to previously running task
-	mov r13=in0			// set "current" pointer
-	;;
-	DO_LOAD_SWITCH_STACK
-
-#ifdef CONFIG_SMP
-	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
-#endif
-	br.ret.sptk.many rp		// boogie on out in new context
-
-.map:
-	rsm psr.ic			// interrupts (psr.i) are already disabled here
-	movl r25=PAGE_KERNEL
-	;;
-	srlz.d
-	or r23=r25,r20			// construct PA | page properties
-	mov r25=IA64_GRANULE_SHIFT<<2
-	;;
-	mov cr.itir=r25
-	mov cr.ifa=in0			// VA of next task...
-	;;
-	mov r25=IA64_TR_CURRENT_STACK
-	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
-	;;
-	itr.d dtr[r25]=r23		// wire in new mapping...
-	ssm psr.ic			// reenable the psr.ic bit
-	;;
-	srlz.d
-	br.cond.sptk .done
-END(__ia64_switch_to)
-
-/*
- * Note that interrupts are enabled during save_switch_stack and load_switch_stack.  This
- * means that we may get an interrupt with "sp" pointing to the new kernel stack while
- * ar.bspstore is still pointing to the old kernel backing store area.  Since ar.rsc,
- * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
- * problem.  Also, we don't need to specify unwind information for preserved registers
- * that are not modified in save_switch_stack as the right unwind information is already
- * specified at the call-site of save_switch_stack.
- */
-
-/*
- * save_switch_stack:
- *	- r16 holds ar.pfs
- *	- b7 holds address to return to
- *	- rp (b0) holds return address to save
- *
- *	Stores r4-r7, f2-f5, f12-f31, b0-b5, ar.pfs (taken from r16), ar.lc, ar.unat,
- *	the caller's ar.unat (captured in r17 on entry), ar.rnat, ar.bspstore, pr and
- *	ar.fpsr into the switch_stack slots addressed as SW(...)+16 relative to sp.
- *	ar.rsc is set to 0 while this is done and to 3 before branching back through b7.
- *	Uses r2, r3, r14, r15 and r17-r26, r29 as temporaries.
- */
-GLOBAL_ENTRY(save_switch_stack)
-	.prologue
-	.altrp b7
-	flushrs			// flush dirty regs to backing store (must be first in insn group)
-	.save @priunat,r17
-	mov r17=ar.unat		// preserve caller's
-	.body
-#ifdef CONFIG_ITANIUM
-	adds r2=16+128,sp
-	adds r3=16+64,sp
-	adds r14=SW(R4)+16,sp
-	;;
-	st8.spill [r14]=r4,16		// spill r4
-	lfetch.fault.excl.nt1 [r3],128
-	;;
-	lfetch.fault.excl.nt1 [r2],128
-	lfetch.fault.excl.nt1 [r3],128
-	;;
-	lfetch.fault.excl [r2]
-	lfetch.fault.excl [r3]
-	adds r15=SW(R5)+16,sp
-#else
-	add r2=16+3*128,sp
-	add r3=16,sp
-	add r14=SW(R4)+16,sp
-	;;
-	st8.spill [r14]=r4,SW(R6)-SW(R4)	// spill r4 and prefetch offset 0x1c0
-	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x010
-	;;
-	lfetch.fault.excl.nt1 [r3],128	//		prefetch offset 0x090
-	lfetch.fault.excl.nt1 [r2],128	//		prefetch offset 0x190
-	;;
-	lfetch.fault.excl.nt1 [r3]	//		prefetch offset 0x110
-	lfetch.fault.excl.nt1 [r2]	//		prefetch offset 0x210
-	adds r15=SW(R5)+16,sp
-#endif
-	;;
-	st8.spill [r15]=r5,SW(R7)-SW(R5)	// spill r5
-	mov.m ar.rsc=0			// put RSE in mode: enforced lazy, little endian, pl 0
-	add r2=SW(F2)+16,sp		// r2 = &sw->f2
-	;;
-	st8.spill [r14]=r6,SW(B0)-SW(R6)	// spill r6
-	mov.m r18=ar.fpsr		// preserve fpsr
-	add r3=SW(F3)+16,sp		// r3 = &sw->f3
-	;;
-	stf.spill [r2]=f2,32		// from here r2 walks f2,f4,f12,f14,... and r3 walks f3,f5,f13,f15,...
-	mov.m r19=ar.rnat
-	mov r21=b0
-
-	stf.spill [r3]=f3,32
-	st8.spill [r15]=r7,SW(B2)-SW(R7)	// spill r7
-	mov r22=b1
-	;;
-	// since we're done with the spills, read and save ar.unat:
-	mov.m r29=ar.unat
-	mov.m r20=ar.bspstore
-	mov r23=b2
-	stf.spill [r2]=f4,32
-	stf.spill [r3]=f5,32
-	mov r24=b3
-	;;
-	st8 [r14]=r21,SW(B1)-SW(B0)		// save b0
-	st8 [r15]=r23,SW(B3)-SW(B2)		// save b2
-	mov r25=b4
-	mov r26=b5
-	;;
-	st8 [r14]=r22,SW(B4)-SW(B1)		// save b1
-	st8 [r15]=r24,SW(AR_PFS)-SW(B3)		// save b3
-	mov r21=ar.lc		// I-unit
-	stf.spill [r2]=f12,32
-	stf.spill [r3]=f13,32
-	;;
-	st8 [r14]=r25,SW(B5)-SW(B4)		// save b4
-	st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS)	// save ar.pfs
-	stf.spill [r2]=f14,32
-	stf.spill [r3]=f15,32
-	;;
-	st8 [r14]=r26				// save b5
-	st8 [r15]=r21				// save ar.lc
-	stf.spill [r2]=f16,32
-	stf.spill [r3]=f17,32
-	;;
-	stf.spill [r2]=f18,32
-	stf.spill [r3]=f19,32
-	;;
-	stf.spill [r2]=f20,32
-	stf.spill [r3]=f21,32
-	;;
-	stf.spill [r2]=f22,32
-	stf.spill [r3]=f23,32
-	;;
-	stf.spill [r2]=f24,32
-	stf.spill [r3]=f25,32
-	;;
-	stf.spill [r2]=f26,32
-	stf.spill [r3]=f27,32
-	;;
-	stf.spill [r2]=f28,32
-	stf.spill [r3]=f29,32
-	;;
-	stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)	// r2 now addresses the ar.unat slot
-	stf.spill [r3]=f31,SW(PR)-SW(F31)	// r3 now addresses the pr slot
-	add r14=SW(CALLER_UNAT)+16,sp
-	;;
-	st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT)	// save ar.unat
-	st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
-	mov r21=pr
-	;;
-	st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
-	st8 [r3]=r21				// save predicate registers
-	;;
-	st8 [r2]=r20				// save ar.bspstore
-	st8 [r14]=r18				// save fpsr
-	mov ar.rsc=3		// put RSE back into eager mode, pl 0
-	br.cond.sptk.many b7
-END(save_switch_stack)
-
-/*
- * load_switch_stack:
- *	- "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
- *	- b7 holds address to return to
- *	- must not touch r8-r11
- *
- *	Reloads, from the switch_stack slots addressed as SW(...)+16 relative to sp:
- *	ar.bspstore, ar.unat, b0-b5, ar.pfs, ar.lc, pr, ar.rnat, f2-f5, f12-f31, r4-r7,
- *	ar.fpsr, and finally the caller's ar.unat.  Values are staged in r16-r19 and
- *	r21-r30 before being moved into their registers; r2, r3, r14 and r15 are used
- *	as pointers.  ar.rsc is 0 during the reload and set to 3 before branching to b7.
- */
-GLOBAL_ENTRY(load_switch_stack)
-	.prologue
-	.altrp b7
-
-	.body
-	lfetch.fault.nt1 [sp]
-	adds r2=SW(AR_BSPSTORE)+16,sp
-	adds r3=SW(AR_UNAT)+16,sp
-	mov ar.rsc=0						// put RSE into enforced lazy mode
-	adds r14=SW(CALLER_UNAT)+16,sp
-	adds r15=SW(AR_FPSR)+16,sp
-	;;
-	ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE))	// bspstore
-	ld8 r29=[r3],(SW(B1)-SW(AR_UNAT))	// unat
-	;;
-	ld8 r21=[r2],16		// restore b0
-	ld8 r22=[r3],16		// restore b1
-	;;
-	ld8 r23=[r2],16		// restore b2
-	ld8 r24=[r3],16		// restore b3
-	;;
-	ld8 r25=[r2],16		// restore b4
-	ld8 r26=[r3],16		// restore b5
-	;;
-	ld8 r16=[r2],(SW(PR)-SW(AR_PFS))	// ar.pfs
-	ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC))	// ar.lc
-	;;
-	ld8 r28=[r2]		// restore pr
-	ld8 r30=[r3]		// restore rnat
-	;;
-	ld8 r18=[r14],16	// restore caller's unat
-	ld8 r19=[r15],24	// restore fpsr
-	;;
-	ldf.fill f2=[r14],32	// from here r14 walks f2,f4,f12,f14,... and r15 walks f3,f5,f13,f15,...
-	ldf.fill f3=[r15],32
-	;;
-	ldf.fill f4=[r14],32
-	ldf.fill f5=[r15],32
-	;;
-	ldf.fill f12=[r14],32
-	ldf.fill f13=[r15],32
-	;;
-	ldf.fill f14=[r14],32
-	ldf.fill f15=[r15],32
-	;;
-	ldf.fill f16=[r14],32
-	ldf.fill f17=[r15],32
-	;;
-	ldf.fill f18=[r14],32
-	ldf.fill f19=[r15],32
-	mov b0=r21
-	;;
-	ldf.fill f20=[r14],32
-	ldf.fill f21=[r15],32
-	mov b1=r22
-	;;
-	ldf.fill f22=[r14],32
-	ldf.fill f23=[r15],32
-	mov b2=r23
-	;;
-	mov ar.bspstore=r27
-	mov ar.unat=r29		// establish unat holding the NaT bits for r4-r7
-	mov b3=r24
-	;;
-	ldf.fill f24=[r14],32
-	ldf.fill f25=[r15],32
-	mov b4=r25
-	;;
-	ldf.fill f26=[r14],32
-	ldf.fill f27=[r15],32
-	mov b5=r26
-	;;
-	ldf.fill f28=[r14],32
-	ldf.fill f29=[r15],32
-	mov ar.pfs=r16
-	;;
-	ldf.fill f30=[r14],32
-	ldf.fill f31=[r15],24
-	mov ar.lc=r17
-	;;
-	ld8.fill r4=[r14],16	// r4-r7 are filled after ar.unat was set from r29 above
-	ld8.fill r5=[r15],16
-	mov pr=r28,-1
-	;;
-	ld8.fill r6=[r14],16
-	ld8.fill r7=[r15],16
-
-	mov ar.unat=r18				// restore caller's unat
-	mov ar.rnat=r30				// must restore after bspstore but before rsc!
-	mov ar.fpsr=r19				// restore fpsr
-	mov ar.rsc=3				// put RSE back into eager mode, pl 0
-	br.cond.sptk.many b7
-END(load_switch_stack)
-
-GLOBAL_ENTRY(prefetch_stack)			// in0 = task whose saved stack pointer gets prefetched
-	add r14 = -IA64_SWITCH_STACK_SIZE, sp	// r14 = sp - IA64_SWITCH_STACK_SIZE
-	add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0
-	;;
-	ld8 r16 = [r15]				// load next's stack pointer
-	lfetch.fault.excl [r14], 128		// five exclusive prefetches from r14 upward, 128 bytes apart
-	;;
-	lfetch.fault.excl [r14], 128
-	lfetch.fault [r16], 128			// five prefetches from next's stack pointer upward, 128 bytes apart
-	;;
-	lfetch.fault.excl [r14], 128
-	lfetch.fault [r16], 128
-	;;
-	lfetch.fault.excl [r14], 128
-	lfetch.fault [r16], 128
-	;;
-	lfetch.fault.excl [r14], 128
-	lfetch.fault [r16], 128
-	;;
-	lfetch.fault [r16], 128
-	br.ret.sptk.many rp
-END(prefetch_stack)
-
-GLOBAL_ENTRY(execve)				// stub: issues syscall __NR_execve, arguments are passed through untouched
-	mov r15=__NR_execve			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(execve)
-
-GLOBAL_ENTRY(clone)				// stub: issues syscall __NR_clone, arguments are passed through untouched
-	mov r15=__NR_clone			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(clone)
-
-	/*
-	 * Invoke a system call, but do some tracing before and after the call.
-	 * We MUST preserve the current register frame throughout this routine
-	 * because some system calls (such as ia64_execve) directly
-	 * manipulate ar.pfs.
-	 *
-	 * Sequence: spill f6-f11 into pt_regs, call syscall_trace_enter, reload
-	 * f6-f11, re-read the syscall number from pt_regs.r15, dispatch through
-	 * sys_call_table (sys_ni_syscall when the number is out of range), store
-	 * r8/r10 into pt_regs, call syscall_trace_leave and continue at
-	 * .work_pending_syscall_end.
-	 */
-GLOBAL_ENTRY(__ia64_trace_syscall)
-	PT_REGS_UNWIND_INFO(0)
-	/*
-	 * We need to preserve the scratch registers f6-f11 in case the system
-	 * call is sigreturn.
-	 */
-	adds r16=PT(F6)+16,sp
-	adds r17=PT(F7)+16,sp
-	;;
- 	stf.spill [r16]=f6,32
- 	stf.spill [r17]=f7,32
-	;;
- 	stf.spill [r16]=f8,32
- 	stf.spill [r17]=f9,32
-	;;
- 	stf.spill [r16]=f10
- 	stf.spill [r17]=f11
-	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
-	adds r16=PT(F6)+16,sp
-	adds r17=PT(F7)+16,sp
-	;;
-	ldf.fill f6=[r16],32
-	ldf.fill f7=[r17],32
-	;;
-	ldf.fill f8=[r16],32
-	ldf.fill f9=[r17],32
-	;;
-	ldf.fill f10=[r16]
-	ldf.fill f11=[r17]
-	// the syscall number may have changed, so re-load it and re-calculate the
-	// syscall entry-point:
-	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
-	;;
-	ld8 r15=[r15]
-	mov r3=NR_syscalls - 1
-	;;
-	adds r15=-1024,r15			// r15 = syscall # - 1024 (table index)
-	movl r16=sys_call_table
-	;;
-	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
-	cmp.leu p6,p7=r15,r3			// p6 <- index <= NR_syscalls-1 (unsigned), p7 <- out of range
-	;;
-(p6)	ld8 r20=[r20]				// load address of syscall entry point
-(p7)	movl r20=sys_ni_syscall
-	;;
-	mov b6=r20
-	br.call.sptk.many rp=b6			// do the syscall
-.strace_check_retval:				// also entered from __ia64_ret_from_clone
-	cmp.lt p6,p0=r8,r0			// syscall failed?
-	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
-	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
-	mov r10=0
-(p6)	br.cond.sptk strace_error		// syscall failed ->
-	;;					// avoid RAW on r10
-.strace_save_retval:
-.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
-.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
-	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-.ret3:
-(pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
-	br.cond.sptk .work_pending_syscall_end
-
-strace_error:
-	ld8 r3=[r2]				// load pt_regs.r8
-	sub r9=0,r8				// negate return value to get errno value
-	;;
-	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
-	adds r3=16,r2				// r3=&pt_regs.r10
-	;;
-(p6)	mov r10=-1				// only when pt_regs.r8 != 0: flag the error in r10 ...
-(p6)	mov r8=r9				// ... and return the negated value
-	br.cond.sptk .strace_save_retval
-END(__ia64_trace_syscall)
-
-	/*
-	 * When traced and returning from sigreturn, we invoke syscall_trace_leave
-	 * but then go straight to ia64_leave_kernel rather than ia64_leave_syscall.
-	 */
-GLOBAL_ENTRY(ia64_strace_leave_kernel)
-	PT_REGS_UNWIND_INFO(0)
-{	/*
-	 * Some versions of gas generate bad unwind info if the first instruction of a
-	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
-	 */
-	nop.m 0
-	nop.i 0
-	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-}
-.ret4:	br.cond.sptk ia64_leave_kernel
-END(ia64_strace_leave_kernel)
-
-GLOBAL_ENTRY(__ia64_ret_from_clone)
-	PT_REGS_UNWIND_INFO(0)
-{	/*
-	 * Some versions of gas generate bad unwind info if the first instruction of a
-	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
-	 */
-	nop.m 0
-	nop.i 0
-	/*
-	 * We need to call schedule_tail() to complete the scheduling process.
-	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
-	 * address of the previously executing task.
-	 */
-	br.call.sptk.many rp=ia64_invoke_schedule_tail
-}
-.ret8:
-	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
-	;;
-	ld4 r2=[r2]				// r2 = 32-bit flags word read at TI_FLAGS+IA64_TASK_SIZE off r13
-	;;
-	mov r8=0				// r8 = 0 is the value checked by the code that follows
-	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// keep only the _TIF_SYSCALL_TRACEAUDIT bits
-	;;
-	cmp.ne p6,p0=r2,r0			// p6 <- any of those bits set
-(p6)	br.cond.spnt .strace_check_retval	// traced: finish via the __ia64_trace_syscall tail
-	;;					// added stop bits to prevent r8 dependency
-END(__ia64_ret_from_clone)
-	// fall through
-GLOBAL_ENTRY(ia64_ret_from_syscall)		// r8 = syscall return value; also reached by fall-through from __ia64_ret_from_clone
-	PT_REGS_UNWIND_INFO(0)
-	cmp.ge p6,p7=r8,r0			// syscall executed successfully?
-	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
-	mov r10=r0				// clear error indication in r10
-(p7)	br.cond.spnt handle_syscall_error	// handle potential syscall failure
-	;;
-	// don't fall through, ia64_leave_syscall may be #define'd
-	br.cond.sptk.few ia64_leave_syscall
-	;;
-END(ia64_ret_from_syscall)
-/*
- * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
- *	need to switch to bank 0 and doesn't restore the scratch registers.
- *	To avoid leaking kernel bits, the scratch registers are set to
- *	the following known-to-be-safe values:
- *
- *		  r1: restored (global pointer)
- *		  r2: cleared
- *		  r3: 1 (when returning to user-level)
- *	      r8-r11: restored (syscall return value(s))
- *		 r12: restored (user-level stack pointer)
- *		 r13: restored (user-level thread pointer)
- *		 r14: set to __kernel_syscall_via_epc
- *		 r15: restored (syscall #)
- *	     r16-r17: cleared
- *		 r18: user-level b6
- *		 r19: cleared
- *		 r20: user-level ar.fpsr
- *		 r21: user-level b0
- *		 r22: cleared
- *		 r23: user-level ar.bspstore
- *		 r24: user-level ar.rnat
- *		 r25: user-level ar.unat
- *		 r26: user-level ar.pfs
- *		 r27: user-level ar.rsc
- *		 r28: user-level ip
- *		 r29: user-level psr
- *		 r30: user-level cfm
- *		 r31: user-level pr
- *	      f6-f11: cleared
- *		  pr: restored (user-level pr)
- *		  b0: restored (user-level rp)
- *	          b6: restored
- *		  b7: set to __kernel_syscall_via_epc
- *	     ar.unat: restored (user-level ar.unat)
- *	      ar.pfs: restored (user-level ar.pfs)
- *	      ar.rsc: restored (user-level ar.rsc)
- *	     ar.rnat: restored (user-level ar.rnat)
- *	 ar.bspstore: restored (user-level ar.bspstore)
- *	     ar.fpsr: restored (user-level ar.fpsr)
- *	      ar.ccv: cleared
- *	      ar.csd: cleared
- *	      ar.ssd: cleared
- *
- *	The routine does not return: it ends by branching to skip_rbs_switch (pKStk)
- *	or rbs_switch, or to .work_pending_syscall when work is pending; those labels
- *	are not defined inside this procedure.
- */
-GLOBAL_ENTRY(__ia64_leave_syscall)
-	PT_REGS_UNWIND_INFO(0)
-	/*
-	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
-	 * user- or fsys-mode, hence we disable interrupts early on.
-	 *
-	 * p6 controls whether current_thread_info()->flags needs to be checked for
-	 * extra work.  We always check for extra work when returning to user-level.
-	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
-	 * is 0.  After extra work processing has been completed, execution
-	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
-	 * needs to be redone.
-	 */
-#ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
-	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-	.pred.rel.mutex pUStk,pKStk
-(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
-(pUStk)	mov r21=0			// r21 <- 0
-	;;
-	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
-#else /* !CONFIG_PREEMPT */
-(pUStk)	rsm psr.i
-	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
-(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
-#endif
-.work_processed_syscall:		// re-entered from .work_pending_syscall_end
-	adds r2=PT(LOADRS)+16,r12
-	adds r3=PT(AR_BSPSTORE)+16,r12
-	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
-	;;
-(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
-	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
-	nop.i 0
-	;;
-	mov r16=ar.bsp				// M2  get existing backing store pointer
-	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
-(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
-	;;
-	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
-(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
-(p6)	br.cond.spnt .work_pending_syscall
-	;;
-	// start restoring the state saved on the kernel stack (struct pt_regs):
-	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
-	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
-	;;
-	invala			// M0|1 invalidate ALAT
-	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
-	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
-
-	ld8 r29=[r2],16		// M0|1 load cr.ipsr
-	ld8 r28=[r3],16		// M0|1 load cr.iip
-	mov r22=r0		// A    clear r22
-	;;
-	ld8 r30=[r2],16		// M0|1 load cr.ifs
-	ld8 r25=[r3],16		// M0|1 load ar.unat
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
-	;;
-	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
-(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
-	nop 0
-	;;
-	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
-	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
-	mov f6=f0			// F    clear f6
-	;;
-	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
-	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
-	mov f7=f0				// F    clear f7
-	;;
-	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
-	ld8.fill r1=[r3],16			// M0|1 load r1
-(pUStk) mov r17=1				// A
-	;;
-(pUStk) st1 [r14]=r17				// M2|3 store 1 at IA64_TASK_THREAD_ON_USTACK_OFFSET off r13
-	ld8.fill r13=[r3],16			// M0|1
-	mov f8=f0				// F    clear f8
-	;;
-	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
-	ld8.fill r15=[r3]			// M0|1 restore r15
-	mov b6=r18				// I0   restore b6
-
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
-	mov f9=f0					// F    clear f9
-(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
-
-	srlz.d				// M0   ensure interruption collection is off (for cover)
-	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
-	cover				// B    add current frame into dirty partition & set cr.ifs
-	;;
-(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
-	mov r19=ar.bsp			// M2   get new backing store pointer
-	mov f10=f0			// F    clear f10
-
-	nop.m 0
-	movl r14=__kernel_syscall_via_epc // X
-	;;
-	mov.m ar.csd=r0			// M2   clear ar.csd
-	mov.m ar.ccv=r0			// M2   clear ar.ccv
-	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
-
-	mov.m ar.ssd=r0			// M2   clear ar.ssd
-	mov f11=f0			// F    clear f11
-	br.cond.sptk.many rbs_switch	// B
-END(__ia64_leave_syscall)
-
-#ifdef CONFIG_IA32_SUPPORT
-GLOBAL_ENTRY(ia64_ret_from_ia32_execve)	// installed as rp by ia64_execve when the IS bit is set in the saved ipsr
-	PT_REGS_UNWIND_INFO(0)
-	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
-	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
-	;;
-	.mem.offset 0,0
-	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
-	.mem.offset 8,0
-	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
-	;;
-	// don't fall through, ia64_leave_kernel may be #define'd
-	br.cond.sptk.few ia64_leave_kernel
-	;;
-END(ia64_ret_from_ia32_execve)
-#endif /* CONFIG_IA32_SUPPORT */
-GLOBAL_ENTRY(__ia64_leave_kernel)
-	PT_REGS_UNWIND_INFO(0)
-	/*
-	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
-	 * user- or fsys-mode, hence we disable interrupts early on.
-	 *
-	 * p6 controls whether current_thread_info()->flags needs to be checked for
-	 * extra work.  We always check for extra work when returning to user-level.
-	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
-	 * is 0.  After extra work processing has been completed, execution
-	 * resumes at .work_processed_kernel (or, when pLvSys is set, at
-	 * .work_processed_syscall via .work_pending_syscall_end) with p6 set to 1
-	 * if the extra-work-check needs to be redone.
-	 */
-#ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
-	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-	.pred.rel.mutex pUStk,pKStk
-(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
-(pUStk)	mov r21=0			// r21 <- 0
-	;;
-	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
-#else
-(pUStk)	rsm psr.i
-	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
-(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
-#endif
-.work_processed_kernel:
-	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
-	;;
-(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
-	adds r21=PT(PR)+16,r12
-	;;
-
-	lfetch [r21],PT(CR_IPSR)-PT(PR)
-	adds r2=PT(B6)+16,r12
-	adds r3=PT(R16)+16,r12
-	;;
-	lfetch [r21]
-	ld8 r28=[r2],8		// load b6
-	adds r29=PT(R24)+16,r12
-
-	ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
-	adds r30=PT(AR_CCV)+16,r12
-(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
-	;;
-	ld8.fill r24=[r29]
-	ld8 r15=[r30]		// load ar.ccv
-(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
-	;;
-	ld8 r29=[r2],16		// load b7
-	ld8 r30=[r3],16		// load ar.csd
-(p6)	br.cond.spnt .work_pending
-	;;
-	ld8 r31=[r2],16		// load ar.ssd
-	ld8.fill r8=[r3],16
-	;;
-	ld8.fill r9=[r2],16
-	ld8.fill r10=[r3],PT(R17)-PT(R10)
-	;;
-	ld8.fill r11=[r2],PT(R18)-PT(R11)
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	mov ar.csd=r30
-	mov ar.ssd=r31
-	;;
-	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
-	invala			// invalidate ALAT
-	;;
-	ld8.fill r22=[r2],24
-	ld8.fill r23=[r3],24
-	mov b6=r28
-	;;
-	ld8.fill r25=[r2],16
-	ld8.fill r26=[r3],16
-	mov b7=r29
-	;;
-	ld8.fill r27=[r2],16
-	ld8.fill r28=[r3],16
-	;;
-	ld8.fill r29=[r2],16
-	ld8.fill r30=[r3],24
-	;;
-	ld8.fill r31=[r2],PT(F9)-PT(R31)
-	adds r3=PT(F10)-PT(F6),r3
-	;;
-	ldf.fill f9=[r2],PT(F6)-PT(F9)
-	ldf.fill f10=[r3],PT(F8)-PT(F10)
-	;;
-	ldf.fill f6=[r2],PT(F7)-PT(F6)
-	;;
-	ldf.fill f7=[r2],PT(F11)-PT(F7)
-	ldf.fill f8=[r3],32
-	;;
-	srlz.d	// ensure that inter. collection is off (VHPT is don't care, since text is pinned)
-	mov ar.ccv=r15
-	;;
-	ldf.fill f11=[r2]
-	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
-	;;
-(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
-	adds r16=PT(CR_IPSR)+16,r12
-	adds r17=PT(CR_IIP)+16,r12
-
-(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
-	nop.i 0
-	nop.i 0
-	;;
-	ld8 r29=[r16],16	// load cr.ipsr
-	ld8 r28=[r17],16	// load cr.iip
-	;;
-	ld8 r30=[r16],16	// load cr.ifs
-	ld8 r25=[r17],16	// load ar.unat
-	;;
-	ld8 r26=[r16],16	// load ar.pfs
-	ld8 r27=[r17],16	// load ar.rsc
-	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
-	;;
-	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
-	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
-	;;
-	ld8 r31=[r16],16	// load predicates
-	ld8 r21=[r17],16	// load b0
-	;;
-	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16	// load r1
-	;;
-	ld8.fill r12=[r16],16
-	ld8.fill r13=[r17],16
-(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
-	;;
-	ld8 r20=[r16],16	// ar.fpsr
-	ld8.fill r15=[r17],16
-	;;
-	ld8.fill r14=[r16],16
-	ld8.fill r2=[r17]
-(pUStk)	mov r17=1
-	;;
-	ld8.fill r3=[r16]
-(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
-	shr.u r18=r19,16	// get byte size of existing "dirty" partition
-	;;
-	mov r16=ar.bsp		// get existing backing store pointer
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-	;;
-	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
-(pKStk)	br.cond.dpnt skip_rbs_switch
-
-	/*
-	 * Restore user backing store.
-	 *
-	 * NOTE: alloc, loadrs, and cover can't be predicated.
-	 */
-(pNonSys) br.cond.dpnt dont_preserve_current_frame
-	cover				// add current frame into dirty partition and set cr.ifs
-	;;
-	mov r19=ar.bsp			// get new backing store pointer
-rbs_switch:				// also branched to from __ia64_leave_syscall
-	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
-	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
-	;;
-	sub r19=r19,r16			// calculate total byte size of dirty partition
-	add r18=64,r18			// don't force in0-in7 into memory...
-	;;
-	shl r19=r19,16			// shift size of dirty partition into loadrs position
-	;;
-dont_preserve_current_frame:
-	/*
-	 * To prevent leaking bits between the kernel and user-space,
-	 * we must clear the stacked registers in the "invalid" partition here.
-	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
-	 * 5 registers/cycle on McKinley).
-	 *
-	 * rse_clear_invalid calls itself: each level zeroes its locals, in0 carries
-	 * the remaining byte count (decremented by Nregs*8 per level) and in1 the
-	 * recursion depth, which tells a level whether it has to br.ret.
-	 */
-#	define pRecurse	p6
-#	define pReturn	p7
-#ifdef CONFIG_ITANIUM
-#	define Nregs	10
-#else
-#	define Nregs	14
-#endif
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
-	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
-	;;
-	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
-	shladd in0=loc1,3,r17
-	mov in1=0
-	;;
-	TEXT_ALIGN(32)
-rse_clear_invalid:
-#ifdef CONFIG_ITANIUM
-	// cycle 0
- { .mii
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
-	add out0=-Nregs*8,in0
-}{ .mfb
-	add out1=1,in1			// increment recursion count
-	nop.f 0
-	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
-	;;
-}{ .mfi	// cycle 1
-	mov loc1=0
-	nop.f 0
-	mov loc2=0
-}{ .mib
-	mov loc3=0
-	mov loc4=0
-(pRecurse) br.call.sptk.many b0=rse_clear_invalid
-
-}{ .mfi	// cycle 2
-	mov loc5=0
-	nop.f 0
-	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
-}{ .mib
-	mov loc6=0
-	mov loc7=0
-(pReturn) br.ret.sptk.many b0
-}
-#else /* !CONFIG_ITANIUM */
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
-	add out0=-Nregs*8,in0
-	add out1=1,in1			// increment recursion count
-	mov loc1=0
-	mov loc2=0
-	;;
-	mov loc3=0
-	mov loc4=0
-	mov loc5=0
-	mov loc6=0
-	mov loc7=0
-(pRecurse) br.call.dptk.few b0=rse_clear_invalid
-	;;
-	mov loc8=0
-	mov loc9=0
-	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
-	mov loc10=0
-	mov loc11=0
-(pReturn) br.ret.dptk.many b0
-#endif /* !CONFIG_ITANIUM */
-#	undef pRecurse
-#	undef pReturn
-	;;
-	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
-	;;
-	loadrs
-	;;
-skip_rbs_switch:		// common tail, also branched to from __ia64_leave_syscall; ends in rfi
-	mov ar.unat=r25		// M2
-(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
-(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
-	;;
-(pUStk)	mov ar.bspstore=r23	// M2
-(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
-(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
-	;;
-	mov cr.ipsr=r29		// M2
-	mov ar.pfs=r26		// I0
-(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
-
-(p9)	mov cr.ifs=r30		// M2
-	mov b0=r21		// I0
-(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
-
-	mov ar.fpsr=r20		// M2
-	mov cr.iip=r28		// M2
-	nop 0
-	;;
-(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
-	nop 0
-(pLvSys)mov r2=r0
-
-	mov ar.rsc=r27		// M2
-	mov pr=r31,-1		// I0
-	rfi			// B
-
-	/*
-	 * On entry:
-	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
-	 *	r31 = current->thread_info->flags
-	 * On exit:
-	 *	p6 = TRUE if work-pending-check needs to be redone
-	 *
-	 * .work_pending_syscall additionally stores r8 and r10 at r2-8 and r3-8
-	 * (presumably the pt_regs r8/r10 slots -- verify against the pt_regs layout);
-	 * .work_pending_syscall_end reloads them from PT(R8) and PT(R10).
-	 */
-.work_pending_syscall:
-	add r2=-8,r2
-	add r3=-8,r3
-	;;
-	st8 [r2]=r8
-	st8 [r3]=r10
-.work_pending:
-	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
-(p6)	br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
-	;;
-(pKStk) st4 [r20]=r21
-	ssm psr.i		// enable interrupts
-#endif
-	br.call.spnt.many rp=schedule
-.ret9:	cmp.eq p6,p0=r0,r0				// p6 <- 1
-	rsm psr.i		// disable interrupts
-	;;
-#ifdef CONFIG_PREEMPT
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
-#endif
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// re-check
-
-.notify:
-(pUStk)	br.call.spnt.many rp=notify_resume_user
-.ret10:	cmp.ne p6,p0=r0,r0				// p6 <- 0
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// don't re-check
-
-.work_pending_syscall_end:
-	adds r2=PT(R8)+16,r12
-	adds r3=PT(R10)+16,r12
-	;;
-	ld8 r8=[r2]
-	ld8 r10=[r3]
-	br.cond.sptk.many .work_processed_syscall	// re-check
-
-END(__ia64_leave_kernel)
-
-ENTRY(handle_syscall_error)
-	/*
-	 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
-	 * lead us to mistake a negative return value as a failed syscall.  Those syscalls
-	 * must deposit a non-zero value in pt_regs.r8 to indicate an error.  If
-	 * pt_regs.r8 is zero, we assume that the call completed successfully.
-	 */
-	PT_REGS_UNWIND_INFO(0)
-	ld8 r3=[r2]		// load pt_regs.r8 (assumes the caller left r2 = &pt_regs.r8 -- TODO confirm)
-	;;
-	cmp.eq p6,p7=r3,r0	// is pt_regs.r8==0?  p6 = success, p7 = error
-	;;
-(p7)	mov r10=-1		// error case: r10 = -1
-(p7)	sub r8=0,r8		// negate return value to get errno
-	br.cond.sptk ia64_leave_syscall	// both cases continue at ia64_leave_syscall
-END(handle_syscall_error)
-
-	/*
-	 * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
-	 * in case a system call gets restarted.
-	 */
-GLOBAL_ENTRY(ia64_invoke_schedule_tail)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,1,0	// 8 inputs (in0-in7 stay intact), 2 locals, 1 output; loc1 = ar.pfs
-	mov loc0=rp		// save return address
-	mov out0=r8				// Address of previous task
-	;;
-	br.call.sptk.many rp=schedule_tail
-.ret11:	mov ar.pfs=loc1		// restore ar.pfs saved by the alloc above
-	mov rp=loc0		// restore return address
-	br.ret.sptk.many rp
-END(ia64_invoke_schedule_tail)
-
-	/*
-	 * Setup stack and call do_notify_resume_user().  Note that pSys and pNonSys need to
-	 * be set up by the caller.  We declare 8 input registers so the system call
-	 * args get preserved, in case we need to restart a system call.
-	 */
-GLOBAL_ENTRY(notify_resume_user)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
-	mov r9=ar.unat		// r9 = current ar.unat, stored at [sp] below
-	mov loc0=rp				// save return address
-	mov out0=0				// there is no "oldset"
-	adds out1=8,sp				// out1=&sigscratch->ar_pfs
-(pSys)	mov out2=1				// out2==1 => we're in a syscall
-	;;
-(pNonSys) mov out2=0				// out2==0 => not a syscall
-	.fframe 16
-	.spillsp ar.unat, 16
-	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
-	st8 [out1]=loc1,-8			// save ar.pfs, out1=&sigscratch
-	.body
-	br.call.sptk.many rp=do_notify_resume_user	// args: out0=0, out1=&sigscratch, out2=in-syscall flag
-.ret15:	.restore sp
-	adds sp=16,sp				// pop scratch stack space
-	;;
-	ld8 r9=[sp]				// load new unat from sigscratch->scratch_unat
-	mov rp=loc0		// restore return address
-	;;
-	mov ar.unat=r9		// install the reloaded unat value
-	mov ar.pfs=loc1		// restore ar.pfs saved by the alloc above
-	br.ret.sptk.many rp
-END(notify_resume_user)
-
-GLOBAL_ENTRY(sys_rt_sigsuspend)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
-	mov r9=ar.unat		// r9 = current ar.unat, stored at [sp] below
-	mov loc0=rp				// save return address
-	mov out0=in0				// mask
-	mov out1=in1				// sigsetsize
-	adds out2=8,sp				// out2=&sigscratch->ar_pfs
-	;;
-	.fframe 16
-	.spillsp ar.unat, 16
-	st8 [sp]=r9,-16				// allocate space for ar.unat and save it
-	st8 [out2]=loc1,-8			// save ar.pfs, out2=&sigscratch
-	.body
-	br.call.sptk.many rp=ia64_rt_sigsuspend	// args: out0=mask, out1=sigsetsize, out2=&sigscratch
-.ret17:	.restore sp
-	adds sp=16,sp				// pop scratch stack space
-	;;
-	ld8 r9=[sp]				// load new unat from the sigscratch slot ar.unat was saved to above
-	mov rp=loc0		// restore return address
-	;;
-	mov ar.unat=r9		// install the reloaded unat value
-	mov ar.pfs=loc1		// restore ar.pfs saved by the alloc above
-	br.ret.sptk.many rp
-END(sys_rt_sigsuspend)
-
-ENTRY(sys_rt_sigreturn)
-	PT_REGS_UNWIND_INFO(0)
-	/*
-	 * Allocate 8 input registers since ptrace() may clobber them
-	 */
-	alloc r2=ar.pfs,8,0,1,0	// 8 inputs, 0 locals, 1 output (out0)
-	.prologue
-	PT_REGS_SAVES(16)
-	adds sp=-16,sp		// reserve 16 bytes of stack; popped again at .ret19
-	.body
-	cmp.eq pNonSys,pSys=r0,r0		// sigreturn isn't a normal syscall...
-	;;
-	/*
-	 * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
-	 * syscall-entry path does not save them we save them here instead.  Note: we
-	 * don't need to save any other registers that are not saved by the stream-lined
-	 * syscall path, because restore_sigcontext() restores them.
-	 */
-	adds r16=PT(F6)+32,sp	// r16 -> save slot for f6 (then f8, f10 via post-increment)
-	adds r17=PT(F7)+32,sp	// r17 -> save slot for f7 (then f9, f11 via post-increment)
-	;;
- 	stf.spill [r16]=f6,32
- 	stf.spill [r17]=f7,32
-	;;
- 	stf.spill [r16]=f8,32
- 	stf.spill [r17]=f9,32
-	;;
- 	stf.spill [r16]=f10
- 	stf.spill [r17]=f11
-	adds out0=16,sp				// out0 = &sigscratch
-	br.call.sptk.many rp=ia64_rt_sigreturn
-.ret19:	.restore sp,0
-	adds sp=16,sp		// pop the 16 bytes reserved above
-	;;
-	ld8 r9=[sp]				// load new ar.unat
-	mov.sptk b7=r8,__ia64_leave_kernel	// b7 = r8 (presumably the address returned by ia64_rt_sigreturn); hinted target is __ia64_leave_kernel
-	;;
-	mov ar.unat=r9		// install the new ar.unat
-	br.many b7		// continue at the address held in b7
-END(sys_rt_sigreturn)
-
-GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
-	.prologue
-	/*
-	 * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
-	 */
-	mov r16=r0		// r16 = 0 serves as the fake ar.pfs
-	DO_SAVE_SWITCH_STACK	// macro defined outside this view; presumably saves the switch_stack state
-	br.call.sptk.many rp=ia64_handle_unaligned	// stack frame setup in ivt
-.ret21:	.body
-	DO_LOAD_SWITCH_STACK	// macro defined outside this view; presumably the inverse of DO_SAVE_SWITCH_STACK
-	br.cond.sptk.many rp				// goes to ia64_leave_kernel
-END(ia64_prepare_handle_unaligned)
-
-	//
-	// unw_init_running(void (*callback)(info, arg), void *arg)
-	//
-#	define EXTRA_FRAME_SIZE	((UNW_FRAME_INFO_SIZE+15)&~15)
-
-GLOBAL_ENTRY(unw_init_running)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
-	alloc loc1=ar.pfs,2,3,3,0	// 2 inputs (callback, arg), 3 locals, 3 outputs; loc1 = ar.pfs
-	;;
-	ld8 loc2=[in0],8	// loc2 = first word at *callback (moved to b6 below); in0 += 8
-	mov loc0=rp		// save return address
-	mov r16=loc1		// r16 = ar.pfs, handed to DO_SAVE_SWITCH_STACK (presumably its input -- confirm)
-	DO_SAVE_SWITCH_STACK
-	.body
-
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
-	.fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
-	SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
-	adds sp=-EXTRA_FRAME_SIZE,sp	// reserve the extra frame (holds the unwind info passed as &info)
-	.body
-	;;
-	adds out0=16,sp				// &info
-	mov out1=r13				// current
-	adds out2=16+EXTRA_FRAME_SIZE,sp	// &switch_stack
-	br.call.sptk.many rp=unw_init_frame_info	// unw_init_frame_info(&info, current, &switch_stack)
-1:	adds out0=16,sp				// &info
-	mov b6=loc2		// b6 = callback entry address loaded above
-	mov loc2=gp				// save gp across indirect function call
-	;;
-	ld8 gp=[in0]		// gp = second word at *callback (in0 was advanced by 8 above)
-	mov out1=in1				// arg
-	br.call.sptk.many rp=b6			// invoke the callback function
-1:	mov gp=loc2				// restore gp
-
-	// For now, we don't allow changing registers from within
-	// unw_init_running; if we ever want to allow that, we'd
-	// have to do a load_switch_stack here:
-	.restore sp
-	adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp	// drop switch_stack and extra frame in one step
-
-	mov ar.pfs=loc1		// restore ar.pfs saved by the alloc above
-	mov rp=loc0		// restore return address
-	br.ret.sptk.many rp
-END(unw_init_running)
-
-	.rodata
-	.align 8
-	.globl sys_call_table
-sys_call_table:
-	data8 sys_ni_syscall		//  This must be sys_ni_syscall!  See ivt.S.
-	data8 sys_exit				// 1025
-	data8 sys_read
-	data8 sys_write
-	data8 sys_open
-	data8 sys_close
-	data8 sys_creat				// 1030
-	data8 sys_link
-	data8 sys_unlink
-	data8 ia64_execve
-	data8 sys_chdir
-	data8 sys_fchdir			// 1035
-	data8 sys_utimes
-	data8 sys_mknod
-	data8 sys_chmod
-	data8 sys_chown
-	data8 sys_lseek				// 1040
-	data8 sys_getpid
-	data8 sys_getppid
-	data8 sys_mount
-	data8 sys_umount
-	data8 sys_setuid			// 1045
-	data8 sys_getuid
-	data8 sys_geteuid
-	data8 sys_ptrace
-	data8 sys_access
-	data8 sys_sync				// 1050
-	data8 sys_fsync
-	data8 sys_fdatasync
-	data8 sys_kill
-	data8 sys_rename
-	data8 sys_mkdir				// 1055
-	data8 sys_rmdir
-	data8 sys_dup
-	data8 sys_pipe
-	data8 sys_times
-	data8 ia64_brk				// 1060
-	data8 sys_setgid
-	data8 sys_getgid
-	data8 sys_getegid
-	data8 sys_acct
-	data8 sys_ioctl				// 1065
-	data8 sys_fcntl
-	data8 sys_umask
-	data8 sys_chroot
-	data8 sys_ustat
-	data8 sys_dup2				// 1070
-	data8 sys_setreuid
-	data8 sys_setregid
-	data8 sys_getresuid
-	data8 sys_setresuid
-	data8 sys_getresgid			// 1075
-	data8 sys_setresgid
-	data8 sys_getgroups
-	data8 sys_setgroups
-	data8 sys_getpgid
-	data8 sys_setpgid			// 1080
-	data8 sys_setsid
-	data8 sys_getsid
-	data8 sys_sethostname
-	data8 sys_setrlimit
-	data8 sys_getrlimit			// 1085
-	data8 sys_getrusage
-	data8 sys_gettimeofday
-	data8 sys_settimeofday
-	data8 sys_select
-	data8 sys_poll				// 1090
-	data8 sys_symlink
-	data8 sys_readlink
-	data8 sys_uselib
-	data8 sys_swapon
-	data8 sys_swapoff			// 1095
-	data8 sys_reboot
-	data8 sys_truncate
-	data8 sys_ftruncate
-	data8 sys_fchmod
-	data8 sys_fchown			// 1100
-	data8 ia64_getpriority
-	data8 sys_setpriority
-	data8 sys_statfs
-	data8 sys_fstatfs
-	data8 sys_gettid			// 1105
-	data8 sys_semget
-	data8 sys_semop
-	data8 sys_semctl
-	data8 sys_msgget
-	data8 sys_msgsnd			// 1110
-	data8 sys_msgrcv
-	data8 sys_msgctl
-	data8 sys_shmget
-	data8 sys_shmat
-	data8 sys_shmdt				// 1115
-	data8 sys_shmctl
-	data8 sys_syslog
-	data8 sys_setitimer
-	data8 sys_getitimer
-	data8 sys_ni_syscall			// 1120		/* was: ia64_oldstat */
-	data8 sys_ni_syscall					/* was: ia64_oldlstat */
-	data8 sys_ni_syscall					/* was: ia64_oldfstat */
-	data8 sys_vhangup
-	data8 sys_lchown
-	data8 sys_remap_file_pages		// 1125
-	data8 sys_wait4
-	data8 sys_sysinfo
-	data8 sys_clone
-	data8 sys_setdomainname
-	data8 sys_newuname			// 1130
-	data8 sys_adjtimex
-	data8 sys_ni_syscall					/* was: ia64_create_module */
-	data8 sys_init_module
-	data8 sys_delete_module
-	data8 sys_ni_syscall			// 1135		/* was: sys_get_kernel_syms */
-	data8 sys_ni_syscall					/* was: sys_query_module */
-	data8 sys_quotactl
-	data8 sys_bdflush
-	data8 sys_sysfs
-	data8 sys_personality			// 1140
-	data8 sys_ni_syscall		// sys_afs_syscall
-	data8 sys_setfsuid
-	data8 sys_setfsgid
-	data8 sys_getdents
-	data8 sys_flock				// 1145
-	data8 sys_readv
-	data8 sys_writev
-	data8 sys_pread64
-	data8 sys_pwrite64
-	data8 sys_sysctl			// 1150
-	data8 sys_mmap
-	data8 sys_munmap
-	data8 sys_mlock
-	data8 sys_mlockall
-	data8 sys_mprotect			// 1155
-	data8 ia64_mremap
-	data8 sys_msync
-	data8 sys_munlock
-	data8 sys_munlockall
-	data8 sys_sched_getparam		// 1160
-	data8 sys_sched_setparam
-	data8 sys_sched_getscheduler
-	data8 sys_sched_setscheduler
-	data8 sys_sched_yield
-	data8 sys_sched_get_priority_max	// 1165
-	data8 sys_sched_get_priority_min
-	data8 sys_sched_rr_get_interval
-	data8 sys_nanosleep
-	data8 sys_nfsservctl
-	data8 sys_prctl				// 1170
-	data8 sys_getpagesize
-	data8 sys_mmap2
-	data8 sys_pciconfig_read
-	data8 sys_pciconfig_write
-	data8 sys_perfmonctl			// 1175
-	data8 sys_sigaltstack
-	data8 sys_rt_sigaction
-	data8 sys_rt_sigpending
-	data8 sys_rt_sigprocmask
-	data8 sys_rt_sigqueueinfo		// 1180
-	data8 sys_rt_sigreturn
-	data8 sys_rt_sigsuspend
-	data8 sys_rt_sigtimedwait
-	data8 sys_getcwd
-	data8 sys_capget			// 1185
-	data8 sys_capset
-	data8 sys_sendfile64
-	data8 sys_ni_syscall		// sys_getpmsg (STREAMS)
-	data8 sys_ni_syscall		// sys_putpmsg (STREAMS)
-	data8 sys_socket			// 1190
-	data8 sys_bind
-	data8 sys_connect
-	data8 sys_listen
-	data8 sys_accept
-	data8 sys_getsockname			// 1195
-	data8 sys_getpeername
-	data8 sys_socketpair
-	data8 sys_send
-	data8 sys_sendto
-	data8 sys_recv				// 1200
-	data8 sys_recvfrom
-	data8 sys_shutdown
-	data8 sys_setsockopt
-	data8 sys_getsockopt
-	data8 sys_sendmsg			// 1205
-	data8 sys_recvmsg
-	data8 sys_pivot_root
-	data8 sys_mincore
-	data8 sys_madvise
-	data8 sys_newstat			// 1210
-	data8 sys_newlstat
-	data8 sys_newfstat
-	data8 sys_clone2
-	data8 sys_getdents64
-	data8 sys_getunwind			// 1215
-	data8 sys_readahead
-	data8 sys_setxattr
-	data8 sys_lsetxattr
-	data8 sys_fsetxattr
-	data8 sys_getxattr			// 1220
-	data8 sys_lgetxattr
-	data8 sys_fgetxattr
-	data8 sys_listxattr
-	data8 sys_llistxattr
-	data8 sys_flistxattr			// 1225
-	data8 sys_removexattr
-	data8 sys_lremovexattr
-	data8 sys_fremovexattr
-	data8 sys_tkill
-	data8 sys_futex				// 1230
-	data8 sys_sched_setaffinity
-	data8 sys_sched_getaffinity
-	data8 sys_set_tid_address
-	data8 sys_fadvise64_64
-	data8 sys_tgkill 			// 1235
-	data8 sys_exit_group
-	data8 sys_lookup_dcookie
-	data8 sys_io_setup
-	data8 sys_io_destroy
-	data8 sys_io_getevents			// 1240
-	data8 sys_io_submit
-	data8 sys_io_cancel
-	data8 sys_epoll_create
-	data8 sys_epoll_ctl
-	data8 sys_epoll_wait			// 1245
-	data8 sys_restart_syscall
-	data8 sys_semtimedop
-	data8 sys_timer_create
-	data8 sys_timer_settime
-	data8 sys_timer_gettime			// 1250
-	data8 sys_timer_getoverrun
-	data8 sys_timer_delete
-	data8 sys_clock_settime
-	data8 sys_clock_gettime
-	data8 sys_clock_getres			// 1255
-	data8 sys_clock_nanosleep
-	data8 sys_fstatfs64
-	data8 sys_statfs64
-	data8 sys_mbind
-	data8 sys_get_mempolicy			// 1260
-	data8 sys_set_mempolicy
-	data8 sys_mq_open
-	data8 sys_mq_unlink
-	data8 sys_mq_timedsend
-	data8 sys_mq_timedreceive		// 1265
-	data8 sys_mq_notify
-	data8 sys_mq_getsetattr
-	data8 sys_ni_syscall			// reserved for kexec_load
-	data8 sys_ni_syscall			// reserved for vserver
-	data8 sys_waitid			// 1270
-	data8 sys_add_key
-	data8 sys_request_key
-	data8 sys_keyctl
-	data8 sys_ioprio_set
-	data8 sys_ioprio_get			// 1275
-	data8 sys_move_pages
-	data8 sys_inotify_init
-	data8 sys_inotify_add_watch
-	data8 sys_inotify_rm_watch
-	data8 sys_migrate_pages			// 1280
-	data8 sys_openat
-	data8 sys_mkdirat
-	data8 sys_mknodat
-	data8 sys_fchownat
-	data8 sys_futimesat			// 1285
-	data8 sys_newfstatat
-	data8 sys_unlinkat
-	data8 sys_renameat
-	data8 sys_linkat
-	data8 sys_symlinkat			// 1290
-	data8 sys_readlinkat
-	data8 sys_fchmodat
-	data8 sys_faccessat
-	data8 sys_ni_syscall			// reserved for pselect
-	data8 sys_ni_syscall			// 1295 reserved for ppoll
-	data8 sys_unshare
-	data8 sys_splice
-	data8 sys_ni_syscall			// reserved for set_robust_list
-	data8 sys_ni_syscall			// reserved for get_robust_list
-	data8 sys_sync_file_range		// 1300
-	data8 sys_tee
-	data8 sys_vmsplice
-
-	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S b/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S
deleted file mode 100644
index 98b5d15855..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/fsys.S
+++ /dev/null
@@ -1,925 +0,0 @@
-/*
- * This file contains the light-weight system call handlers (fsyscall-handlers).
- *
- * Copyright (C) 2003 Hewlett-Packard Co
- * 	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 25-Sep-03 davidm	Implement fsys_rt_sigprocmask().
- * 18-Feb-03 louisk	Implement fsys_gettimeofday().
- * 28-Feb-03 davidm	Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
- *			probably broke it along the way... ;-)
- * 13-Jul-04 clameter   Implement fsys_clock_gettime and revise fsys_gettimeofday to make
- *                      it capable of using memory based clocks without falling back to C code.
- */
-
-#include <asm/asmmacro.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/percpu.h>
-#include <asm/thread_info.h>
-#include <asm/sal.h>
-#include <asm/signal.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-
-#include "entry.h"
-
-/*
- * See Documentation/ia64/fsys.txt for details on fsyscalls.
- *
- * On entry to an fsyscall handler:
- *   r10	= 0 (i.e., defaults to "successful syscall return")
- *   r11	= saved ar.pfs (a user-level value)
- *   r15	= system call number
- *   r16	= "current" task pointer (in normal kernel-mode, this is in r13)
- *   r32-r39	= system call arguments
- *   b6		= return address (a user-level value)
- *   ar.pfs	= previous frame-state (a user-level value)
- *   PSR.be	= cleared to zero (i.e., little-endian byte order is in effect)
- *   all other registers may contain values passed in from user-mode
- *
- * On return from an fsyscall handler:
- *   r11	= saved ar.pfs (as passed into the fsyscall handler)
- *   r15	= system call number (as passed into the fsyscall handler)
- *   r32-r39	= system call arguments (as passed into the fsyscall handler)
- *   b6		= return address (as passed into the fsyscall handler)
- *   ar.pfs	= previous frame-state (as passed into the fsyscall handler)
- */
-
-ENTRY(fsys_ni_syscall)
-	.prologue
-	.altrp b6
-	.body
-	mov r8=ENOSYS		// r8 = error code
-	mov r10=-1		// r10 = -1 marks failure (r10 is 0, i.e. success, on entry)
-	FSYS_RETURN
-END(fsys_ni_syscall)
-
-ENTRY(fsys_getpid)
-	.prologue
-	.altrp b6
-	.body
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = &current->thread_info->flags (r16 = current)
-	;;
-	ld4 r9=[r9]		// r9 = current->thread_info->flags
-	add r8=IA64_TASK_TGID_OFFSET,r16	// r8 = &current->tgid
-	;;
-	and r9=TIF_ALLWORK_MASK,r9	// keep only the bits covered by TIF_ALLWORK_MASK
-	ld4 r8=[r8]				// r8 = current->tgid
-	;;
-	cmp.ne p8,p0=0,r9	// p8 = some work bit is set
-(p8)	br.spnt.many fsys_fallback_syscall	// work pending -> take the heavy-weight path
-	FSYS_RETURN
-END(fsys_getpid)
-
-ENTRY(fsys_getppid)
-	.prologue
-	.altrp b6
-	.body
-	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16	// r17 = &current->group_leader (r16 = current)
-	;;
-	ld8 r17=[r17]				// r17 = current->group_leader
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = &current->thread_info->flags
-	;;
-
-	ld4 r9=[r9]		// r9 = current->thread_info->flags
-	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
-	;;
-	and r9=TIF_ALLWORK_MASK,r9	// keep only the bits covered by TIF_ALLWORK_MASK
-
-1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
-	;;
-	cmp.ne p8,p0=0,r9	// p8 = work pending. NOTE(review): p8 is only consumed in the CONFIG_SMP arm below -- confirm that is intended
-	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
-	;;
-
-	/*
-	 * The .acq is needed to ensure that the read of tgid has returned its data before
-	 * we re-check "real_parent".
-	 */
-	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
-#ifdef CONFIG_SMP
-	/*
-	 * Re-read current->group_leader->real_parent.
-	 */
-	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
-(p8)	br.spnt.many fsys_fallback_syscall	// work pending -> take the heavy-weight path
-	;;
-	cmp.ne p6,p0=r18,r19			// did real_parent change?
-	mov r19=0			// i must not leak kernel bits...
-(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
-	;;
-	mov r17=0			// i must not leak kernel bits...
-	mov r18=0			// i must not leak kernel bits...
-#else
-	mov r17=0			// i must not leak kernel bits...
-	mov r18=0			// i must not leak kernel bits...
-	mov r19=0			// i must not leak kernel bits...
-#endif
-	FSYS_RETURN
-END(fsys_getppid)
-
-ENTRY(fsys_set_tid_address)
-	.prologue
-	.altrp b6
-	.body
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = &current->thread_info->flags (r16 = current)
-	;;
-	ld4 r9=[r9]		// r9 = current->thread_info->flags
-	tnat.z p6,p7=r32		// check argument register for being NaT
-	;;
-	and r9=TIF_ALLWORK_MASK,r9	// keep only the bits covered by TIF_ALLWORK_MASK
-	add r8=IA64_TASK_PID_OFFSET,r16	// r8 = current + IA64_TASK_PID_OFFSET (address of the pid field)
-	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16	// r18 = address of the clear_child_tid field
-	;;
-	ld4 r8=[r8]		// r8 = pid value, left in r8 as the result
-	cmp.ne p8,p0=0,r9	// p8 = some work bit is set
-	mov r17=-1		// value stored when the argument is NaT
-	;;
-(p6)	st8 [r18]=r32		// argument is not NaT: store it
-(p7)	st8 [r18]=r17		// argument is NaT: store -1 instead
-(p8)	br.spnt.many fsys_fallback_syscall	// work pending -> heavy-weight path (the store above has already been issued)
-	;;
-	mov r17=0			// i must not leak kernel bits...
-	mov r18=0			// i must not leak kernel bits...
-	FSYS_RETURN
-END(fsys_set_tid_address)
-
-/*
- * Ensure that the time interpolator structure is compatible with the asm code
- */
-#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
-	|| IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
-#error fsys_gettimeofday incompatible with changes to struct time_interpolator
-#endif
-#define CLOCK_REALTIME 0
-#define CLOCK_MONOTONIC 1
-#define CLOCK_DIVIDE_BY_1000 0x4000
-#define CLOCK_ADD_MONOTONIC 0x8000
-
-ENTRY(fsys_gettimeofday)
-	.prologue
-	.altrp b6
-	.body
-	mov r31 = r32		// r31 = first argument: pointer to the result (see register map)
-	tnat.nz p6,p0 = r33		// guard against NaT argument
-(p6)    br.cond.spnt.few .fail_einval
-	mov r30 = CLOCK_DIVIDE_BY_1000	// flags for .gettime: bit 14 set -> p14 (divide nsec by 1000)
-	;;
-.gettime:
-	// Register map
-	// Incoming r31 = pointer to address where to place result
-	//          r30 = flags determining how time is processed
-	// r2,r3 = temp r4-r7 preserved
-	// r8 = result nanoseconds
-	// r9 = result seconds
-	// r10 = temporary storage for clock difference
-	// r11 = preserved: saved ar.pfs
-	// r12 = preserved: memory stack
-	// r13 = preserved: thread pointer
-	// r14 = address of mask / mask
-	// r15 = preserved: system call number
-	// r16 = preserved: current task pointer
-	// r17 = wall to monotonic use
-	// r18 = time_interpolator->offset
-	// r19 = address of wall_to_monotonic
-	// r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
-	// r21 = shift factor
-	// r22 = address of time interpolator->last_counter
-	// r23 = address of time_interpolator->last_cycle
-	// r24 = address of time_interpolator->offset
-	// r25 = last_cycle value
-	// r26 = last_counter value
-	// r27 = pointer to xtime
-	// r28 = sequence number at the beginning of critical section
-	// r29 = address of seqlock
-	// r30 = time processing flags / memory address
-	// r31 = pointer to result
-	// Predicates
-	// p6,p7 short term use
-	// p8 = timesource ar.itc
-	// p9 = timesource mmio64
-	// p10 = timesource mmio32
-	// p11 = timesource not to be handled by asm code
-	// p12 = memory time source ( = p9 | p10)
-	// p13 = do cmpxchg with time_interpolator_last_cycle
-	// p14 = Divide by 1000
-	// p15 = Add monotonic
-	//
-	// Note that instructions are optimized for McKinley. McKinley can process two
-	// bundles simultaneously and therefore we continuously try to feed the CPU
-	// two bundles and then a stop.
-	tnat.nz p6,p0 = r31	// branch deferred since it does not fit into bundle structure
-	mov pr = r30,0xc000	// Set predicates according to function (mask 0xc000: only p14/p15 are written)
-	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
-	movl r20 = time_interpolator
-	;;
-	ld8 r20 = [r20]		// get pointer to time_interpolator structure
-	movl r29 = xtime_lock
-	ld4 r2 = [r2]		// process work pending flags
-	movl r27 = xtime
-	;;	// only one bundle here
-	ld8 r21 = [r20]		// first quad with control information
-	and r2 = TIF_ALLWORK_MASK,r2
-(p6)    br.cond.spnt.few .fail_einval	// deferred branch
-	;;
-	add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
-	extr r3 = r21,32,32	// time_interpolator->nsec_per_cyc
-	extr r8 = r21,0,16	// time_interpolator->source
-	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
-(p6)    br.cond.spnt.many fsys_fallback_syscall
-	;;
-	cmp.eq p8,p12 = 0,r8	// Check for cpu timer
-	cmp.eq p9,p0 = 1,r8	// MMIO64 ?
-	extr r2 = r21,24,8	// time_interpolator->jitter
-	cmp.eq p10,p0 = 2,r8	// MMIO32 ?
-	cmp.ltu p11,p0 = 2,r8	// function or other clock
-(p11)	br.cond.spnt.many fsys_fallback_syscall
-	;;
-	setf.sig f7 = r3	// Setup for scaling of counter
-(p15)	movl r19 = wall_to_monotonic
-(p12)	ld8 r30 = [r10]		// r30 = time_interpolator->address (memory time sources only)
-	cmp.ne p13,p0 = r2,r0	// need jitter compensation?
-	extr r21 = r21,16,8	// shift factor
-	;;
-.time_redo:
-	.pred.rel.mutex p8,p9,p10
-	ld4.acq r28 = [r29]	// xtime_lock.sequence. Must come first for locking purposes
-(p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
-	add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
-(p9)	ld8 r2 = [r30]		// readq(ti->address). Could also have latency issues..
-(p10)	ld4 r2 = [r30]		// readl(ti->address): 32-bit load for the MMIO32 source
-(p13)	add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
-	;;			// could be removed by moving the last add upward
-	ld8 r26 = [r22]		// time_interpolator->last_counter
-(p13)	ld8 r25 = [r23]		// time interpolator->last_cycle
-	add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
-(p15)	ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET	// wall_to_monotonic seconds; r19 advanced to the nsec field
- 	ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET	// xtime seconds; r27 advanced to the nsec field
-	add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
-	;;
-	ld8 r18 = [r24]		// time_interpolator->offset
-	ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET	// xtime.tv_nsec
-(p13)	sub r3 = r25,r2	// Diff needed before comparison (thanks davidm)
-	;;
-	ld8 r14 = [r14]		// time_interpolator->mask
-(p13)	cmp.gt.unc p6,p7 = r3,r0	// check if it is less than last. p6,p7 cleared
-	sub r10 = r2,r26	// current_counter - last_counter
-	;;
-(p6)	sub r10 = r25,r26	// time we got was less than last_cycle
-(p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
-	;;
-	and r10 = r10,r14	// Apply mask
-	;;
-	setf.sig f8 = r10
-	nop.i 123
-	;;
-(p7)	cmpxchg8.rel r3 = [r23],r2,ar.ccv
-EX(.fail_efault, probe.w.fault r31, 3)	// This takes 5 cycles and we have spare time
-	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
-(p15)	add r9 = r9,r17		// Add wall to monotonic.secs to result secs
-	;;
-(p15)	ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
-(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful redo
-	// simulate tbit.nz.or p7,p0 = r28,0
-	and r28 = ~1,r28	// Make sequence even to force retry if odd
-	getf.sig r2 = f8
-	mf
-	add r8 = r8,r18		// Add time interpolator offset
-	;;
-	ld4 r10 = [r29]		// xtime_lock.sequence
-(p15)	add r8 = r8, r17	// Add monotonic.nsecs to nsecs
-	shr.u r2 = r2,r21
-	;;		// overloaded 3 bundles!
-	// End critical section.
-	add r8 = r8,r2		// Add xtime.nsecs
-	cmp4.ne.or p7,p0 = r28,r10
-(p7)	br.cond.dpnt.few .time_redo	// sequence number changed ?
-	// Now r8=tv->tv_nsec and r9=tv->tv_sec
-	mov r10 = r0
-	movl r2 = 1000000000
-	add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
-(p14)	movl r3 = 2361183241434822607	// Prep for / 1000 hack: constant is ceil(2^68 / 125)
-	;;
-.time_normalize:
-	mov r21 = r8
-	cmp.ge p6,p0 = r8,r2
-(p14)	shr.u r20 = r8, 3		// We can repeat this if necessary just wasting some time
-	;;
-(p14)	setf.sig f8 = r20
-(p6)	sub r8 = r8,r2
-(p6)	add r9 = 1,r9			// two nops before the branch.
-(p14)	setf.sig f7 = r3		// Chances for repeats are 1 in 10000 for gettod
-(p6)	br.cond.dpnt.few .time_normalize
-	;;
-	// Divided by 8 through shift. Now divide by 125
-	// The compiler was able to do that with a multiply
-	// and a shift and we do the same
-EX(.fail_efault, probe.w.fault r23, 3)		// This also costs 5 cycles
-(p14)	xmpy.hu f8 = f8, f7			// xmpy has 5 cycles latency so use it...
-	;;
-	mov r8 = r0		// r8 = 0: successful return value
-(p14)	getf.sig r2 = f8
-	;;
-(p14)	shr.u r21 = r2, 4	// high 64 bits of (nsec/8)*ceil(2^68/125), shifted by 4 more = nsec/1000
-	;;
-EX(.fail_efault, st8 [r31] = r9)
-EX(.fail_efault, st8 [r23] = r21)
-	FSYS_RETURN
-.fail_einval:
-	mov r8 = EINVAL
-	mov r10 = -1
-	FSYS_RETURN
-.fail_efault:
-	mov r8 = EFAULT
-	mov r10 = -1
-	FSYS_RETURN
-END(fsys_gettimeofday)
-
-ENTRY(fsys_clock_gettime)
-	.prologue
-	.altrp b6
-	.body
-	cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32	// p6 = clock id (r32) above CLOCK_MONOTONIC, unsigned compare
-	// Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
-(p6)	br.spnt.few fsys_fallback_syscall
-	mov r31 = r33		// r31 = second argument: the result pointer .gettime expects
-	shl r30 = r32,15	// r30 = flags: 0 for CLOCK_REALTIME, 0x8000 (CLOCK_ADD_MONOTONIC) for CLOCK_MONOTONIC
-	br.many .gettime	// shared body inside fsys_gettimeofday
-END(fsys_clock_gettime)
-
-/*
- * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
- */
-#if _NSIG_WORDS != 1
-# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
-#endif
-ENTRY(fsys_rt_sigprocmask)
-	.prologue
-	.altrp b6
-	.body
-
-	add r2=IA64_TASK_BLOCKED_OFFSET,r16	// r2 = &current->blocked (r16 = current)
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// r9 = &current->thread_info->flags
-	cmp4.ltu p6,p0=SIG_SETMASK,r32	// p6 = "how" (r32) above SIG_SETMASK, unsigned compare
-
-	cmp.ne p15,p0=r0,r34			// oset != NULL?
-	tnat.nz p8,p0=r34
-	add r31=IA64_TASK_SIGHAND_OFFSET,r16
-	;;
-	ld8 r3=[r2]				// read/prefetch current->blocked
-	ld4 r9=[r9]
-	tnat.nz.or p6,p0=r35
-
-	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
-	tnat.nz.or p6,p0=r32
-(p6)	br.spnt.few .fail_einval		// fail with EINVAL (label defined in fsys_gettimeofday)
-	;;
-#ifdef CONFIG_SMP
-	ld8 r31=[r31]				// r31 <- current->sighand
-#endif
-	and r9=TIF_ALLWORK_MASK,r9
-	tnat.nz.or p8,p0=r33
-	;;
-	cmp.ne p7,p0=0,r9
-	cmp.eq p6,p0=r0,r33			// set == NULL?
-	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- &current->sighand->siglock
-(p8)	br.spnt.few .fail_efault		// fail with EFAULT (label defined in fsys_gettimeofday)
-(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
-(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask
-
-	/* Argh, we actually have to do some work and _update_ the signal mask: */
-
-EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
-EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
-	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))	// bits removed from the requested set below
-	;;
-
-	rsm psr.i				// mask interrupt delivery
-	mov ar.ccv=0		// cmpxchg compare value: 0
-	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP
-
-#ifdef CONFIG_SMP
-	mov r17=1		// value written to the lock word when the cmpxchg succeeds
-	;;
-	cmpxchg4.acq r18=[r31],r17,ar.ccv	// try to acquire the lock
-	mov r8=EINVAL			// default to EINVAL
-	;;
-	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
-	cmp4.ne p6,p0=r18,r0	// previous lock word != 0 -> lock was not acquired
-(p6)	br.cond.spnt.many .lock_contention
-	;;
-#else
-	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
-	mov r8=EINVAL			// default to EINVAL
-#endif
-	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
-	add r19=IA64_TASK_SIGNAL_OFFSET,r16
-	cmp4.eq p6,p0=SIG_BLOCK,r32
-	;;
-	ld8 r19=[r19]			// r19 <- current->signal
-	cmp4.eq p7,p0=SIG_UNBLOCK,r32
-	cmp4.eq p8,p0=SIG_SETMASK,r32
-	;;
-	ld8 r18=[r18]			// r18 <- current->pending.signal
-	.pred.rel.mutex p6,p7,p8
-(p6)	or r14=r3,r14			// SIG_BLOCK
-(p7)	andcm r14=r3,r14		// SIG_UNBLOCK
-
-(p8)	mov r14=r14			// SIG_SETMASK
-(p6)	mov r8=0			// clear error code
-	// recalc_sigpending()
-	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
-
-	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
-	;;
-	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
-(p7)	mov r8=0		// clear error code
-
-	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
-	;;
-	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
-(p8)	mov r8=0		// clear error code
-
-	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
-	;;
-	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
-	andcm r18=r18,r14
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	;;
-
-(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
-	mov r19=0					// i must not leak kernel bits...
-(p6)	br.cond.dpnt.many .sig_pending
-	;;
-
-1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
-	;;
-	mov ar.ccv=r17		// cmpxchg compare value: the flags word just read
-	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
-	;;
-
-	st8 [r2]=r14				// update current->blocked with new mask
-	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
-	;;
-	cmp.ne p6,p0=r17,r8			// update failed?
-(p6)	br.cond.spnt.few 1b			// yes -> retry
-
-#ifdef CONFIG_SMP
-	st4.rel [r31]=r0			// release the lock
-#endif
-	ssm psr.i		// unmask interrupt delivery again
-	;;
-
-	srlz.d					// ensure psr.i is set again
-	mov r18=0					// i must not leak kernel bits...
-
-.store_mask:
-EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
-EX(.fail_efault, (p15) st8 [r34]=r3)
-	mov r2=0					// i must not leak kernel bits...
-	mov r3=0					// i must not leak kernel bits...
-	mov r8=0				// return 0
-	mov r9=0					// i must not leak kernel bits...
-	mov r14=0					// i must not leak kernel bits...
-	mov r17=0					// i must not leak kernel bits...
-	mov r31=0					// i must not leak kernel bits...
-	FSYS_RETURN
-
-.sig_pending:
-#ifdef CONFIG_SMP
-	st4.rel [r31]=r0			// release the lock
-#endif
-	ssm psr.i		// unmask interrupt delivery again
-	;;
-	srlz.d
-	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall
-
-#ifdef CONFIG_SMP
-.lock_contention:
-	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
-	ssm psr.i		// unmask interrupt delivery again
-	;;
-	srlz.d
-	br.sptk.many fsys_fallback_syscall
-#endif
-END(fsys_rt_sigprocmask)
-
-ENTRY(fsys_fallback_syscall)
-	.prologue
-	.altrp b6
-	.body
-	/*
-	 * We only get here from light-weight syscall handlers.  Thus, we already
-	 * know that r15 contains a valid syscall number.  No need to re-check.
-	 */
-	adds r17=-1024,r15	// r17 = syscall number - 1024 = index into sys_call_table
-	movl r14=sys_call_table
-	;;
-#ifdef CONFIG_XEN
-	movl r18=running_on_xen;;
-	ld4 r18=[r18];;		// r18 = value of running_on_xen
-	// p14 = running_on_xen
-	// p15 = !running_on_xen
-	cmp.ne p14,p15=r0,r18
-	;;    
-(p14)	movl r18=XSI_PSR_I_ADDR;;
-(p14)	ld8 r18=[r18]		// r18 = pointer stored at XSI_PSR_I_ADDR
-(p14)	mov r29=1;;
-(p14)	st1 [r18]=r29		// on Xen: store 1 through that pointer in place of rsm psr.i (presumably masks virtual interrupts -- confirm)
-(p15)	rsm psr.i		// native hardware: disable interrupts
-#else    
-	rsm psr.i		// disable interrupts
-#endif    
-	shladd r18=r17,3,r14	// r18 = sys_call_table + 8*index
-	;;
-	ld8 r18=[r18]				// load normal (heavy-weight) syscall entry-point
-#ifdef CONFIG_XEN
-(p14)	mov r27=r8		// stash r8 in r27 across XEN_HYPER_GET_PSR
-(p14)	XEN_HYPER_GET_PSR
-	;;
-(p14)	mov r29=r8		// r29 = value XEN_HYPER_GET_PSR left in r8 (used as psr, matching the native path)
-(p14)	mov r8=r27		// put the original r8 back
-(p15)	mov r29=psr				// read psr (12 cyc load latency)
-#else    
-	mov r29=psr				// read psr (12 cyc load latency)
-#endif    
-	mov r27=ar.rsc		// r27, r21 and r26 are entry inputs of fsys_bubble_down, which follows
-	mov r21=ar.fpsr
-	mov r26=ar.pfs
-END(fsys_fallback_syscall)
-	/* FALL THROUGH */
-GLOBAL_ENTRY(fsys_bubble_down)
-	.prologue
-	.altrp b6
-	.body
-	/*
-	 * We get here for syscalls that don't have a lightweight
-	 * handler.  For those, we need to bubble down into the kernel
-	 * and that requires setting up a minimal pt_regs structure,
-	 * and initializing the CPU state more or less as if an
-	 * interruption had occurred.  To make syscall-restarts work,
-	 * we setup pt_regs such that cr_iip points to the second
-	 * instruction in syscall_via_break.  Decrementing the IP
-	 * hence will restart the syscall via break and not
-	 * decrementing IP will return us to the caller, as usual.
-	 * Note that we preserve the value of psr.pp rather than
-	 * initializing it from dcr.pp.  This makes it possible to
-	 * distinguish fsyscall execution from other privileged
-	 * execution.
-	 *
-	 * On entry:
-	 *	- normal fsyscall handler register usage, except
-	 *	  that we also have:
-	 *	- r18: address of syscall entry point
-	 *	- r21: ar.fpsr
-	 *	- r26: ar.pfs
-	 *	- r27: ar.rsc
-	 *	- r29: psr
-	 *
-	 * We used to clear some PSR bits here but that requires slow
-	 * serialization.  Fortuntely, that isn't really necessary.
-	 * The rationale is as follows: we used to clear bits
-	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
-	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
-	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
-	 * However,
-	 *
-	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
-	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
-	 * PSR.I  : already turned off by the time fsys_bubble_down gets
-	 *	    invoked
-	 * PSR.DFL: always 0 (kernel never turns it on)
-	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
-	 *	    initiative
-	 * PSR.DI : always 0 (kernel never turns it on)
-	 * PSR.SI : always 0 (kernel never turns it on)
-	 * PSR.DB : don't care --- kernel never enables kernel-level
-	 *	    breakpoints
-	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
-	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
-	 *          will trigger a taken branch; the taken-trap-handler then
-	 *          converts the syscall into a break-based system-call.
-	 */
-	/*
-	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
-	 * The rest we have to synthesize.
-	 */
-#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
-					 | (0x1 << IA64_PSR_RI_BIT)	\
-					 | IA64_PSR_BN | IA64_PSR_I)
-
-	invala					// M0|1
-	movl r14=ia64_ret_from_syscall		// X
-
-	nop.m 0
-	movl r28=__kernel_syscall_via_break	// X	create cr.iip
-	;;
-
-	mov r2=r16				// A    get task addr to addl-addressable register
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
-	mov r31=pr				// I0   save pr (2 cyc)
-	;;
-	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
-	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
-	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
-	;;
-	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
-	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
-	nop.i 0
-	;;
-	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
-	nop.m 0
-	nop.i 0
-	;;
-	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
-	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
-	nop.i 0
-	;;
-	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
-	movl r8=PSR_ONE_BITS			// X
-	;;
-	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
-	mov r19=b6				// I0   save b6 (2 cyc)
-	mov r20=r1				// A    save caller's gp in r20
-	;;
-	or r29=r8,r29				// A    construct cr.ipsr value to save
-	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
-	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
-
-	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
-	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
-	br.call.sptk.many b7=ia64_syscall_setup	// B
-	;;
-	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
-	mov rp=r14				// I0   set the real return addr
-	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
-	;;
-#ifdef CONFIG_XEN
-	movl r14=running_on_xen;;
-	ld4 r14=[r14];;
-	// p14 = running_on_xen
-	// p15 = !running_on_xen
-	cmp.ne p14,p15=r0,r14
-	;; 
-(p14)	movl r28=XSI_PSR_I_ADDR;;
-(p14)	ld8 r28=[r28];;
-(p14)	adds r28=-1,r28;;			// event_pending
-(p14)	ld1 r14=[r28];;
-(p14)	cmp.ne.unc p13,p14=r14,r0;;
-(p13)	XEN_HYPER_SSM_I
-(p14)	adds r28=1,r28;;			// event_mask
-(p14)	st1 [r28]=r0;;
-(p15)	ssm psr.i
-#else
-	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
-#endif    
-	cmp.eq p8,p0=r3,r0			// A
-(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
-
-	nop.m 0
-(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
-	br.cond.spnt ia64_trace_syscall		// B
-END(fsys_bubble_down)
-
-	.rodata
-	.align 8
-	.globl fsyscall_table
-
-	data8 fsys_bubble_down
-fsyscall_table:
-	data8 fsys_ni_syscall
-	data8 0				// exit			// 1025
-	data8 0				// read
-	data8 0				// write
-	data8 0				// open
-	data8 0				// close
-	data8 0				// creat		// 1030
-	data8 0				// link
-	data8 0				// unlink
-	data8 0				// execve
-	data8 0				// chdir
-	data8 0				// fchdir		// 1035
-	data8 0				// utimes
-	data8 0				// mknod
-	data8 0				// chmod
-	data8 0				// chown
-	data8 0				// lseek		// 1040
-	data8 fsys_getpid		// getpid
-	data8 fsys_getppid		// getppid
-	data8 0				// mount
-	data8 0				// umount
-	data8 0				// setuid		// 1045
-	data8 0				// getuid
-	data8 0				// geteuid
-	data8 0				// ptrace
-	data8 0				// access
-	data8 0				// sync			// 1050
-	data8 0				// fsync
-	data8 0				// fdatasync
-	data8 0				// kill
-	data8 0				// rename
-	data8 0				// mkdir		// 1055
-	data8 0				// rmdir
-	data8 0				// dup
-	data8 0				// pipe
-	data8 0				// times
-	data8 0				// brk			// 1060
-	data8 0				// setgid
-	data8 0				// getgid
-	data8 0				// getegid
-	data8 0				// acct
-	data8 0				// ioctl		// 1065
-	data8 0				// fcntl
-	data8 0				// umask
-	data8 0				// chroot
-	data8 0				// ustat
-	data8 0				// dup2			// 1070
-	data8 0				// setreuid
-	data8 0				// setregid
-	data8 0				// getresuid
-	data8 0				// setresuid
-	data8 0				// getresgid		// 1075
-	data8 0				// setresgid
-	data8 0				// getgroups
-	data8 0				// setgroups
-	data8 0				// getpgid
-	data8 0				// setpgid		// 1080
-	data8 0				// setsid
-	data8 0				// getsid
-	data8 0				// sethostname
-	data8 0				// setrlimit
-	data8 0				// getrlimit		// 1085
-	data8 0				// getrusage
-	data8 fsys_gettimeofday		// gettimeofday
-	data8 0				// settimeofday
-	data8 0				// select
-	data8 0				// poll			// 1090
-	data8 0				// symlink
-	data8 0				// readlink
-	data8 0				// uselib
-	data8 0				// swapon
-	data8 0				// swapoff		// 1095
-	data8 0				// reboot
-	data8 0				// truncate
-	data8 0				// ftruncate
-	data8 0				// fchmod
-	data8 0				// fchown		// 1100
-	data8 0				// getpriority
-	data8 0				// setpriority
-	data8 0				// statfs
-	data8 0				// fstatfs
-	data8 0				// gettid		// 1105
-	data8 0				// semget
-	data8 0				// semop
-	data8 0				// semctl
-	data8 0				// msgget
-	data8 0				// msgsnd		// 1110
-	data8 0				// msgrcv
-	data8 0				// msgctl
-	data8 0				// shmget
-	data8 0				// shmat
-	data8 0				// shmdt		// 1115
-	data8 0				// shmctl
-	data8 0				// syslog
-	data8 0				// setitimer
-	data8 0				// getitimer
-	data8 0					 		// 1120
-	data8 0
-	data8 0
-	data8 0				// vhangup
-	data8 0				// lchown
-	data8 0				// remap_file_pages	// 1125
-	data8 0				// wait4
-	data8 0				// sysinfo
-	data8 0				// clone
-	data8 0				// setdomainname
-	data8 0				// newuname		// 1130
-	data8 0				// adjtimex
-	data8 0
-	data8 0				// init_module
-	data8 0				// delete_module
-	data8 0							// 1135
-	data8 0
-	data8 0				// quotactl
-	data8 0				// bdflush
-	data8 0				// sysfs
-	data8 0				// personality		// 1140
-	data8 0				// afs_syscall
-	data8 0				// setfsuid
-	data8 0				// setfsgid
-	data8 0				// getdents
-	data8 0				// flock		// 1145
-	data8 0				// readv
-	data8 0				// writev
-	data8 0				// pread64
-	data8 0				// pwrite64
-	data8 0				// sysctl		// 1150
-	data8 0				// mmap
-	data8 0				// munmap
-	data8 0				// mlock
-	data8 0				// mlockall
-	data8 0				// mprotect		// 1155
-	data8 0				// mremap
-	data8 0				// msync
-	data8 0				// munlock
-	data8 0				// munlockall
-	data8 0				// sched_getparam	// 1160
-	data8 0				// sched_setparam
-	data8 0				// sched_getscheduler
-	data8 0				// sched_setscheduler
-	data8 0				// sched_yield
-	data8 0				// sched_get_priority_max	// 1165
-	data8 0				// sched_get_priority_min
-	data8 0				// sched_rr_get_interval
-	data8 0				// nanosleep
-	data8 0				// nfsservctl
-	data8 0				// prctl		// 1170
-	data8 0				// getpagesize
-	data8 0				// mmap2
-	data8 0				// pciconfig_read
-	data8 0				// pciconfig_write
-	data8 0				// perfmonctl		// 1175
-	data8 0				// sigaltstack
-	data8 0				// rt_sigaction
-	data8 0				// rt_sigpending
-	data8 fsys_rt_sigprocmask	// rt_sigprocmask
-	data8 0				// rt_sigqueueinfo	// 1180
-	data8 0				// rt_sigreturn
-	data8 0				// rt_sigsuspend
-	data8 0				// rt_sigtimedwait
-	data8 0				// getcwd
-	data8 0				// capget		// 1185
-	data8 0				// capset
-	data8 0				// sendfile
-	data8 0
-	data8 0
-	data8 0				// socket		// 1190
-	data8 0				// bind
-	data8 0				// connect
-	data8 0				// listen
-	data8 0				// accept
-	data8 0				// getsockname		// 1195
-	data8 0				// getpeername
-	data8 0				// socketpair
-	data8 0				// send
-	data8 0				// sendto
-	data8 0				// recv			// 1200
-	data8 0				// recvfrom
-	data8 0				// shutdown
-	data8 0				// setsockopt
-	data8 0				// getsockopt
-	data8 0				// sendmsg		// 1205
-	data8 0				// recvmsg
-	data8 0				// pivot_root
-	data8 0				// mincore
-	data8 0				// madvise
-	data8 0				// newstat		// 1210
-	data8 0				// newlstat
-	data8 0				// newfstat
-	data8 0				// clone2
-	data8 0				// getdents64
-	data8 0				// getunwind		// 1215
-	data8 0				// readahead
-	data8 0				// setxattr
-	data8 0				// lsetxattr
-	data8 0				// fsetxattr
-	data8 0				// getxattr		// 1220
-	data8 0				// lgetxattr
-	data8 0				// fgetxattr
-	data8 0				// listxattr
-	data8 0				// llistxattr
-	data8 0				// flistxattr		// 1225
-	data8 0				// removexattr
-	data8 0				// lremovexattr
-	data8 0				// fremovexattr
-	data8 0				// tkill
-	data8 0				// futex		// 1230
-	data8 0				// sched_setaffinity
-	data8 0				// sched_getaffinity
-	data8 fsys_set_tid_address	// set_tid_address
-	data8 0				// fadvise64_64
-	data8 0				// tgkill		// 1235
-	data8 0				// exit_group
-	data8 0				// lookup_dcookie
-	data8 0				// io_setup
-	data8 0				// io_destroy
-	data8 0				// io_getevents		// 1240
-	data8 0				// io_submit
-	data8 0				// io_cancel
-	data8 0				// epoll_create
-	data8 0				// epoll_ctl
-	data8 0				// epoll_wait		// 1245
-	data8 0				// restart_syscall
-	data8 0				// semtimedop
-	data8 0				// timer_create
-	data8 0				// timer_settime
-	data8 0				// timer_gettime 	// 1250
-	data8 0				// timer_getoverrun
-	data8 0				// timer_delete
-	data8 0				// clock_settime
-	data8 fsys_clock_gettime	// clock_gettime
-
-	// fill in zeros for the remaining entries
-	.zero:
-	.space fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S
deleted file mode 100644
index e242e36b04..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.S
+++ /dev/null
@@ -1,478 +0,0 @@
-/*
- * This file contains the code that gets mapped at the upper end of each task's text
- * region.  For now, it contains the signal trampoline code only.
- *
- * Copyright (C) 1999-2003 Hewlett-Packard Co
- * 	David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/errno.h>
-#include <asm/asm-offsets.h>
-#include <asm/sigcontext.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-# include <asm/privop.h>
-#endif
-
-/*
- * We can't easily refer to symbols inside the kernel.  To avoid full runtime relocation,
- * complications with the linker (which likes to create PLT stubs for branches
- * to targets outside the shared object) and to avoid multi-phase kernel builds, we
- * simply create minimalistic "patch lists" in special ELF sections.
- */
-	.section ".data.patch.fsyscall_table", "a"
-	.previous
-#define LOAD_FSYSCALL_TABLE(reg)			\
-[1:]	movl reg=0;					\
-	.xdata4 ".data.patch.fsyscall_table", 1b-.
-
-	.section ".data.patch.brl_fsys_bubble_down", "a"
-	.previous
-#define BRL_COND_FSYS_BUBBLE_DOWN(pr)			\
-[1:](pr)brl.cond.sptk 0;				\
-	.xdata4 ".data.patch.brl_fsys_bubble_down", 1b-.
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-	// The page in which hyperprivop lives must be pinned by ITR.
-	// However vDSO area isn't pinned. So issuing hyperprivop
-	// from vDSO page causes trouble that Kevin pointed out.
-	// After clearing vpsr.ic, the vcpu is pre-empted and the itlb
-	// is flushed. Then vcpu get cpu again, tlb miss fault occures.
-	// However it results in nested dtlb fault because vpsr.ic is off.
-	// To avoid such a situation, we jump into the kernel text area
-	// which is pinned, and then issue hyperprivop and return back
-	// to vDSO page.
-	// This is Dan Magenheimer's idea.
-
-	// Currently is_running_on_xen() is defined as running_on_xen.
-	// If is_running_on_xen() is a real function, we must update
-	// according to it.
-	.section ".data.patch.running_on_xen", "a"
-	.previous
-#define LOAD_RUNNING_ON_XEN(reg)			\
-[1:]	movl reg=0;					\
-	.xdata4 ".data.patch.running_on_xen", 1b-.
-
-	.section ".data.patch.brl_xen_ssm_i_0", "a"
-	.previous
-#define BRL_COND_XEN_SSM_I_0(pr)			\
-[1:](pr)brl.cond.sptk 0;				\
-	.xdata4 ".data.patch.brl_xen_ssm_i_0", 1b-.
-
-	.section ".data.patch.brl_xen_ssm_i_1", "a"
-	.previous
-#define BRL_COND_XEN_SSM_I_1(pr)			\
-[1:](pr)brl.cond.sptk 0;				\
-	.xdata4 ".data.patch.brl_xen_ssm_i_1", 1b-.
-#endif
-
-GLOBAL_ENTRY(__kernel_syscall_via_break)
-	.prologue
-	.altrp b6
-	.body
-	/*
-	 * Note: for (fast) syscall restart to work, the break instruction must be
-	 *	 the first one in the bundle addressed by syscall_via_break.
-	 */
-{ .mib
-	break 0x100000
-	nop.i 0
-	br.ret.sptk.many b6
-}
-END(__kernel_syscall_via_break)
-
-/*
- * On entry:
- *	r11 = saved ar.pfs
- *	r15 = system call #
- *	b0  = saved return address
- *	b6  = return address
- * On exit:
- *	r11 = saved ar.pfs
- *	r15 = system call #
- *	b0  = saved return address
- *	all other "scratch" registers:	undefined
- *	all "preserved" registers:	same as on entry
- */
-
-GLOBAL_ENTRY(__kernel_syscall_via_epc)
-	.prologue
-	.altrp b6
-	.body
-{
-	/*
-	 * Note: the kernel cannot assume that the first two instructions in this
-	 * bundle get executed.  The remaining code must be safe even if
-	 * they do not get executed.
-	 */
-	adds r17=-1024,r15			// A
-	mov r10=0				// A    default to successful syscall execution
-	epc					// B	causes split-issue
-}
-	;;
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-	// r20 = 1
-	// r22 = &vcpu->vcpu_info->evtchn_upcall_mask
-	// r23 = &vpsr.ic
-	// r24 = &vcpu->vcpu_info->evtchn_upcall_pending
-	// r25 = tmp
-	// r28 = &running_on_xen
-	// r30 = running_on_xen
-	// r31 = tmp
-	// p11 = tmp
-	// p12 = running_on_xen
-	// p13 = !running_on_xen
-	// p14 = tmp
-	// p15 = tmp
-#define isXen	p12
-#define isRaw	p13
-	LOAD_RUNNING_ON_XEN(r28)
-	movl r22=XSI_PSR_I_ADDR
-	;;
-	ld8 r22=[r22]
-	;;
-	movl r23=XSI_PSR_IC
-	adds r24=-1,r22
-	mov r20=1
-	;;
-	ld4 r30=[r28]
-	;;
-	cmp.ne isXen,isRaw=r0,r30
-	;;
-(isRaw)	rsm psr.be | psr.i
-(isXen)	st1 [r22]=r20
-(isXen)	rum psr.be
-	;;
-#else
-	rsm psr.be | psr.i			// M2 (5 cyc to srlz.d)
-#endif
-	LOAD_FSYSCALL_TABLE(r14)		// X
-	;;
-	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
-	shladd r18=r17,3,r14			// A
-	mov r19=NR_syscalls-1			// A
-	;;
-	lfetch [r18]				// M0|1
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-(isRaw)	mov r29=psr
-(isXen)	XEN_HYPER_GET_PSR
-	;;
-(isXen)	mov r29=r8
-#else
-	mov r29=psr				// M2 (12 cyc)
-#endif
-	// If r17 is a NaT, p6 will be zero
-	cmp.geu p6,p7=r19,r17			// A    (sysnr > 0 && sysnr < 1024+NR_syscalls)?
-	;;
-	mov r21=ar.fpsr				// M2 (12 cyc)
-	tnat.nz p10,p9=r15			// I0
-	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
-	;;
-	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
-(p6)	ld8 r18=[r18]				// M0|1
-	nop.i 0
-	;;
-	nop.m 0
-(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-	;;
-	// p14 = running_on_xen && p8
-	// p15 = !running_on_xen && p8
-(p8)	cmp.ne.unc p14,p15=r0,r30
-	;;
-(p15)	ssm psr.i
-	BRL_COND_XEN_SSM_I_0(p14)
-	.global .vdso_ssm_i_0_ret
-.vdso_ssm_i_0_ret:
-#else
-	nop.i 0
-	;;
-(p8)	ssm psr.i
-#endif
-(p6)	mov b7=r18				// I0
-(p8)	br.dptk.many b7				// B
-
-	mov r27=ar.rsc				// M2 (12 cyc)
-/*
- * brl.cond doesn't work as intended because the linker would convert this branch
- * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
- * future version of the linker.  In the meantime, we just use an indirect branch
- * instead.
- */
-#ifdef CONFIG_ITANIUM
-(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
-	;;
-(p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
-	;;
-(p6)	mov b7=r14
-(p6)	br.sptk.many b7
-#else
-	BRL_COND_FSYS_BUBBLE_DOWN(p6)
-#endif
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-(isRaw)	ssm psr.i
-	BRL_COND_XEN_SSM_I_1(isXen)
-	.global .vdso_ssm_i_1_ret
-.vdso_ssm_i_1_ret:
-#else
-	ssm psr.i
-#endif
-	mov r10=-1
-(p10)	mov r8=EINVAL
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-	dv_serialize_data // shut up gas warning.
-		          // we know xen_hyper_ssm_i_0 or xen_hyper_ssm_i_1
-		          // doesn't change p9 and p10
-#endif
-(p9)	mov r8=ENOSYS
-	FSYS_RETURN
-END(__kernel_syscall_via_epc)
-
-#	define ARG0_OFF		(16 + IA64_SIGFRAME_ARG0_OFFSET)
-#	define ARG1_OFF		(16 + IA64_SIGFRAME_ARG1_OFFSET)
-#	define ARG2_OFF		(16 + IA64_SIGFRAME_ARG2_OFFSET)
-#	define SIGHANDLER_OFF	(16 + IA64_SIGFRAME_HANDLER_OFFSET)
-#	define SIGCONTEXT_OFF	(16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
-
-#	define FLAGS_OFF	IA64_SIGCONTEXT_FLAGS_OFFSET
-#	define CFM_OFF		IA64_SIGCONTEXT_CFM_OFFSET
-#	define FR6_OFF		IA64_SIGCONTEXT_FR6_OFFSET
-#	define BSP_OFF		IA64_SIGCONTEXT_AR_BSP_OFFSET
-#	define RNAT_OFF		IA64_SIGCONTEXT_AR_RNAT_OFFSET
-#	define UNAT_OFF		IA64_SIGCONTEXT_AR_UNAT_OFFSET
-#	define FPSR_OFF		IA64_SIGCONTEXT_AR_FPSR_OFFSET
-#	define PR_OFF		IA64_SIGCONTEXT_PR_OFFSET
-#	define RP_OFF		IA64_SIGCONTEXT_IP_OFFSET
-#	define SP_OFF		IA64_SIGCONTEXT_R12_OFFSET
-#	define RBS_BASE_OFF	IA64_SIGCONTEXT_RBS_BASE_OFFSET
-#	define LOADRS_OFF	IA64_SIGCONTEXT_LOADRS_OFFSET
-#	define base0		r2
-#	define base1		r3
-	/*
-	 * When we get here, the memory stack looks like this:
-	 *
-	 *   +===============================+
-       	 *   |				     |
-       	 *   //	    struct sigframe          //
-       	 *   |				     |
-	 *   +-------------------------------+ <-- sp+16
-	 *   |      16 byte of scratch       |
-	 *   |            space              |
-	 *   +-------------------------------+ <-- sp
-	 *
-	 * The register stack looks _exactly_ the way it looked at the time the signal
-	 * occurred.  In other words, we're treading on a potential mine-field: each
-	 * incoming general register may be a NaT value (including sp, in which case the
-	 * process ends up dying with a SIGSEGV).
-	 *
-	 * The first thing need to do is a cover to get the registers onto the backing
-	 * store.  Once that is done, we invoke the signal handler which may modify some
-	 * of the machine state.  After returning from the signal handler, we return
-	 * control to the previous context by executing a sigreturn system call.  A signal
-	 * handler may call the rt_sigreturn() function to directly return to a given
-	 * sigcontext.  However, the user-level sigreturn() needs to do much more than
-	 * calling the rt_sigreturn() system call as it needs to unwind the stack to
-	 * restore preserved registers that may have been saved on the signal handler's
-	 * call stack.
-	 */
-
-#define SIGTRAMP_SAVES										\
-	.unwabi 3, 's';		/* mark this as a sigtramp handler (saves scratch regs) */	\
-	.unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */	\
-	.savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF;						\
-	.savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF;						\
-	.savesp pr, PR_OFF+SIGCONTEXT_OFF;     							\
-	.savesp rp, RP_OFF+SIGCONTEXT_OFF;							\
-	.savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF;							\
-	.vframesp SP_OFF+SIGCONTEXT_OFF
-
-GLOBAL_ENTRY(__kernel_sigtramp)
-	// describe the state that is active when we get here:
-	.prologue
-	SIGTRAMP_SAVES
-	.body
-
-	.label_state 1
-
-	adds base0=SIGHANDLER_OFF,sp
-	adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
-	br.call.sptk.many rp=1f
-1:
-	ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF)	// get pointer to signal handler's plabel
-	ld8 r15=[base1]					// get address of new RBS base (or NULL)
-	cover				// push args in interrupted frame onto backing store
-	;;
-	cmp.ne p1,p0=r15,r0		// do we need to switch rbs? (note: pr is saved by kernel)
-	mov.m r9=ar.bsp			// fetch ar.bsp
-	.spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
-(p1)	br.cond.spnt setup_rbs		// yup -> (clobbers p8, r14-r16, and r18-r20)
-back_from_setup_rbs:
-	alloc r8=ar.pfs,0,0,3,0
-	ld8 out0=[base0],16		// load arg0 (signum)
-	adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
-	;;
-	ld8 out1=[base1]		// load arg1 (siginfop)
-	ld8 r10=[r17],8			// get signal handler entry point
-	;;
-	ld8 out2=[base0]		// load arg2 (sigcontextp)
-	ld8 gp=[r17]			// get signal handler's global pointer
-	adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
-	;;
-	.spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
-	st8 [base0]=r9			// save sc_ar_bsp
-	adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
-	adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
-	;;
-	stf.spill [base0]=f6,32
-	stf.spill [base1]=f7,32
-	;;
-	stf.spill [base0]=f8,32
-	stf.spill [base1]=f9,32
-	mov b6=r10
-	;;
-	stf.spill [base0]=f10,32
-	stf.spill [base1]=f11,32
-	;;
-	stf.spill [base0]=f12,32
-	stf.spill [base1]=f13,32
-	;;
-	stf.spill [base0]=f14,32
-	stf.spill [base1]=f15,32
-	br.call.sptk.many rp=b6			// call the signal handler
-.ret0:	adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
-	;;
-	ld8 r15=[base0]				// fetch sc_ar_bsp
-	mov r14=ar.bsp
-	;;
-	cmp.ne p1,p0=r14,r15			// do we need to restore the rbs?
-(p1)	br.cond.spnt restore_rbs		// yup -> (clobbers r14-r18, f6 & f7)
-	;;
-back_from_restore_rbs:
-	adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
-	adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
-	;;
-	ldf.fill f6=[base0],32
-	ldf.fill f7=[base1],32
-	;;
-	ldf.fill f8=[base0],32
-	ldf.fill f9=[base1],32
-	;;
-	ldf.fill f10=[base0],32
-	ldf.fill f11=[base1],32
-	;;
-	ldf.fill f12=[base0],32
-	ldf.fill f13=[base1],32
-	;;
-	ldf.fill f14=[base0],32
-	ldf.fill f15=[base1],32
-	mov r15=__NR_rt_sigreturn
-	.restore sp				// pop .prologue
-	break __BREAK_SYSCALL
-
-	.prologue
-	SIGTRAMP_SAVES
-setup_rbs:
-	mov ar.rsc=0				// put RSE into enforced lazy mode
-	;;
-	.save ar.rnat, r19
-	mov r19=ar.rnat				// save RNaT before switching backing store area
-	adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
-
-	mov r18=ar.bspstore
-	mov ar.bspstore=r15			// switch over to new register backing store area
-	;;
-
-	.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
-	st8 [r14]=r19				// save sc_ar_rnat
-	.body
-	mov.m r16=ar.bsp			// sc_loadrs <- (new bsp - new bspstore) << 16
-	adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
-	;;
-	invala
-	sub r15=r16,r15
-	extr.u r20=r18,3,6
-	;;
-	mov ar.rsc=0xf				// set RSE into eager mode, pl 3
-	cmp.eq p8,p0=63,r20
-	shl r15=r15,16
-	;;
-	st8 [r14]=r15				// save sc_loadrs
-(p8)	st8 [r18]=r19		// if bspstore points at RNaT slot, store RNaT there now
-	.restore sp				// pop .prologue
-	br.cond.sptk back_from_setup_rbs
-
-	.prologue
-	SIGTRAMP_SAVES
-	.spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
-	.body
-restore_rbs:
-	// On input:
-	//	r14 = bsp1 (bsp at the time of return from signal handler)
-	//	r15 = bsp0 (bsp at the time the signal occurred)
-	//
-	// Here, we need to calculate bspstore0, the value that ar.bspstore needs
-	// to be set to, based on bsp0 and the size of the dirty partition on
-	// the alternate stack (sc_loadrs >> 16).  This can be done with the
-	// following algorithm:
-	//
-	//  bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
-	//
-	// This is what the code below does.
-	//
-	alloc r2=ar.pfs,0,0,0,0			// alloc null frame
-	adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
-	adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
-	;;
-	ld8 r17=[r16]
-	ld8 r16=[r18]			// get new rnat
-	extr.u r18=r15,3,6	// r18 <- rse_slot_num(bsp0)
-	;;
-	mov ar.rsc=r17			// put RSE into enforced lazy mode
-	shr.u r17=r17,16
-	;;
-	sub r14=r14,r17		// r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
-	shr.u r17=r17,3		// r17 <- (sc_loadrs >> 19)
-	;;
-	loadrs			// restore dirty partition
-	extr.u r14=r14,3,6	// r14 <- rse_slot_num(bspstore1)
-	;;
-	add r14=r14,r17		// r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
-	;;
-	shr.u r14=r14,6		// r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
-	;;
-	sub r14=r14,r17		// r14 <- -rse_num_regs(bspstore1, bsp1)
-	movl r17=0x8208208208208209
-	;;
-	add r18=r18,r14		// r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
-	setf.sig f7=r17
-	cmp.lt p7,p0=r14,r0	// p7 <- (r14 < 0)?
-	;;
-(p7)	adds r18=-62,r18	// delta -= 62
-	;;
-	setf.sig f6=r18
-	;;
-	xmpy.h f6=f6,f7
-	;;
-	getf.sig r17=f6
-	;;
-	add r17=r17,r18
-	shr r18=r18,63
-	;;
-	shr r17=r17,5
-	;;
-	sub r17=r17,r18		// r17 = delta/63
-	;;
-	add r17=r14,r17		// r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
-	;;
-	shladd r15=r17,3,r15	// r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
-	;;
-	mov ar.bspstore=r15			// switch back to old register backing store area
-	;;
-	mov ar.rnat=r16				// restore RNaT
-	mov ar.rsc=0xf				// (will be restored later on from sc_ar_rsc)
-	// invala not necessary as that will happen when returning to user-mode
-	br.cond.sptk back_from_restore_rbs
-END(__kernel_sigtramp)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
deleted file mode 100644
index 58582ccdfe..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Linker script for gate DSO.  The gate pages are an ELF shared object prelinked to its
- * virtual address, with only one read-only segment and one execute-only segment (both fit
- * in one page).  This script controls its layout.
- */
-
-
-#include <asm/system.h>
-
-SECTIONS
-{
-  . = GATE_ADDR + SIZEOF_HEADERS;
-
-  .hash				: { *(.hash) }				:readable
-  .gnu.hash			: { *(.gnu.hash) }
-  .dynsym			: { *(.dynsym) }
-  .dynstr			: { *(.dynstr) }
-  .gnu.version			: { *(.gnu.version) }
-  .gnu.version_d		: { *(.gnu.version_d) }
-  .gnu.version_r		: { *(.gnu.version_r) }
-  .dynamic			: { *(.dynamic) }			:readable :dynamic
-
-  /*
-   * This linker script is used both with -r and with -shared.  For the layouts to match,
-   * we need to skip more than enough space for the dynamic symbol table et al.  If this
-   * amount is insufficient, ld -shared will barf.  Just increase it here.
-   */
-  . = GATE_ADDR + 0x500;
-
-  .data.patch			: {
-				    __start_gate_mckinley_e9_patchlist = .;
-				    *(.data.patch.mckinley_e9)
-				    __end_gate_mckinley_e9_patchlist = .;
-
-				    __start_gate_vtop_patchlist = .;
-				    *(.data.patch.vtop)
-				    __end_gate_vtop_patchlist = .;
-
-				    __start_gate_fsyscall_patchlist = .;
-				    *(.data.patch.fsyscall_table)
-				    __end_gate_fsyscall_patchlist = .;
-
-				    __start_gate_brl_fsys_bubble_down_patchlist = .;
-				    *(.data.patch.brl_fsys_bubble_down)
-				    __end_gate_brl_fsys_bubble_down_patchlist = .;
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-				    __start_gate_running_on_xen_patchlist = .;
-				    *(.data.patch.running_on_xen)
-				    __end_gate_running_on_xen_patchlist = .;
-
-				    __start_gate_brl_xen_ssm_i_0_patchlist = .;
-				    *(.data.patch.brl_xen_ssm_i_0)
-				    __end_gate_brl_xen_ssm_i_0_patchlist = .;
-
-				    __start_gate_brl_xen_ssm_i_1_patchlist = .;
-				    *(.data.patch.brl_xen_ssm_i_1)
-				    __end_gate_brl_xen_ssm_i_1_patchlist = .;
-#endif
-  }									:readable
-  .IA_64.unwind_info		: { *(.IA_64.unwind_info*) }
-  .IA_64.unwind			: { *(.IA_64.unwind*) }			:readable :unwind
-#ifdef HAVE_BUGGY_SEGREL
-  .text (GATE_ADDR + PAGE_SIZE)	: { *(.text) *(.text.*) }		:readable
-#else
-  . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
-  .text				: { *(.text) *(.text.*) }		:epc
-#endif
-
-  /DISCARD/			: {
-  	*(.got.plt) *(.got)
-	*(.data .data.* .gnu.linkonce.d.*)
-	*(.dynbss)
-	*(.bss .bss.* .gnu.linkonce.b.*)
-	*(__ex_table)
-	*(__mca_table)
-  }
-}
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-  readable  PT_LOAD	FILEHDR	PHDRS	FLAGS(4);	/* PF_R */
-#ifndef HAVE_BUGGY_SEGREL
-  epc	    PT_LOAD	FILEHDR PHDRS	FLAGS(1);	/* PF_X */
-#endif
-  dynamic   PT_DYNAMIC			FLAGS(4);	/* PF_R */
-  unwind    0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-  LINUX_2.5 {
-    global:
-	__kernel_syscall_via_break;
-	__kernel_syscall_via_epc;
-	__kernel_sigtramp;
-
-    local: *;
-  };
-}
-
-/* The ELF entry point can be used to set the AT_SYSINFO value.  */
-ENTRY(__kernel_syscall_via_epc)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/head.S b/linux-2.6-xen-sparse/arch/ia64/kernel/head.S
deleted file mode 100644
index dded6f24f1..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/head.S
+++ /dev/null
@@ -1,1229 +0,0 @@
-/*
- * Here is where the ball gets rolling as far as the kernel is concerned.
- * When control is transferred to _start, the bootload has already
- * loaded us to the correct address.  All that's left to do here is
- * to set up the kernel's global pointer and jump to the kernel
- * entry point.
- *
- * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *	Stephane Eranian <eranian@hpl.hp.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- * Copyright (C) 1999 Intel Corp.
- * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
- * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
- *   -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
- * Copyright (C) 2004 Ashok Raj <ashok.raj@intel.com>
- *   Support for CPU Hotplug
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/fpu.h>
-#include <asm/kregs.h>
-#include <asm/mmu_context.h>
-#include <asm/asm-offsets.h>
-#include <asm/pal.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/mca_asm.h>
-
-#ifdef CONFIG_HOTPLUG_CPU
-#define SAL_PSR_BITS_TO_SET				\
-	(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL)
-
-#define SAVE_FROM_REG(src, ptr, dest)	\
-	mov dest=src;;						\
-	st8 [ptr]=dest,0x08
-
-#define RESTORE_REG(reg, ptr, _tmp)		\
-	ld8 _tmp=[ptr],0x08;;				\
-	mov reg=_tmp
-
-#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\
-	mov ar.lc=IA64_NUM_DBG_REGS-1;; 			\
-	mov _idx=0;; 								\
-1: 												\
-	SAVE_FROM_REG(_breg[_idx], ptr, _dest);;	\
-	add _idx=1,_idx;;							\
-	br.cloop.sptk.many 1b
-
-#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\
-	mov ar.lc=IA64_NUM_DBG_REGS-1;;			\
-	mov _idx=0;;							\
-_lbl:  RESTORE_REG(_breg[_idx], ptr, _tmp);;	\
-	add _idx=1, _idx;;						\
-	br.cloop.sptk.many _lbl
-
-#define SAVE_ONE_RR(num, _reg, _tmp) \
-	movl _tmp=(num<<61);;	\
-	mov _reg=rr[_tmp]
-
-#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
-	SAVE_ONE_RR(0,_r0, _tmp);; \
-	SAVE_ONE_RR(1,_r1, _tmp);; \
-	SAVE_ONE_RR(2,_r2, _tmp);; \
-	SAVE_ONE_RR(3,_r3, _tmp);; \
-	SAVE_ONE_RR(4,_r4, _tmp);; \
-	SAVE_ONE_RR(5,_r5, _tmp);; \
-	SAVE_ONE_RR(6,_r6, _tmp);; \
-	SAVE_ONE_RR(7,_r7, _tmp);;
-
-#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \
-	st8 [ptr]=_r0, 8;; \
-	st8 [ptr]=_r1, 8;; \
-	st8 [ptr]=_r2, 8;; \
-	st8 [ptr]=_r3, 8;; \
-	st8 [ptr]=_r4, 8;; \
-	st8 [ptr]=_r5, 8;; \
-	st8 [ptr]=_r6, 8;; \
-	st8 [ptr]=_r7, 8;;
-
-#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \
-	mov		ar.lc=0x08-1;;						\
-	movl	_idx1=0x00;;						\
-RestRR:											\
-	dep.z	_idx2=_idx1,61,3;;					\
-	ld8		_tmp=[ptr],8;;						\
-	mov		rr[_idx2]=_tmp;;					\
-	srlz.d;;									\
-	add		_idx1=1,_idx1;;						\
-	br.cloop.sptk.few	RestRR
-
-#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \
-	movl reg1=sal_state_for_booting_cpu;;	\
-	ld8 reg2=[reg1];;
-
-/*
- * Adjust region registers saved before starting to save
- * break regs and rest of the states that need to be preserved.
- */
-#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred)  \
-	SAVE_FROM_REG(b0,_reg1,_reg2);;						\
-	SAVE_FROM_REG(b1,_reg1,_reg2);;						\
-	SAVE_FROM_REG(b2,_reg1,_reg2);;						\
-	SAVE_FROM_REG(b3,_reg1,_reg2);;						\
-	SAVE_FROM_REG(b4,_reg1,_reg2);;						\
-	SAVE_FROM_REG(b5,_reg1,_reg2);;						\
-	st8 [_reg1]=r1,0x08;;								\
-	st8 [_reg1]=r12,0x08;;								\
-	st8 [_reg1]=r13,0x08;;								\
-	SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);;				\
-	SAVE_FROM_REG(ar.pfs,_reg1,_reg2);;					\
-	SAVE_FROM_REG(ar.rnat,_reg1,_reg2);;				\
-	SAVE_FROM_REG(ar.unat,_reg1,_reg2);;				\
-	SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);;			\
-	SAVE_FROM_REG(cr.dcr,_reg1,_reg2);;					\
-	SAVE_FROM_REG(cr.iva,_reg1,_reg2);;					\
-	SAVE_FROM_REG(cr.pta,_reg1,_reg2);;					\
-	SAVE_FROM_REG(cr.itv,_reg1,_reg2);;					\
-	SAVE_FROM_REG(cr.pmv,_reg1,_reg2);;					\
-	SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);;				\
-	SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);;				\
-	SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);;				\
-	st8 [_reg1]=r4,0x08;;								\
-	st8 [_reg1]=r5,0x08;;								\
-	st8 [_reg1]=r6,0x08;;								\
-	st8 [_reg1]=r7,0x08;;								\
-	st8 [_reg1]=_pred,0x08;;							\
-	SAVE_FROM_REG(ar.lc, _reg1, _reg2);;				\
-	stf.spill.nta [_reg1]=f2,16;;						\
-	stf.spill.nta [_reg1]=f3,16;;						\
-	stf.spill.nta [_reg1]=f4,16;;						\
-	stf.spill.nta [_reg1]=f5,16;;						\
-	stf.spill.nta [_reg1]=f16,16;;						\
-	stf.spill.nta [_reg1]=f17,16;;						\
-	stf.spill.nta [_reg1]=f18,16;;						\
-	stf.spill.nta [_reg1]=f19,16;;						\
-	stf.spill.nta [_reg1]=f20,16;;						\
-	stf.spill.nta [_reg1]=f21,16;;						\
-	stf.spill.nta [_reg1]=f22,16;;						\
-	stf.spill.nta [_reg1]=f23,16;;						\
-	stf.spill.nta [_reg1]=f24,16;;						\
-	stf.spill.nta [_reg1]=f25,16;;						\
-	stf.spill.nta [_reg1]=f26,16;;						\
-	stf.spill.nta [_reg1]=f27,16;;						\
-	stf.spill.nta [_reg1]=f28,16;;						\
-	stf.spill.nta [_reg1]=f29,16;;						\
-	stf.spill.nta [_reg1]=f30,16;;						\
-	stf.spill.nta [_reg1]=f31,16;;
-
-#else
-#define SET_AREA_FOR_BOOTING_CPU(a1, a2)
-#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3)
-#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
-#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7)
-#endif
-
-#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \
-	movl _tmp1=(num << 61);;	\
-	mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \
-	mov rr[_tmp1]=_tmp2
-
-	.section __special_page_section,"ax"
-
-	.global empty_zero_page
-empty_zero_page:
-	.skip PAGE_SIZE
-
-	.global swapper_pg_dir
-swapper_pg_dir:
-	.skip PAGE_SIZE
-
-	.rodata
-halt_msg:
-	stringz "Halting kernel\n"
-
-	.text
-
-	.global start_ap
-
-	/*
-	 * Start the kernel.  When the bootloader passes control to _start(), r28
-	 * points to the address of the boot parameter area.  Execution reaches
-	 * here in physical mode.
-	 */
-GLOBAL_ENTRY(_start)
-start_ap:
-	.prologue
-	.save rp, r0		// terminate unwind chain with a NULL rp
-	.body
-
-	rsm psr.i | psr.ic
-	;;
-	srlz.i
-	;;
- {
-	flushrs				// must be first insn in group
-	srlz.i
- }
-	;;
-	/*
-	 * Save the region registers, predicate before they get clobbered
-	 */
-	SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15);
-	mov r25=pr;;
-
-	/*
-	 * Initialize kernel region registers:
-	 *	rr[0]: VHPT enabled, page size = PAGE_SHIFT
-	 *	rr[1]: VHPT enabled, page size = PAGE_SHIFT
-	 *	rr[2]: VHPT enabled, page size = PAGE_SHIFT
-	 *	rr[3]: VHPT enabled, page size = PAGE_SHIFT
-	 *	rr[4]: VHPT enabled, page size = PAGE_SHIFT
-	 *	rr[5]: VHPT enabled, page size = PAGE_SHIFT
-	 *	rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
-	 *	rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
-	 * We initialize all of them to prevent inadvertently assuming
-	 * something about the state of address translation early in boot.
-	 */
-	SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);;
-	SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);;
-	SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);;
-	SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);;
-	SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);;
-	SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);;
-	SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);;
-	SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);;
-	/*
-	 * Now pin mappings into the TLB for kernel text and data
-	 */
-	mov r18=KERNEL_TR_PAGE_SHIFT<<2
-	movl r17=KERNEL_START
-	;;
-	mov cr.itir=r18
-	mov cr.ifa=r17
-	mov r16=IA64_TR_KERNEL
-	mov r3=ip
-	movl r18=PAGE_KERNEL
-	;;
-	dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
-	;;
-	or r18=r2,r18
-	;;
-	srlz.i
-	;;
-	itr.i itr[r16]=r18
-	;;
-	itr.d dtr[r16]=r18
-	;;
-	srlz.i
-
-	/*
-	 * Switch into virtual mode:
-	 */
-	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
-		  |IA64_PSR_DI)
-	;;
-	mov cr.ipsr=r16
-	movl r17=1f
-	;;
-	mov cr.iip=r17
-	mov cr.ifs=r0
-	;;
-	rfi
-	;;
-1:	// now we are in virtual mode
-
-	SET_AREA_FOR_BOOTING_CPU(r2, r16);
-
-	STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15);
-	SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25)
-	;;
-
-	// set IVT entry point---can't access I/O ports without it
-	movl r3=ia64_ivt
-	;;
-	mov cr.iva=r3
-	movl r2=FPSR_DEFAULT
-	;;
-	srlz.i
-	movl gp=__gp
-
-	mov ar.fpsr=r2
-	;;
-
-#define isAP	p2	// are we an Application Processor?
-#define isBP	p3	// are we the Bootstrap Processor?
-
-#ifdef CONFIG_SMP
-	/*
-	 * Find the init_task for the currently booting CPU.  At poweron, and in
-	 * UP mode, task_for_booting_cpu is NULL.
-	 */
-	movl r3=task_for_booting_cpu
- 	;;
-	ld8 r3=[r3]
-	movl r2=init_task
-	;;
-	cmp.eq isBP,isAP=r3,r0
-	;;
-(isAP)	mov r2=r3
-#else
-	movl r2=init_task
-	cmp.eq isBP,isAP=r0,r0
-#endif
-	;;
-	tpa r3=r2		// r3 == phys addr of task struct
-	mov r16=-1
-(isBP)	br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
-
-	// load mapping for stack (virtaddr in r2, physaddr in r3)
-	rsm psr.ic
-	movl r17=PAGE_KERNEL
-	;;
-	srlz.d
-	dep r18=0,r3,0,12
-	;;
-	or r18=r17,r18
-	dep r2=-1,r3,61,3	// IMVA of task
-	;;
-	mov r17=rr[r2]
-	shr.u r16=r3,IA64_GRANULE_SHIFT
-	;;
-	dep r17=0,r17,8,24
-	;;
-	mov cr.itir=r17
-	mov cr.ifa=r2
-
-	mov r19=IA64_TR_CURRENT_STACK
-	;;
-	itr.d dtr[r19]=r18
-	;;
-	ssm psr.ic
-	srlz.d
-  	;;
-
-.load_current:
-	// load the "current" pointer (r13) and ar.k6 with the current task
-	mov IA64_KR(CURRENT)=r2		// virtual address
-	mov IA64_KR(CURRENT_STACK)=r16
-	mov r13=r2
-	/*
-	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel
-	 * threads don't store interesting values in that structure, but the space
-	 * still needs to be there because time-critical stuff such as the context
-	 * switching can be implemented more efficiently (for example, __switch_to()
-	 * always sets the psr.dfh bit of the task it is switching to).
-	 */
-
-	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
-	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
-	mov ar.rsc=0		// place RSE in enforced lazy mode
-	;;
-	loadrs			// clear the dirty partition
-	mov IA64_KR(PER_CPU_DATA)=r0	// clear physical per-CPU base
-	;;
-	mov ar.bspstore=r2	// establish the new RSE stack
-	;;
-	mov ar.rsc=0x3		// place RSE in eager mode
-
-(isBP)	dep r28=-1,r28,61,3	// make address virtual
-(isBP)	movl r2=ia64_boot_param
-	;;
-(isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader
-
-#ifdef CONFIG_XEN
-	//  Note: isBP is used by the subprogram.
-	br.call.sptk.many rp=early_xen_setup
-	;;
-#endif
-
-#ifdef CONFIG_SMP
-(isAP)	br.call.sptk.many rp=start_secondary
-.ret0:
-(isAP)	br.cond.sptk self
-#endif
-
-	// This is executed by the bootstrap processor (bsp) only:
-
-#ifdef CONFIG_IA64_FW_EMU
-	// initialize PAL & SAL emulator:
-	br.call.sptk.many rp=sys_fw_init
-.ret1:
-#endif
-	br.call.sptk.many rp=start_kernel
-.ret2:	addl r3=@ltoff(halt_msg),gp
-	;;
-	alloc r2=ar.pfs,8,0,2,0
-	;;
-	ld8 out0=[r3]
-	br.call.sptk.many b0=console_print
-
-self:	hint @pause
-	br.sptk.many self		// endless loop
-END(_start)
-
-GLOBAL_ENTRY(ia64_save_debug_regs)
-	alloc r16=ar.pfs,1,0,0,0
-	mov r20=ar.lc			// preserve ar.lc
-	mov ar.lc=IA64_NUM_DBG_REGS-1
-	mov r18=0
-	add r19=IA64_NUM_DBG_REGS*8,in0
-	;;
-1:	mov r16=dbr[r18]
-#ifdef CONFIG_ITANIUM
-	;;
-	srlz.d
-#endif
-	mov r17=ibr[r18]
-	add r18=1,r18
-	;;
-	st8.nta [in0]=r16,8
-	st8.nta [r19]=r17,8
-	br.cloop.sptk.many 1b
-	;;
-	mov ar.lc=r20			// restore ar.lc
-	br.ret.sptk.many rp
-END(ia64_save_debug_regs)
-
-GLOBAL_ENTRY(ia64_load_debug_regs)
-	alloc r16=ar.pfs,1,0,0,0
-	lfetch.nta [in0]
-	mov r20=ar.lc			// preserve ar.lc
-	add r19=IA64_NUM_DBG_REGS*8,in0
-	mov ar.lc=IA64_NUM_DBG_REGS-1
-	mov r18=-1
-	;;
-1:	ld8.nta r16=[in0],8
-	ld8.nta r17=[r19],8
-	add r18=1,r18
-	;;
-	mov dbr[r18]=r16
-#ifdef CONFIG_ITANIUM
-	;;
-	srlz.d				// Errata 132 (NoFix status)
-#endif
-	mov ibr[r18]=r17
-	br.cloop.sptk.many 1b
-	;;
-	mov ar.lc=r20			// restore ar.lc
-	br.ret.sptk.many rp
-END(ia64_load_debug_regs)
-
-GLOBAL_ENTRY(__ia64_save_fpu)
-	alloc r2=ar.pfs,1,4,0,0
-	adds loc0=96*16-16,in0
-	adds loc1=96*16-16-128,in0
-	;;
-	stf.spill.nta [loc0]=f127,-256
-	stf.spill.nta [loc1]=f119,-256
-	;;
-	stf.spill.nta [loc0]=f111,-256
-	stf.spill.nta [loc1]=f103,-256
-	;;
-	stf.spill.nta [loc0]=f95,-256
-	stf.spill.nta [loc1]=f87,-256
-	;;
-	stf.spill.nta [loc0]=f79,-256
-	stf.spill.nta [loc1]=f71,-256
-	;;
-	stf.spill.nta [loc0]=f63,-256
-	stf.spill.nta [loc1]=f55,-256
-	adds loc2=96*16-32,in0
-	;;
-	stf.spill.nta [loc0]=f47,-256
-	stf.spill.nta [loc1]=f39,-256
-	adds loc3=96*16-32-128,in0
-	;;
-	stf.spill.nta [loc2]=f126,-256
-	stf.spill.nta [loc3]=f118,-256
-	;;
-	stf.spill.nta [loc2]=f110,-256
-	stf.spill.nta [loc3]=f102,-256
-	;;
-	stf.spill.nta [loc2]=f94,-256
-	stf.spill.nta [loc3]=f86,-256
-	;;
-	stf.spill.nta [loc2]=f78,-256
-	stf.spill.nta [loc3]=f70,-256
-	;;
-	stf.spill.nta [loc2]=f62,-256
-	stf.spill.nta [loc3]=f54,-256
-	adds loc0=96*16-48,in0
-	;;
-	stf.spill.nta [loc2]=f46,-256
-	stf.spill.nta [loc3]=f38,-256
-	adds loc1=96*16-48-128,in0
-	;;
-	stf.spill.nta [loc0]=f125,-256
-	stf.spill.nta [loc1]=f117,-256
-	;;
-	stf.spill.nta [loc0]=f109,-256
-	stf.spill.nta [loc1]=f101,-256
-	;;
-	stf.spill.nta [loc0]=f93,-256
-	stf.spill.nta [loc1]=f85,-256
-	;;
-	stf.spill.nta [loc0]=f77,-256
-	stf.spill.nta [loc1]=f69,-256
-	;;
-	stf.spill.nta [loc0]=f61,-256
-	stf.spill.nta [loc1]=f53,-256
-	adds loc2=96*16-64,in0
-	;;
-	stf.spill.nta [loc0]=f45,-256
-	stf.spill.nta [loc1]=f37,-256
-	adds loc3=96*16-64-128,in0
-	;;
-	stf.spill.nta [loc2]=f124,-256
-	stf.spill.nta [loc3]=f116,-256
-	;;
-	stf.spill.nta [loc2]=f108,-256
-	stf.spill.nta [loc3]=f100,-256
-	;;
-	stf.spill.nta [loc2]=f92,-256
-	stf.spill.nta [loc3]=f84,-256
-	;;
-	stf.spill.nta [loc2]=f76,-256
-	stf.spill.nta [loc3]=f68,-256
-	;;
-	stf.spill.nta [loc2]=f60,-256
-	stf.spill.nta [loc3]=f52,-256
-	adds loc0=96*16-80,in0
-	;;
-	stf.spill.nta [loc2]=f44,-256
-	stf.spill.nta [loc3]=f36,-256
-	adds loc1=96*16-80-128,in0
-	;;
-	stf.spill.nta [loc0]=f123,-256
-	stf.spill.nta [loc1]=f115,-256
-	;;
-	stf.spill.nta [loc0]=f107,-256
-	stf.spill.nta [loc1]=f99,-256
-	;;
-	stf.spill.nta [loc0]=f91,-256
-	stf.spill.nta [loc1]=f83,-256
-	;;
-	stf.spill.nta [loc0]=f75,-256
-	stf.spill.nta [loc1]=f67,-256
-	;;
-	stf.spill.nta [loc0]=f59,-256
-	stf.spill.nta [loc1]=f51,-256
-	adds loc2=96*16-96,in0
-	;;
-	stf.spill.nta [loc0]=f43,-256
-	stf.spill.nta [loc1]=f35,-256
-	adds loc3=96*16-96-128,in0
-	;;
-	stf.spill.nta [loc2]=f122,-256
-	stf.spill.nta [loc3]=f114,-256
-	;;
-	stf.spill.nta [loc2]=f106,-256
-	stf.spill.nta [loc3]=f98,-256
-	;;
-	stf.spill.nta [loc2]=f90,-256
-	stf.spill.nta [loc3]=f82,-256
-	;;
-	stf.spill.nta [loc2]=f74,-256
-	stf.spill.nta [loc3]=f66,-256
-	;;
-	stf.spill.nta [loc2]=f58,-256
-	stf.spill.nta [loc3]=f50,-256
-	adds loc0=96*16-112,in0
-	;;
-	stf.spill.nta [loc2]=f42,-256
-	stf.spill.nta [loc3]=f34,-256
-	adds loc1=96*16-112-128,in0
-	;;
-	stf.spill.nta [loc0]=f121,-256
-	stf.spill.nta [loc1]=f113,-256
-	;;
-	stf.spill.nta [loc0]=f105,-256
-	stf.spill.nta [loc1]=f97,-256
-	;;
-	stf.spill.nta [loc0]=f89,-256
-	stf.spill.nta [loc1]=f81,-256
-	;;
-	stf.spill.nta [loc0]=f73,-256
-	stf.spill.nta [loc1]=f65,-256
-	;;
-	stf.spill.nta [loc0]=f57,-256
-	stf.spill.nta [loc1]=f49,-256
-	adds loc2=96*16-128,in0
-	;;
-	stf.spill.nta [loc0]=f41,-256
-	stf.spill.nta [loc1]=f33,-256
-	adds loc3=96*16-128-128,in0
-	;;
-	stf.spill.nta [loc2]=f120,-256
-	stf.spill.nta [loc3]=f112,-256
-	;;
-	stf.spill.nta [loc2]=f104,-256
-	stf.spill.nta [loc3]=f96,-256
-	;;
-	stf.spill.nta [loc2]=f88,-256
-	stf.spill.nta [loc3]=f80,-256
-	;;
-	stf.spill.nta [loc2]=f72,-256
-	stf.spill.nta [loc3]=f64,-256
-	;;
-	stf.spill.nta [loc2]=f56,-256
-	stf.spill.nta [loc3]=f48,-256
-	;;
-	stf.spill.nta [loc2]=f40
-	stf.spill.nta [loc3]=f32
-	br.ret.sptk.many rp
-END(__ia64_save_fpu)
-
-GLOBAL_ENTRY(__ia64_load_fpu)
-	alloc r2=ar.pfs,1,2,0,0
-	adds r3=128,in0
-	adds r14=256,in0
-	adds r15=384,in0
-	mov loc0=512
-	mov loc1=-1024+16
-	;;
-	ldf.fill.nta f32=[in0],loc0
-	ldf.fill.nta f40=[ r3],loc0
-	ldf.fill.nta f48=[r14],loc0
-	ldf.fill.nta f56=[r15],loc0
-	;;
-	ldf.fill.nta f64=[in0],loc0
-	ldf.fill.nta f72=[ r3],loc0
-	ldf.fill.nta f80=[r14],loc0
-	ldf.fill.nta f88=[r15],loc0
-	;;
-	ldf.fill.nta f96=[in0],loc1
-	ldf.fill.nta f104=[ r3],loc1
-	ldf.fill.nta f112=[r14],loc1
-	ldf.fill.nta f120=[r15],loc1
-	;;
-	ldf.fill.nta f33=[in0],loc0
-	ldf.fill.nta f41=[ r3],loc0
-	ldf.fill.nta f49=[r14],loc0
-	ldf.fill.nta f57=[r15],loc0
-	;;
-	ldf.fill.nta f65=[in0],loc0
-	ldf.fill.nta f73=[ r3],loc0
-	ldf.fill.nta f81=[r14],loc0
-	ldf.fill.nta f89=[r15],loc0
-	;;
-	ldf.fill.nta f97=[in0],loc1
-	ldf.fill.nta f105=[ r3],loc1
-	ldf.fill.nta f113=[r14],loc1
-	ldf.fill.nta f121=[r15],loc1
-	;;
-	ldf.fill.nta f34=[in0],loc0
-	ldf.fill.nta f42=[ r3],loc0
-	ldf.fill.nta f50=[r14],loc0
-	ldf.fill.nta f58=[r15],loc0
-	;;
-	ldf.fill.nta f66=[in0],loc0
-	ldf.fill.nta f74=[ r3],loc0
-	ldf.fill.nta f82=[r14],loc0
-	ldf.fill.nta f90=[r15],loc0
-	;;
-	ldf.fill.nta f98=[in0],loc1
-	ldf.fill.nta f106=[ r3],loc1
-	ldf.fill.nta f114=[r14],loc1
-	ldf.fill.nta f122=[r15],loc1
-	;;
-	ldf.fill.nta f35=[in0],loc0
-	ldf.fill.nta f43=[ r3],loc0
-	ldf.fill.nta f51=[r14],loc0
-	ldf.fill.nta f59=[r15],loc0
-	;;
-	ldf.fill.nta f67=[in0],loc0
-	ldf.fill.nta f75=[ r3],loc0
-	ldf.fill.nta f83=[r14],loc0
-	ldf.fill.nta f91=[r15],loc0
-	;;
-	ldf.fill.nta f99=[in0],loc1
-	ldf.fill.nta f107=[ r3],loc1
-	ldf.fill.nta f115=[r14],loc1
-	ldf.fill.nta f123=[r15],loc1
-	;;
-	ldf.fill.nta f36=[in0],loc0
-	ldf.fill.nta f44=[ r3],loc0
-	ldf.fill.nta f52=[r14],loc0
-	ldf.fill.nta f60=[r15],loc0
-	;;
-	ldf.fill.nta f68=[in0],loc0
-	ldf.fill.nta f76=[ r3],loc0
-	ldf.fill.nta f84=[r14],loc0
-	ldf.fill.nta f92=[r15],loc0
-	;;
-	ldf.fill.nta f100=[in0],loc1
-	ldf.fill.nta f108=[ r3],loc1
-	ldf.fill.nta f116=[r14],loc1
-	ldf.fill.nta f124=[r15],loc1
-	;;
-	ldf.fill.nta f37=[in0],loc0
-	ldf.fill.nta f45=[ r3],loc0
-	ldf.fill.nta f53=[r14],loc0
-	ldf.fill.nta f61=[r15],loc0
-	;;
-	ldf.fill.nta f69=[in0],loc0
-	ldf.fill.nta f77=[ r3],loc0
-	ldf.fill.nta f85=[r14],loc0
-	ldf.fill.nta f93=[r15],loc0
-	;;
-	ldf.fill.nta f101=[in0],loc1
-	ldf.fill.nta f109=[ r3],loc1
-	ldf.fill.nta f117=[r14],loc1
-	ldf.fill.nta f125=[r15],loc1
-	;;
-	ldf.fill.nta f38 =[in0],loc0
-	ldf.fill.nta f46 =[ r3],loc0
-	ldf.fill.nta f54 =[r14],loc0
-	ldf.fill.nta f62 =[r15],loc0
-	;;
-	ldf.fill.nta f70 =[in0],loc0
-	ldf.fill.nta f78 =[ r3],loc0
-	ldf.fill.nta f86 =[r14],loc0
-	ldf.fill.nta f94 =[r15],loc0
-	;;
-	ldf.fill.nta f102=[in0],loc1
-	ldf.fill.nta f110=[ r3],loc1
-	ldf.fill.nta f118=[r14],loc1
-	ldf.fill.nta f126=[r15],loc1
-	;;
-	ldf.fill.nta f39 =[in0],loc0
-	ldf.fill.nta f47 =[ r3],loc0
-	ldf.fill.nta f55 =[r14],loc0
-	ldf.fill.nta f63 =[r15],loc0
-	;;
-	ldf.fill.nta f71 =[in0],loc0
-	ldf.fill.nta f79 =[ r3],loc0
-	ldf.fill.nta f87 =[r14],loc0
-	ldf.fill.nta f95 =[r15],loc0
-	;;
-	ldf.fill.nta f103=[in0]
-	ldf.fill.nta f111=[ r3]
-	ldf.fill.nta f119=[r14]
-	ldf.fill.nta f127=[r15]
-	br.ret.sptk.many rp
-END(__ia64_load_fpu)
-
-GLOBAL_ENTRY(__ia64_init_fpu)
-	stf.spill [sp]=f0		// M3
-	mov	 f32=f0			// F
-	nop.b	 0
-
-	ldfps	 f33,f34=[sp]		// M0
-	ldfps	 f35,f36=[sp]		// M1
-	mov      f37=f0			// F
-	;;
-
-	setf.s	 f38=r0			// M2
-	setf.s	 f39=r0			// M3
-	mov      f40=f0			// F
-
-	ldfps	 f41,f42=[sp]		// M0
-	ldfps	 f43,f44=[sp]		// M1
-	mov      f45=f0			// F
-
-	setf.s	 f46=r0			// M2
-	setf.s	 f47=r0			// M3
-	mov      f48=f0			// F
-
-	ldfps	 f49,f50=[sp]		// M0
-	ldfps	 f51,f52=[sp]		// M1
-	mov      f53=f0			// F
-
-	setf.s	 f54=r0			// M2
-	setf.s	 f55=r0			// M3
-	mov      f56=f0			// F
-
-	ldfps	 f57,f58=[sp]		// M0
-	ldfps	 f59,f60=[sp]		// M1
-	mov      f61=f0			// F
-
-	setf.s	 f62=r0			// M2
-	setf.s	 f63=r0			// M3
-	mov      f64=f0			// F
-
-	ldfps	 f65,f66=[sp]		// M0
-	ldfps	 f67,f68=[sp]		// M1
-	mov      f69=f0			// F
-
-	setf.s	 f70=r0			// M2
-	setf.s	 f71=r0			// M3
-	mov      f72=f0			// F
-
-	ldfps	 f73,f74=[sp]		// M0
-	ldfps	 f75,f76=[sp]		// M1
-	mov      f77=f0			// F
-
-	setf.s	 f78=r0			// M2
-	setf.s	 f79=r0			// M3
-	mov      f80=f0			// F
-
-	ldfps	 f81,f82=[sp]		// M0
-	ldfps	 f83,f84=[sp]		// M1
-	mov      f85=f0			// F
-
-	setf.s	 f86=r0			// M2
-	setf.s	 f87=r0			// M3
-	mov      f88=f0			// F
-
-	/*
-	 * When the instructions are cached, it would be faster to initialize
-	 * the remaining registers with simply mov instructions (F-unit).
-	 * This gets the time down to ~29 cycles.  However, this would use up
-	 * 33 bundles, whereas continuing with the above pattern yields
-	 * 10 bundles and ~30 cycles.
-	 */
-
-	ldfps	 f89,f90=[sp]		// M0
-	ldfps	 f91,f92=[sp]		// M1
-	mov      f93=f0			// F
-
-	setf.s	 f94=r0			// M2
-	setf.s	 f95=r0			// M3
-	mov      f96=f0			// F
-
-	ldfps	 f97,f98=[sp]		// M0
-	ldfps	 f99,f100=[sp]		// M1
-	mov      f101=f0		// F
-
-	setf.s	 f102=r0		// M2
-	setf.s	 f103=r0		// M3
-	mov      f104=f0		// F
-
-	ldfps	 f105,f106=[sp]		// M0
-	ldfps	 f107,f108=[sp]		// M1
-	mov      f109=f0		// F
-
-	setf.s	 f110=r0		// M2
-	setf.s	 f111=r0		// M3
-	mov      f112=f0		// F
-
-	ldfps	 f113,f114=[sp]		// M0
-	ldfps	 f115,f116=[sp]		// M1
-	mov      f117=f0		// F
-
-	setf.s	 f118=r0		// M2
-	setf.s	 f119=r0		// M3
-	mov      f120=f0		// F
-
-	ldfps	 f121,f122=[sp]		// M0
-	ldfps	 f123,f124=[sp]		// M1
-	mov      f125=f0		// F
-
-	setf.s	 f126=r0		// M2
-	setf.s	 f127=r0		// M3
-	br.ret.sptk.many rp		// F
-END(__ia64_init_fpu)
-
-/*
- * Switch execution mode from virtual to physical
- *
- * Inputs:
- *	r16 = new psr to establish
- * Output:
- *	r19 = old virtual address of ar.bsp
- *	r20 = old virtual address of sp
- *
- * Note: RSE must already be in enforced lazy mode
- */
-GLOBAL_ENTRY(ia64_switch_mode_phys)
- {
-	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
-	mov r15=ip
- }
-	;;
- {
-	flushrs				// must be first insn in group
-	srlz.i
- }
-	;;
-	mov cr.ipsr=r16			// set new PSR
-	add r3=1f-ia64_switch_mode_phys,r15
-
-	mov r19=ar.bsp
-	mov r20=sp
-	mov r14=rp			// get return address into a general register
-	;;
-
-	// going to physical mode, use tpa to translate virt->phys
-	tpa r17=r19
-	tpa r3=r3
-	tpa sp=sp
-	tpa r14=r14
-	;;
-
-	mov r18=ar.rnat			// save ar.rnat
-	mov ar.bspstore=r17		// this steps on ar.rnat
-	mov cr.iip=r3
-	mov cr.ifs=r0
-	;;
-	mov ar.rnat=r18			// restore ar.rnat
-	rfi				// must be last insn in group
-	;;
-1:	mov rp=r14
-	br.ret.sptk.many rp
-END(ia64_switch_mode_phys)
-
-/*
- * Switch execution mode from physical to virtual
- *
- * Inputs:
- *	r16 = new psr to establish
- *	r19 = new bspstore to establish
- *	r20 = new sp to establish
- *
- * Note: RSE must already be in enforced lazy mode
- */
-GLOBAL_ENTRY(ia64_switch_mode_virt)
- {
-	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
-	mov r15=ip
- }
-	;;
- {
-	flushrs				// must be first insn in group
-	srlz.i
- }
-	;;
-	mov cr.ipsr=r16			// set new PSR
-	add r3=1f-ia64_switch_mode_virt,r15
-
-	mov r14=rp			// get return address into a general register
-	;;
-
-	// going to virtual
-	//   - for code addresses, set upper bits of addr to KERNEL_START
-	//   - for stack addresses, copy from input argument
-	movl r18=KERNEL_START
-	dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
-	dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
-	mov sp=r20
-	;;
-	or r3=r3,r18
-	or r14=r14,r18
-	;;
-
-	mov r18=ar.rnat			// save ar.rnat
-	mov ar.bspstore=r19		// this steps on ar.rnat
-	mov cr.iip=r3
-	mov cr.ifs=r0
-	;;
-	mov ar.rnat=r18			// restore ar.rnat
-	rfi				// must be last insn in group
-	;;
-1:	mov rp=r14
-	br.ret.sptk.many rp
-END(ia64_switch_mode_virt)
-
-GLOBAL_ENTRY(ia64_delay_loop)
-	.prologue
-{	nop 0			// work around GAS unwind info generation bug...
-	.save ar.lc,r2
-	mov r2=ar.lc
-	.body
-	;;
-	mov ar.lc=r32
-}
-	;;
-	// force loop to be 32-byte aligned (GAS bug means we cannot use .align
-	// inside function body without corrupting unwind info).
-{	nop 0 }
-1:	br.cloop.sptk.few 1b
-	;;
-	mov ar.lc=r2
-	br.ret.sptk.many rp
-END(ia64_delay_loop)
-
-/*
- * Return a CPU-local timestamp in nano-seconds.  This timestamp is
- * NOT synchronized across CPUs its return value must never be
- * compared against the values returned on another CPU.  The usage in
- * kernel/sched.c ensures that.
- *
- * The return-value of sched_clock() is NOT supposed to wrap-around.
- * If it did, it would cause some scheduling hiccups (at the worst).
- * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
- * that would happen only once every 5+ years.
- *
- * The code below basically calculates:
- *
- *   (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
- *
- * except that the multiplication and the shift are done with 128-bit
- * intermediate precision so that we can produce a full 64-bit result.
- */
-GLOBAL_ENTRY(sched_clock)
-	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
-	mov.m r9=ar.itc		// fetch cycle-counter				(35 cyc)
-	;;
-	ldf8 f8=[r8]
-	;;
-	setf.sig f9=r9		// certain to stall, so issue it _after_ ldf8...
-	;;
-	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
-	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
-	;;
-	getf.sig r8=f10		//						(5 cyc)
-	getf.sig r9=f11
-	;;
-	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
-	br.ret.sptk.many rp
-END(sched_clock)
-
-GLOBAL_ENTRY(start_kernel_thread)
-	.prologue
-	.save rp, r0				// this is the end of the call-chain
-	.body
-	alloc r2 = ar.pfs, 0, 0, 2, 0
-	mov out0 = r9
-	mov out1 = r11;;
-	br.call.sptk.many rp = kernel_thread_helper;;
-	mov out0 = r8
-	br.call.sptk.many rp = sys_exit;;
-1:	br.sptk.few 1b				// not reached
-END(start_kernel_thread)
-
-#ifdef CONFIG_IA64_BRL_EMU
-
-/*
- *  Assembly routines used by brl_emu.c to set preserved register state.
- */
-
-#define SET_REG(reg)				\
- GLOBAL_ENTRY(ia64_set_##reg);			\
-	alloc r16=ar.pfs,1,0,0,0;		\
-	mov reg=r32;				\
-	;;					\
-	br.ret.sptk.many rp;			\
- END(ia64_set_##reg)
-
-SET_REG(b1);
-SET_REG(b2);
-SET_REG(b3);
-SET_REG(b4);
-SET_REG(b5);
-
-#endif /* CONFIG_IA64_BRL_EMU */
-
-#ifdef CONFIG_SMP
-	/*
-	 * This routine handles spinlock contention.  It uses a non-standard calling
-	 * convention to avoid converting leaf routines into interior routines.  Because
-	 * of this special convention, there are several restrictions:
-	 *
-	 * - do not use gp relative variables, this code is called from the kernel
-	 *   and from modules, r1 is undefined.
-	 * - do not use stacked registers, the caller owns them.
-	 * - do not use the scratch stack space, the caller owns it.
-	 * - do not use any registers other than the ones listed below
-	 *
-	 * Inputs:
-	 *   ar.pfs - saved CFM of caller
-	 *   ar.ccv - 0 (and available for use)
-	 *   r27    - flags from spin_lock_irqsave or 0.  Must be preserved.
-	 *   r28    - available for use.
-	 *   r29    - available for use.
-	 *   r30    - available for use.
-	 *   r31    - address of lock, available for use.
-	 *   b6     - return address
-	 *   p14    - available for use.
-	 *   p15    - used to track flag status.
-	 *
-	 * If you patch this code to use more registers, do not forget to update
-	 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
-	 */
-
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-
-GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
-	.prologue
-	.save ar.pfs, r0	// this code effectively has a zero frame size
-	.save rp, r28
-	.body
-	nop 0
-	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
-	.restore sp		// pop existing prologue after next insn
-	mov b6 = r28
-	.prologue
-	.save ar.pfs, r0
-	.altrp b6
-	.body
-	;;
-(p15)	ssm psr.i		// reenable interrupts if they were on
-				// DavidM says that srlz.d is slow and is not required in this case
-.wait:
-	// exponential backoff, kdb, lockmeter etc. go in here
-	hint @pause
-	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
-	nop 0
-	;;
-	cmp4.ne p14,p0=r30,r0
-(p14)	br.cond.sptk.few .wait
-(p15)	rsm psr.i		// disable interrupts if we reenabled them
-	br.cond.sptk.few b6	// lock is now free, try to acquire
-	.global ia64_spinlock_contention_pre3_4_end	// for kernprof
-ia64_spinlock_contention_pre3_4_end:
-END(ia64_spinlock_contention_pre3_4)
-
-#else
-
-GLOBAL_ENTRY(ia64_spinlock_contention)
-	.prologue
-	.altrp b6
-	.body
-	tbit.nz p15,p0=r27,IA64_PSR_I_BIT
-	;;
-.wait:
-(p15)	ssm psr.i		// reenable interrupts if they were on
-				// DavidM says that srlz.d is slow and is not required in this case
-.wait2:
-	// exponential backoff, kdb, lockmeter etc. go in here
-	hint @pause
-	ld4 r30=[r31]		// don't use ld4.bias; if it's contended, we won't write the word
-	;;
-	cmp4.ne p14,p0=r30,r0
-	mov r30 = 1
-(p14)	br.cond.sptk.few .wait2
-(p15)	rsm psr.i		// disable interrupts if we reenabled them
-	;;
-	cmpxchg4.acq r30=[r31], r30, ar.ccv
-	;;
-	cmp4.ne p14,p0=r0,r30
-(p14)	br.cond.sptk.few .wait
-
-	br.ret.sptk.many b6	// lock is now taken
-END(ia64_spinlock_contention)
-
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-GLOBAL_ENTRY(ia64_jump_to_sal)
-	alloc r16=ar.pfs,1,0,0,0;;
-	rsm psr.i  | psr.ic
-{
-	flushrs
-	srlz.i
-}
-	tpa r25=in0
-	movl r18=tlb_purge_done;;
-	DATA_VA_TO_PA(r18);;
-	mov b1=r18 	// Return location
-	movl r18=ia64_do_tlb_purge;;
-	DATA_VA_TO_PA(r18);;
-	mov b2=r18 	// doing tlb_flush work
-	mov ar.rsc=0  // Put RSE  in enforced lazy, LE mode
-	movl r17=1f;;
-	DATA_VA_TO_PA(r17);;
-	mov cr.iip=r17
-	movl r16=SAL_PSR_BITS_TO_SET;;
-	mov cr.ipsr=r16
-	mov cr.ifs=r0;;
-	rfi;;
-1:
-	/*
-	 * Invalidate all TLB data/inst
-	 */
-	br.sptk.many b2;; // jump to tlb purge code
-
-tlb_purge_done:
-	RESTORE_REGION_REGS(r25, r17,r18,r19);;
-	RESTORE_REG(b0, r25, r17);;
-	RESTORE_REG(b1, r25, r17);;
-	RESTORE_REG(b2, r25, r17);;
-	RESTORE_REG(b3, r25, r17);;
-	RESTORE_REG(b4, r25, r17);;
-	RESTORE_REG(b5, r25, r17);;
-	ld8 r1=[r25],0x08;;
-	ld8 r12=[r25],0x08;;
-	ld8 r13=[r25],0x08;;
-	RESTORE_REG(ar.fpsr, r25, r17);;
-	RESTORE_REG(ar.pfs, r25, r17);;
-	RESTORE_REG(ar.rnat, r25, r17);;
-	RESTORE_REG(ar.unat, r25, r17);;
-	RESTORE_REG(ar.bspstore, r25, r17);;
-	RESTORE_REG(cr.dcr, r25, r17);;
-	RESTORE_REG(cr.iva, r25, r17);;
-	RESTORE_REG(cr.pta, r25, r17);;
-	RESTORE_REG(cr.itv, r25, r17);;
-	RESTORE_REG(cr.pmv, r25, r17);;
-	RESTORE_REG(cr.cmcv, r25, r17);;
-	RESTORE_REG(cr.lrr0, r25, r17);;
-	RESTORE_REG(cr.lrr1, r25, r17);;
-	ld8 r4=[r25],0x08;;
-	ld8 r5=[r25],0x08;;
-	ld8 r6=[r25],0x08;;
-	ld8 r7=[r25],0x08;;
-	ld8 r17=[r25],0x08;;
-	mov pr=r17,-1;;
-	RESTORE_REG(ar.lc, r25, r17);;
-	/*
-	 * Now Restore floating point regs
-	 */
-	ldf.fill.nta f2=[r25],16;;
-	ldf.fill.nta f3=[r25],16;;
-	ldf.fill.nta f4=[r25],16;;
-	ldf.fill.nta f5=[r25],16;;
-	ldf.fill.nta f16=[r25],16;;
-	ldf.fill.nta f17=[r25],16;;
-	ldf.fill.nta f18=[r25],16;;
-	ldf.fill.nta f19=[r25],16;;
-	ldf.fill.nta f20=[r25],16;;
-	ldf.fill.nta f21=[r25],16;;
-	ldf.fill.nta f22=[r25],16;;
-	ldf.fill.nta f23=[r25],16;;
-	ldf.fill.nta f24=[r25],16;;
-	ldf.fill.nta f25=[r25],16;;
-	ldf.fill.nta f26=[r25],16;;
-	ldf.fill.nta f27=[r25],16;;
-	ldf.fill.nta f28=[r25],16;;
-	ldf.fill.nta f29=[r25],16;;
-	ldf.fill.nta f30=[r25],16;;
-	ldf.fill.nta f31=[r25],16;;
-
-	/*
-	 * Now that we have done all the register restores
-	 * we are now ready for the big DIVE to SAL Land
-	 */
-	ssm psr.ic;;
-	srlz.d;;
-	br.ret.sptk.many b0;;
-END(ia64_jump_to_sal)
-#endif /* CONFIG_HOTPLUG_CPU */
-
-#endif /* CONFIG_SMP */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c b/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
deleted file mode 100644
index 1541b57a5c..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/iosapic.c
+++ /dev/null
@@ -1,1253 +0,0 @@
-/*
- * I/O SAPIC support.
- *
- * Copyright (C) 1999 Intel Corp.
- * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
- * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
- *
- * 00/04/19	D. Mosberger	Rewritten to mirror more closely the x86 I/O
- *				APIC code.  In particular, we now have separate
- *				handlers for edge and level triggered
- *				interrupts.
- * 00/10/27	Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
- *				allocation PCI to vector mapping, shared PCI
- *				interrupts.
- * 00/10/27	D. Mosberger	Document things a bit more to make them more
- *				understandable.  Clean up much of the old
- *				IOSAPIC cruft.
- * 01/07/27	J.I. Lee	PCI irq routing, Platform/Legacy interrupts
- *				and fixes for ACPI S5(SoftOff) support.
- * 02/01/23	J.I. Lee	iosapic pgm fixes for PCI irq routing from _PRT
- * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
- *				vectors in iosapic_set_affinity(),
- *				initializations for /proc/irq/#/smp_affinity
- * 02/04/02	P. Diefenbaugh	Cleaned up ACPI PCI IRQ routing.
- * 02/04/18	J.I. Lee	bug fix in iosapic_init_pci_irq
- * 02/04/30	J.I. Lee	bug fix in find_iosapic to fix ACPI PCI IRQ to
- *				IOSAPIC mapping error
- * 02/07/29	T. Kochi	Allocate interrupt vectors dynamically
- * 02/08/04	T. Kochi	Cleaned up terminology (irq, global system
- *				interrupt, vector, etc.)
- * 02/09/20	D. Mosberger	Simplified by taking advantage of ACPI's
- *				pci_irq code.
- * 03/02/19	B. Helgaas	Make pcat_compat system-wide, not per-IOSAPIC.
- *				Remove iosapic_address & gsi_base from
- *				external interfaces.  Rationalize
- *				__init/__devinit attributes.
- * 04/12/04 Ashok Raj	<ashok.raj@intel.com> Intel Corporation 2004
- *				Updated to work with irq migration necessary
- *				for CPU Hotplug
- */
-/*
- * Here is what the interrupt logic between a PCI device and the kernel looks
- * like:
- *
- * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
- *     INTD).  The device is uniquely identified by its bus-, and slot-number
- *     (the function number does not matter here because all functions share
- *     the same interrupt lines).
- *
- * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC
- *     controller.  Multiple interrupt lines may have to share the same
- *     IOSAPIC pin (if they're level triggered and use the same polarity).
- *     Each interrupt line has a unique Global System Interrupt (GSI) number
- *     which can be calculated as the sum of the controller's base GSI number
- *     and the IOSAPIC pin number to which the line connects.
- *
- * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the
- *     IOSAPIC pin into the IA-64 interrupt vector.  This interrupt vector is
- *     then sent to the CPU.
- *
- * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
- *     used as architecture-independent interrupt handling mechanism in Linux.
- *     As an IRQ is a number, we have to have
- *     IA-64 interrupt vector number <-> IRQ number mapping.  On smaller
- *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
- *     platform can implement platform_irq_to_vector(irq) and
- *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
- *     Please see also include/asm-ia64/hw_irq.h for those APIs.
- *
- * To sum up, there are three levels of mappings involved:
- *
- *	PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
- *
- * Note: The term "IRQ" is loosely used everywhere in the Linux kernel to
- * describe interrupts.  Now we use "IRQ" only for Linux IRQs.  ISA IRQ
- * (isa_irq) is the only exception in this source code.
- */
-
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/string.h>
-#include <linux/bootmem.h>
-
-#include <asm/delay.h>
-#include <asm/hw_irq.h>
-#include <asm/io.h>
-#include <asm/iosapic.h>
-#include <asm/machvec.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-
-#undef DEBUG_INTERRUPT_ROUTING
-
-#ifdef DEBUG_INTERRUPT_ROUTING
-#define DBG(fmt...)	printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-#define NR_PREALLOCATE_RTE_ENTRIES \
-	(PAGE_SIZE / sizeof(struct iosapic_rte_info))
-#define RTE_PREALLOCATED	(1)
-
-static DEFINE_SPINLOCK(iosapic_lock);
-
-/*
- * These tables map IA-64 vectors to the IOSAPIC pin that generates this
- * vector.
- */
-
-/*
- * One IOSAPIC routing-table entry (RTE), linked into the list of RTEs that
- * deliver the same IA-64 vector.
- */
-struct iosapic_rte_info {
-	struct list_head rte_list;	/* node in list of RTEs sharing the
-					 * same vector */
-	char __iomem	*addr;		/* base address of IOSAPIC */
-	unsigned int	gsi_base;	/* first GSI assigned to this
-					 * IOSAPIC */
-	unsigned char	rte_index;	/* IOSAPIC RTE index.  Unsigned: the
-					 * pin count comes from an 8-bit
-					 * field, so indices 128..255 must
-					 * not turn negative. */
-	int		refcnt;		/* reference counter */
-	unsigned int	flags;		/* flags */
-} ____cacheline_aligned;
-
-static struct iosapic_intr_info {	/* per-vector state, indexed by IA-64 vector number */
-	struct list_head rtes;		/* RTEs using this vector (empty =>
-					 * not an IOSAPIC interrupt) */
-	int		count;		/* # of RTEs that shares this vector */
-	u32		low32;		/* current value of low word of
-					 * Redirection table entry */
-	unsigned int	dest;		/* destination CPU physical ID */
-	unsigned char	dmode	: 3;	/* delivery mode (see iosapic.h) */
-	unsigned char 	polarity: 1;	/* interrupt polarity
-					 * (see iosapic.h) */
-	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
-} iosapic_intr_info[IA64_NUM_VECTORS];
-
-static struct iosapic {			/* one slot per known IOSAPIC */
-	char __iomem	*addr;		/* base address of IOSAPIC; a NULL
-					 * addr marks the slot as free for
-					 * iosapic_alloc() */
-	unsigned int 	gsi_base;	/* first GSI assigned to this
-					 * IOSAPIC */
-	unsigned short 	num_rte;	/* # of RTEs on this IOSAPIC */
-	int		rtes_inuse;	/* # of RTEs in use on this IOSAPIC */
-#ifdef CONFIG_NUMA
-	unsigned short	node;		/* numa node association via pxm */
-#endif
-} iosapic_lists[NR_IOSAPICS];
-
-static unsigned char pcat_compat __devinitdata;	/* 8259 compatibility flag */
-
-static int iosapic_kmalloc_ok;
-static LIST_HEAD(free_rte_list);
-
-#ifdef CONFIG_XEN
-#include <xen/interface/xen.h>
-#include <xen/interface/physdev.h>
-#include <asm/hypervisor.h>
-/*
- * Read IOSAPIC register @reg through the PHYSDEVOP_apic_read hypercall.
- * The hypervisor is given the IOSAPIC address with __IA64_UNCACHED_OFFSET
- * subtracted.  Returns the register value, or the hypercall's non-zero
- * status if the hypercall fails.
- */
-static inline unsigned int xen_iosapic_read(char __iomem *iosapic, unsigned int reg)
-{
-	struct physdev_apic op;
-	int rc;
-
-	op.apic_physbase = (unsigned long)iosapic - __IA64_UNCACHED_OFFSET;
-	op.reg = reg;
-
-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &op);
-	if (rc != 0)
-		return rc;
-
-	return op.value;
-}
-
-/*
- * Write @val to IOSAPIC register @reg through the PHYSDEVOP_apic_write
- * hypercall.  The hypercall's result is not examined.
- */
-static inline void xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
-{
-	struct physdev_apic op;
-
-	op.apic_physbase = (unsigned long)iosapic - __IA64_UNCACHED_OFFSET;
-	op.reg = reg;
-	op.value = val;
-
-	HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &op);
-}
-
-/*
- * Read IOSAPIC register @reg.  Under Xen the access goes through the
- * hypervisor; otherwise the register is selected via IOSAPIC_REG_SELECT and
- * its value read back from IOSAPIC_WINDOW.
- */
-static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg)
-{
-	if (is_running_on_xen())
-		return xen_iosapic_read(iosapic, reg);
-
-	writel(reg, iosapic + IOSAPIC_REG_SELECT);
-	return readl(iosapic + IOSAPIC_WINDOW);
-}
-
-/*
- * Write @val to IOSAPIC register @reg.  Under Xen the access goes through
- * the hypervisor; otherwise the register is selected via IOSAPIC_REG_SELECT
- * and the value stored through IOSAPIC_WINDOW.
- */
-static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
-{
-	if (is_running_on_xen()) {
-		xen_iosapic_write(iosapic, reg, val);
-		return;
-	}
-
-	writel(reg, iosapic + IOSAPIC_REG_SELECT);
-	writel(val, iosapic + IOSAPIC_WINDOW);
-}
-
-/*
- * Ask the hypervisor (PHYSDEVOP_alloc_irq_vector) for a vector for @irq.
- * Returns the vector it filled in, or -ENOSPC if the hypercall fails.
- */
-int xen_assign_irq_vector(int irq)
-{
-	struct physdev_irq op;
-	int rc;
-
-	op.irq = irq;
-	rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &op);
-
-	return rc ? -ENOSPC : op.vector;
-}
-
-/*
- * Hand @vector back to the hypervisor via PHYSDEVOP_free_irq_vector.
- * A failed hypercall is only logged; nothing is reported to the caller.
- */
-void xen_free_irq_vector(int vector)
-{
-	struct physdev_irq irq_op;
-
-	irq_op.vector = vector;
-	if (HYPERVISOR_physdev_op(PHYSDEVOP_free_irq_vector, &irq_op))
-		printk(KERN_WARNING "%s: xen_free_irq_vector fail vector=%d\n",
-		       __FUNCTION__, vector);
-}
-#endif /* XEN */
-
-/*
- * Return the index in iosapic_lists[] of the IOSAPIC whose GSI range
- * [gsi_base, gsi_base + num_rte) contains @gsi, or -1 if there is none.
- */
-static inline int
-find_iosapic (unsigned int gsi)
-{
-	int idx = 0;
-
-	while (idx < NR_IOSAPICS) {
-		/* wraps to a huge value when gsi is below this base */
-		unsigned int pin = gsi - iosapic_lists[idx].gsi_base;
-
-		if (pin < iosapic_lists[idx].num_rte)
-			return idx;
-		idx++;
-	}
-
-	return -1;
-}
-
-/*
- * Scan the RTE list of every vector for an entry that belongs to @gsi.
- * Returns the index of the first matching vector, or -1 if none matches.
- */
-static inline int
-_gsi_to_vector (unsigned int gsi)
-{
-	struct iosapic_rte_info *rte;
-	int vec;
-
-	for (vec = 0; vec < IA64_NUM_VECTORS; vec++) {
-		list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
-				    rte_list) {
-			if (rte->gsi_base + rte->rte_index == gsi)
-				return vec;
-		}
-	}
-
-	return -1;
-}
-
-/*
- * Translate GSI number to the corresponding IA-64 interrupt vector.  If no
- * entry exists, return -1.
- *
- * This is a plain wrapper around _gsi_to_vector() and takes no lock itself;
- * iosapic_register_intr() in this file calls it with iosapic_lock held.
- */
-inline int
-gsi_to_vector (unsigned int gsi)
-{
-	return _gsi_to_vector(gsi);
-}
-
-/*
- * Look up the Linux IRQ for @gsi while holding iosapic_lock.  Returns -1 if
- * no RTE is registered for the GSI.
- */
-int
-gsi_to_irq (unsigned int gsi)
-{
-	unsigned long flags;
-	int vector;
-
-	spin_lock_irqsave(&iosapic_lock, flags);
-	vector = _gsi_to_vector(gsi);
-	spin_unlock_irqrestore(&iosapic_lock, flags);
-
-	/*
-	 * XXX fix me: this assumes an identity mapping between IA-64 vector
-	 * and Linux irq numbers...
-	 */
-	return vector;
-}
-
-/*
- * Find the RTE on vector @vec's list that belongs to @gsi.  Returns NULL
- * when the vector has no RTE for that GSI.
- */
-static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
-						  unsigned int vec)
-{
-	struct list_head *pos;
-
-	list_for_each(pos, &iosapic_intr_info[vec].rtes) {
-		struct iosapic_rte_info *rte =
-			list_entry(pos, struct iosapic_rte_info, rte_list);
-
-		if (rte->gsi_base + rte->rte_index == gsi)
-			return rte;
-	}
-
-	return NULL;
-}
-
-/*
- * Program the RTE that routes @gsi to @vector so that it targets @dest,
- * using the polarity, trigger and delivery mode recorded for the vector.
- * A non-zero @mask leaves the entry masked.  The high word is written
- * before the low word, and the new low word and destination are cached in
- * iosapic_intr_info[].  Does nothing if the vector has no RTE for @gsi.
- */
-static void
-set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
-{
-	struct iosapic_intr_info *info = &iosapic_intr_info[vector];
-	struct iosapic_rte_info *rte;
-	unsigned long pol, trigger, dmode;
-	u32 low32, high32;
-	char redir;
-
-	DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
-
-	rte = gsi_vector_to_rte(gsi, vector);
-	if (rte == NULL)
-		return;		/* not an IOSAPIC interrupt */
-
-	pol     = info->polarity;
-	trigger = info->trigger;
-	dmode   = info->dmode;
-	redir   = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
-
-#ifdef CONFIG_SMP
-	{
-		unsigned int irq = 0;
-
-		/* locate the first irq that maps to this vector, if any */
-		while (irq < NR_IRQS && irq_to_vector(irq) != vector)
-			++irq;
-
-		if (irq < NR_IRQS)
-			set_irq_affinity_info(irq, (int)(dest & 0xffff),
-					      redir);
-	}
-#endif
-
-	low32  = vector;
-	low32 |= (pol << IOSAPIC_POLARITY_SHIFT);
-	low32 |= (trigger << IOSAPIC_TRIGGER_SHIFT);
-	low32 |= (dmode << IOSAPIC_DELIVERY_SHIFT);
-	low32 |= ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT);
-
-	/* dest contains both id and eid */
-	high32 = (dest << IOSAPIC_DEST_SHIFT);
-
-	iosapic_write(rte->addr, IOSAPIC_RTE_HIGH(rte->rte_index), high32);
-	iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), low32);
-
-	info->low32 = low32;
-	info->dest  = dest;
-}
-
-static void
-nop (unsigned int irq)
-{
-	/*
-	 * do nothing...  Used below as the handler for interrupt-type
-	 * operations that need no action (level ack, edge disable/end).
-	 */
-}
-
-/*
- * Mask @irq: set IOSAPIC_MASK in the cached low word of its vector and
- * write that word to every RTE sharing the vector, under iosapic_lock.
- */
-static void
-mask_irq (unsigned int irq)
-{
-	ia64_vector vec = irq_to_vector(irq);
-	struct iosapic_intr_info *info = &iosapic_intr_info[vec];
-	struct iosapic_rte_info *rte;
-	unsigned long flags;
-	u32 low32;
-
-	if (list_empty(&info->rtes))
-		return;			/* not an IOSAPIC interrupt! */
-
-	spin_lock_irqsave(&iosapic_lock, flags);
-
-	/* set only the mask bit */
-	info->low32 |= IOSAPIC_MASK;
-	low32 = info->low32;
-
-	list_for_each_entry(rte, &info->rtes, rte_list)
-		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
-			      low32);
-
-	spin_unlock_irqrestore(&iosapic_lock, flags);
-}
-
-/*
- * Unmask @irq: clear IOSAPIC_MASK in the cached low word of its vector and
- * write that word to every RTE sharing the vector, under iosapic_lock.
- */
-static void
-unmask_irq (unsigned int irq)
-{
-	ia64_vector vec = irq_to_vector(irq);
-	struct iosapic_intr_info *info = &iosapic_intr_info[vec];
-	struct iosapic_rte_info *rte;
-	unsigned long flags;
-	u32 low32;
-
-	if (list_empty(&info->rtes))
-		return;			/* not an IOSAPIC interrupt! */
-
-	spin_lock_irqsave(&iosapic_lock, flags);
-
-	info->low32 &= ~IOSAPIC_MASK;
-	low32 = info->low32;
-
-	list_for_each_entry(rte, &info->rtes, rte_list)
-		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
-			      low32);
-
-	spin_unlock_irqrestore(&iosapic_lock, flags);
-}
-
-
-/*
- * Retarget @irq at the first CPU in @mask.  The IA64_IRQ_REDIRECTED bit of
- * @irq selects lowest-priority delivery; without it delivery is fixed.
- * Every RTE sharing the vector is rewritten (high word, then low word)
- * under iosapic_lock.  The function body is empty without CONFIG_SMP.
- */
-static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
-{
-#ifdef CONFIG_SMP
-	struct iosapic_intr_info *info;
-	struct iosapic_rte_info *rte;
-	unsigned long flags;
-	u32 high32, low32;
-	ia64_vector vec;
-	int dest;
-	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
-
-	irq &= (~IA64_IRQ_REDIRECTED);
-	vec = irq_to_vector(irq);
-	info = &iosapic_intr_info[vec];
-
-	if (cpus_empty(mask))
-		return;
-
-	dest = cpu_physical_id(first_cpu(mask));
-
-	if (list_empty(&info->rtes))
-		return;			/* not an IOSAPIC interrupt */
-
-	set_irq_affinity_info(irq, dest, redir);
-
-	/* dest contains both id and eid */
-	high32 = dest << IOSAPIC_DEST_SHIFT;
-
-	spin_lock_irqsave(&iosapic_lock, flags);
-
-	/* drop the old delivery mode, then install lowest-priority or fixed */
-	low32 = info->low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
-	low32 |= redir ? (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT)
-		       : (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
-
-	info->low32 = low32;
-	info->dest = dest;
-
-	list_for_each_entry(rte, &info->rtes, rte_list) {
-		iosapic_write(rte->addr, IOSAPIC_RTE_HIGH(rte->rte_index),
-			      high32);
-		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
-			      low32);
-	}
-
-	spin_unlock_irqrestore(&iosapic_lock, flags);
-#endif
-}
-
-/*
- * Handlers for level-triggered interrupts.
- */
-
-static unsigned int
-iosapic_startup_level_irq (unsigned int irq)
-{
-	unmask_irq(irq);	/* starting a level irq just unmasks its RTEs */
-	return 0;		/* always returns 0 */
-}
-
-/*
- * End-of-interrupt handling for a level-triggered @irq: let a pending irq
- * migration proceed, then issue an EOI for the vector to the IOSAPIC of
- * every RTE that shares it.
- */
-static void
-iosapic_end_level_irq (unsigned int irq)
-{
-	struct iosapic_rte_info *rte;
-	ia64_vector vec;
-
-	vec = irq_to_vector(irq);
-	move_native_irq(irq);
-
-	list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) {
-		iosapic_eoi(rte->addr, vec);
-	}
-}
-
-#define iosapic_shutdown_level_irq	mask_irq	/* shutdown == mask */
-#define iosapic_enable_level_irq	unmask_irq
-#define iosapic_disable_level_irq	mask_irq
-#define iosapic_ack_level_irq		nop		/* nothing done at ack time */
-
-struct hw_interrupt_type irq_type_iosapic_level = {	/* ops installed by register_intr() for non-edge triggers */
-	.typename =	"IO-SAPIC-level",
-	.startup =	iosapic_startup_level_irq,
-	.shutdown =	iosapic_shutdown_level_irq,
-	.enable =	iosapic_enable_level_irq,
-	.disable =	iosapic_disable_level_irq,
-	.ack =		iosapic_ack_level_irq,
-	.end =		iosapic_end_level_irq,
-	.set_affinity =	iosapic_set_affinity
-};
-
-/*
- * Handlers for edge-triggered interrupts.
- */
-
-static unsigned int
-iosapic_startup_edge_irq (unsigned int irq)
-{
-	unmask_irq(irq);
-	/*
-	 * IOSAPIC simply drops interrupts pended while the
-	 * corresponding pin was masked, so we can't know if an
-	 * interrupt is pending already.  Let's hope not...
-	 */
-	return 0;		/* i.e. always reports "nothing pending" */
-}
-
-/*
- * Acknowledge an edge-triggered @irq: let a pending irq migration proceed,
- * and mask the interrupt when its descriptor is both pending and disabled.
- */
-static void
-iosapic_ack_edge_irq (unsigned int irq)
-{
-	irq_desc_t *desc = &irq_desc[irq];
-
-	move_native_irq(irq);
-
-	/*
-	 * Once we have recorded IRQ_PENDING already, we can mask the
-	 * interrupt for real. This prevents IRQ storms from unhandled
-	 * devices.
-	 */
-	if ((desc->status & IRQ_PENDING) && (desc->status & IRQ_DISABLED))
-		mask_irq(irq);
-}
-
-#define iosapic_enable_edge_irq		unmask_irq
-#define iosapic_disable_edge_irq	nop		/* disable does not touch the RTE */
-#define iosapic_end_edge_irq		nop
-
-struct hw_interrupt_type irq_type_iosapic_edge = {	/* ops installed by register_intr() for IOSAPIC_EDGE */
-	.typename =	"IO-SAPIC-edge",
-	.startup =	iosapic_startup_edge_irq,
-	.shutdown =	iosapic_disable_edge_irq,	/* i.e. nop: shutdown leaves the RTE unmasked */
-	.enable =	iosapic_enable_edge_irq,
-	.disable =	iosapic_disable_edge_irq,
-	.ack =		iosapic_ack_edge_irq,
-	.end =		iosapic_end_edge_irq,
-	.set_affinity =	iosapic_set_affinity
-};
-
-unsigned int
-iosapic_version (char __iomem *addr)
-{
-	/*
-	 * IOSAPIC Version Register return 32 bit structure like:
-	 * {
-	 *	unsigned int version   : 8;
-	 *	unsigned int reserved1 : 8;
-	 *	unsigned int max_redir : 8;
-	 *	unsigned int reserved2 : 8;
-	 * }
-	 *
-	 * The raw 32-bit value is returned; callers in this file extract
-	 * max_redir themselves with ((ver >> 16) & 0xff).
-	 */
-	return iosapic_read(addr, IOSAPIC_VERSION);
-}
-
-/*
- * Pick a device vector that an interrupt with the given @trigger and @pol
- * can share: among vectors with matching trigger/polarity and a fixed or
- * lowest-priority delivery mode, return the lowest-numbered one carrying
- * the fewest RTEs.  Returns -1 for edge triggers or if nothing matches.
- */
-static int iosapic_find_sharable_vector (unsigned long trigger,
-					 unsigned long pol)
-{
-	int vec, best = -1, best_count = -1;
-
-	/*
-	 * shared vectors for edge-triggered interrupts are not
-	 * supported yet
-	 */
-	if (trigger == IOSAPIC_EDGE)
-		return -1;
-
-	for (vec = IA64_FIRST_DEVICE_VECTOR;
-	     vec <= IA64_LAST_DEVICE_VECTOR; vec++) {
-		const struct iosapic_intr_info *info = &iosapic_intr_info[vec];
-
-		if (info->trigger != trigger || info->polarity != pol)
-			continue;
-		if (info->dmode != IOSAPIC_FIXED &&
-		    info->dmode != IOSAPIC_LOWEST_PRIORITY)
-			continue;
-		/* keep the current best unless this one is strictly less loaded */
-		if (best_count != -1 && info->count >= best_count)
-			continue;
-
-		best = vec;
-		best_count = info->count;
-	}
-
-	return best;
-}
-
-/*
- * If @vector already carries IOSAPIC RTEs, allocate a fresh vector, copy
- * the per-vector state to it, move the first RTE of the old list across
- * and reset @vector to the unused, masked state.  Panics when no vector
- * can be allocated.  Does nothing if @vector is unused.
- */
-static void __init
-iosapic_reassign_vector (int vector)
-{
-	struct iosapic_intr_info *old = &iosapic_intr_info[vector];
-	struct iosapic_intr_info *new;
-	int new_vector;
-
-	if (list_empty(&old->rtes))
-		return;
-
-	new_vector = assign_irq_vector(AUTO_ASSIGN);
-	if (new_vector < 0)
-		panic("%s: out of interrupt vectors!\n", __FUNCTION__);
-	printk(KERN_INFO "Reassigning vector %d to %d\n",
-	       vector, new_vector);
-
-	new = &iosapic_intr_info[new_vector];
-	memcpy(new, old, sizeof(struct iosapic_intr_info));
-	INIT_LIST_HEAD(&new->rtes);
-	list_move(old->rtes.next, &new->rtes);
-
-	memset(old, 0, sizeof(struct iosapic_intr_info));
-	old->low32 = IOSAPIC_MASK;
-	INIT_LIST_HEAD(&old->rtes);
-}
-
-/*
- * Obtain a zeroed RTE descriptor.  Until iosapic_kmalloc_ok is set, an
- * empty free list is refilled with NR_PREALLOCATE_RTE_ENTRIES entries from
- * bootmem.  Entries taken from the free list are flagged RTE_PREALLOCATED;
- * when the list is empty the entry comes from kmalloc(GFP_ATOMIC).
- * Returns NULL if allocation fails.
- */
-static struct iosapic_rte_info *iosapic_alloc_rte (void)
-{
-	struct iosapic_rte_info *rte;
-	unsigned int flags = 0;
-	int i;
-
-	if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
-		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
-				    NR_PREALLOCATE_RTE_ENTRIES);
-		if (!rte)
-			return NULL;
-		for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++)
-			list_add(&rte[i].rte_list, &free_rte_list);
-	}
-
-	if (list_empty(&free_rte_list)) {
-		rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
-		if (!rte)
-			return NULL;
-	} else {
-		rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
-				 rte_list);
-		list_del(&rte->rte_list);
-		flags = RTE_PREALLOCATED;
-	}
-
-	memset(rte, 0, sizeof(struct iosapic_rte_info));
-	rte->flags |= flags;
-
-	return rte;
-}
-
-/*
- * Release an RTE descriptor: preallocated entries go back onto the tail of
- * free_rte_list, all others are kfree()d.
- */
-static void iosapic_free_rte (struct iosapic_rte_info *rte)
-{
-	if (!(rte->flags & RTE_PREALLOCATED)) {
-		kfree(rte);
-		return;
-	}
-
-	list_add_tail(&rte->rte_list, &free_rte_list);
-}
-
-static inline int vector_is_shared (int vector)
-{
-	return (iosapic_intr_info[vector].count > 1);	/* shared == more than one RTE on the vector */
-}
-
-/*
- * Record that @gsi is delivered on @vector with the given delivery mode,
- * polarity and trigger.  A new RTE descriptor is allocated and linked to
- * the vector unless one already exists for this GSI.  An existing RTE on a
- * shared vector may not change trigger or polarity.  When not running on
- * Xen, the irq descriptor's chip is switched to the matching edge/level
- * type.
- *
- * Returns 0 on success, -ENODEV if no IOSAPIC covers @gsi, -ENOMEM if no
- * RTE descriptor can be allocated, -EINVAL on a trigger/polarity conflict.
- */
-static int
-register_intr (unsigned int gsi, int vector, unsigned char delivery,
-	       unsigned long polarity, unsigned long trigger)
-{
-	struct iosapic_intr_info *info = &iosapic_intr_info[vector];
-	struct hw_interrupt_type *irq_type;
-	struct iosapic_rte_info *rte;
-	irq_desc_t *idesc;
-	int index;
-
-	index = find_iosapic(gsi);
-	if (index < 0) {
-		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
-		       __FUNCTION__, gsi);
-		return -ENODEV;
-	}
-
-	rte = gsi_vector_to_rte(gsi, vector);
-	if (rte) {
-		if (vector_is_shared(vector) &&
-		    (info->trigger != trigger ||
-		     info->polarity != polarity)) {
-			printk (KERN_WARNING
-				"%s: cannot override the interrupt\n",
-				__FUNCTION__);
-			return -EINVAL;
-		}
-	} else {
-		rte = iosapic_alloc_rte();
-		if (!rte) {
-			printk(KERN_WARNING "%s: cannot allocate memory\n",
-			       __FUNCTION__);
-			return -ENOMEM;
-		}
-
-		rte->rte_index	= gsi - iosapic_lists[index].gsi_base;
-		rte->addr	= iosapic_lists[index].addr;
-		rte->gsi_base	= iosapic_lists[index].gsi_base;
-		rte->refcnt++;
-		list_add_tail(&rte->rte_list, &info->rtes);
-		info->count++;
-		iosapic_lists[index].rtes_inuse++;
-	}
-
-	info->polarity = polarity;
-	info->dmode    = delivery;
-	info->trigger  = trigger;
-
-	if (is_running_on_xen())
-		return 0;
-
-	irq_type = (trigger == IOSAPIC_EDGE) ? &irq_type_iosapic_edge
-					     : &irq_type_iosapic_level;
-
-	idesc = &irq_desc[vector];
-	if (idesc->chip == irq_type)
-		return 0;
-
-	if (idesc->chip != &no_irq_type)
-		printk(KERN_WARNING
-		       "%s: changing vector %d from %s to %s\n",
-		       __FUNCTION__, vector,
-		       idesc->chip->typename, irq_type->typename);
-	idesc->chip = irq_type;
-
-	return 0;
-}
-
-static unsigned int
-get_target_cpu (unsigned int gsi, int vector)
-{
-#ifdef CONFIG_SMP
-	static int cpu = -1;	/* last CPU handed out by the round-robin at the end */
-	extern int cpe_vector;
-
-	/*
-	 * In case of vector shared by multiple RTEs, all RTEs that
-	 * share the vector need to use the same destination CPU.
-	 */
-	if (!list_empty(&iosapic_intr_info[vector].rtes))
-		return iosapic_intr_info[vector].dest;
-
-	/*
-	 * If the platform supports redirection via XTP, let it
-	 * distribute interrupts.
-	 */
-	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
-		return cpu_physical_id(smp_processor_id());
-
-	/*
-	 * Some interrupts (ACPI SCI, for instance) are registered
-	 * before the BSP is marked as online.
-	 */
-	if (!cpu_online(smp_processor_id()))
-		return cpu_physical_id(smp_processor_id());
-
-#ifdef CONFIG_ACPI
-	if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
-		return get_cpei_target_cpu();
-#endif
-
-#ifdef CONFIG_NUMA
-	{
-		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
-		cpumask_t cpu_mask;
-
-		iosapic_index = find_iosapic(gsi);
-		if (iosapic_index < 0 ||
-		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)	/* MAX_NUMNODES is what iosapic_init() stores before any node is mapped */
-			goto skip_numa_setup;
-
-		cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
-
-		for_each_cpu_mask(numa_cpu, cpu_mask) {
-			if (!cpu_online(numa_cpu))
-				cpu_clear(numa_cpu, cpu_mask);	/* keep only online CPUs of the node */
-		}
-
-		num_cpus = cpus_weight(cpu_mask);
-
-		if (!num_cpus)
-			goto skip_numa_setup;
-
-		/* Use vector assignment to distribute across cpus in node */
-		cpu_index = vector % num_cpus;
-
-		for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
-			numa_cpu = next_cpu(numa_cpu, cpu_mask);	/* step to the cpu_index-th CPU left in the mask */
-
-		if (numa_cpu != NR_CPUS)
-			return cpu_physical_id(numa_cpu);
-	}
-skip_numa_setup:
-#endif
-	/*
-	 * Otherwise, round-robin interrupt vectors across all the
-	 * processors.  (It'd be nice if we could be smarter in the
-	 * case of NUMA.)
-	 */
-	do {
-		if (++cpu >= NR_CPUS)
-			cpu = 0;
-	} while (!cpu_online(cpu));
-
-	return cpu_physical_id(cpu);
-#else  /* CONFIG_SMP */
-	return cpu_physical_id(smp_processor_id());
-#endif
-}
-
-/*
- * ACPI can describe IOSAPIC interrupts via static tables and namespace
- * methods.  This provides an interface to register those interrupts and
- * program the IOSAPIC RTE.
- *
- * Returns the vector now serving @gsi, -ENOSPC when no vector (fresh or
- * sharable) is available, or the negative error from register_intr().
- * A GSI that is already registered only has its RTE refcnt bumped.
- */
-int
-iosapic_register_intr (unsigned int gsi,
-		       unsigned long polarity, unsigned long trigger)
-{
-	int vector, mask = 1, err;
-	unsigned int dest;
-	unsigned long flags;
-	struct iosapic_rte_info *rte;
-	u32 low32;
-again:
-	/*
-	 * If this GSI has already been registered (i.e., it's a
-	 * shared interrupt, or we lost a race to register it),
-	 * don't touch the RTE.
-	 */
-	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		vector = gsi_to_vector(gsi);
-		if (vector > 0) {
-			rte = gsi_vector_to_rte(gsi, vector);
-			rte->refcnt++;
-			spin_unlock_irqrestore(&iosapic_lock, flags);
-			return vector;
-		}
-	}
-	spin_unlock_irqrestore(&iosapic_lock, flags);
-
-	/* If vector is running out, we try to find a sharable vector */
-	vector = assign_irq_vector(AUTO_ASSIGN);
-	if (vector < 0) {
-		vector = iosapic_find_sharable_vector(trigger, polarity);
-  		if (vector < 0)
-			return -ENOSPC;
-	}
-
-	spin_lock_irqsave(&irq_desc[vector].lock, flags);	/* lock order: irq descriptor first, then iosapic_lock */
-	spin_lock(&iosapic_lock);
-	{
-		if (gsi_to_vector(gsi) > 0) {	/* someone registered this GSI while no lock was held */
-			if (list_empty(&iosapic_intr_info[vector].rtes))
-				free_irq_vector(vector);	/* release only a vector that carries no RTEs */
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&irq_desc[vector].lock,
-					       flags);
-			goto again;
-		}
-
-		dest = get_target_cpu(gsi, vector);
-		err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
-			      polarity, trigger);
-		if (err < 0) {	/* NOTE(review): no free_irq_vector() on this path - confirm a fresh vector is not leaked */
-			spin_unlock(&iosapic_lock);
-			spin_unlock_irqrestore(&irq_desc[vector].lock,
-					       flags);
-			return err;
-		}
-
-		/*
-		 * If the vector is shared and already unmasked for
-		 * other interrupt sources, don't mask it.
-		 */
-		low32 = iosapic_intr_info[vector].low32;
-		if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
-			mask = 0;
-		set_rte(gsi, vector, dest, mask);
-	}
-	spin_unlock(&iosapic_lock);
-	spin_unlock_irqrestore(&irq_desc[vector].lock, flags);
-
-	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
-	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
-	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
-	       cpu_logical_id(dest), dest, vector);
-
-	return vector;
-}
-
-/*
- * Drop one reference on the RTE registered for @gsi.  When the last
- * reference goes away the RTE is masked in hardware, unlinked and freed;
- * when that leaves the vector without any RTE, the per-vector state is
- * reset, the irq descriptor's chip is set back to no_irq_type and the
- * vector is released.  An unbalanced call (no RTE for @gsi) is logged and
- * ignored.
- */
-void
-iosapic_unregister_intr (unsigned int gsi)
-{
-	unsigned long flags;
-	int irq, vector, index;
-	irq_desc_t *idesc;
-	u32 low32;
-	unsigned long trigger, polarity;
-	unsigned int dest;
-	struct iosapic_rte_info *rte;
-
-	/*
-	 * If the irq associated with the gsi is not found,
-	 * iosapic_unregister_intr() is unbalanced. We need to check
-	 * this again after getting locks.
-	 */
-	irq = gsi_to_irq(gsi);
-	if (irq < 0) {
-		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
-		       gsi);
-		WARN_ON(1);
-		return;
-	}
-	vector = irq_to_vector(irq);
-
-	idesc = irq_desc + irq;
-	/* same lock order as iosapic_register_intr(): descriptor, then iosapic */
-	spin_lock_irqsave(&idesc->lock, flags);
-	spin_lock(&iosapic_lock);
-	{
-		if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
-			printk(KERN_ERR
-			       "iosapic_unregister_intr(%u) unbalanced\n",
-			       gsi);
-			WARN_ON(1);
-			goto out;
-		}
-
-		/* other users of this GSI remain: nothing more to do */
-		if (--rte->refcnt > 0)
-			goto out;
-
-		/* Mask the interrupt */
-		low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
-		iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
-			      low32);
-
-		/* Remove the rte entry from the list */
-		list_del(&rte->rte_list);
-		iosapic_intr_info[vector].count--;
-		iosapic_free_rte(rte);
-		index = find_iosapic(gsi);
-		iosapic_lists[index].rtes_inuse--;
-		WARN_ON(iosapic_lists[index].rtes_inuse < 0);
-
-		trigger	 = iosapic_intr_info[vector].trigger;
-		polarity = iosapic_intr_info[vector].polarity;
-		dest     = iosapic_intr_info[vector].dest;
-		printk(KERN_INFO
-		       "GSI %u (%s, %s) -> CPU %d (0x%04x)"
-		       " vector %d unregistered\n",
-		       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
-		       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
-		       cpu_logical_id(dest), dest, vector);
-
-		if (list_empty(&iosapic_intr_info[vector].rtes)) {
-			/* Sanity check */
-			BUG_ON(iosapic_intr_info[vector].count);
-
-			/* Clear the interrupt controller descriptor */
-			idesc->chip = &no_irq_type;
-
-			/* Clear the interrupt information */
-			memset(&iosapic_intr_info[vector], 0,
-			       sizeof(struct iosapic_intr_info));
-			iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
-			INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
-
-			if (idesc->action) {
-				/*
-				 * The two literals are concatenated, so the
-				 * first one must end in a space.
-				 */
-				printk(KERN_ERR
-				       "interrupt handlers still exist on "
-				       "IRQ %u\n", irq);
-				WARN_ON(1);
-			}
-
-			/* Free the interrupt vector */
-			free_irq_vector(vector);
-		}
-	}
- out:
-	spin_unlock(&iosapic_lock);
-	spin_unlock_irqrestore(&idesc->lock, flags);
-}
-
-/*
- * ACPI calls this when it finds an entry for a platform interrupt.
- *
- * The vector comes from firmware (PMI), from assign_irq_vector() (INIT) or
- * is the fixed IA64_CPE_VECTOR (CPEI); only CPEI is left masked.  The
- * destination is built from @id and @eid.  Returns the vector, or -1 for an
- * unknown @int_type.
- *
- * NOTE(review): the return value of register_intr() is not checked, so a
- * failed registration is still logged and its vector returned - confirm
- * whether callers rely on that.
- */
-int __init
-iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
-				int iosapic_vector, u16 eid, u16 id,
-				unsigned long polarity, unsigned long trigger)
-{
-	static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
-	unsigned char delivery;
-	int vector, mask = 0;
-	unsigned int dest = ((id << 8) | eid) & 0xffff;
-
-	switch (int_type) {
-	      case ACPI_INTERRUPT_PMI:
-		vector = iosapic_vector;
-		/*
-		 * since PMI vector is alloc'd by FW(ACPI) not by kernel,
-		 * we need to make sure the vector is available
-		 */
-		iosapic_reassign_vector(vector);
-		delivery = IOSAPIC_PMI;
-		break;
-	      case ACPI_INTERRUPT_INIT:
-		vector = assign_irq_vector(AUTO_ASSIGN);
-		if (vector < 0)
-			panic("%s: out of interrupt vectors!\n", __FUNCTION__);
-		delivery = IOSAPIC_INIT;
-		break;
-	      case ACPI_INTERRUPT_CPEI:
-		vector = IA64_CPE_VECTOR;
-		delivery = IOSAPIC_LOWEST_PRIORITY;
-		mask = 1;
-		break;
-	      default:
-		printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
-		       int_type);
-		return -1;
-	}
-
-	register_intr(gsi, vector, delivery, polarity, trigger);
-
-	printk(KERN_INFO
-	       "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
-	       " vector %d\n",
-	       int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
-	       int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
-	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
-	       cpu_logical_id(dest), dest, vector);
-
-	set_rte(gsi, vector, dest, mask);
-	return vector;
-}
-
-/*
- * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
- *
- * Registers @gsi on the vector that isa_irq_to_vector() gives for
- * @isa_irq, with lowest-priority delivery, and programs the RTE masked and
- * targeted at the CPU executing this code.  The result of register_intr()
- * is not checked.
- */
-void __init
-iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
-			  unsigned long polarity,
-			  unsigned long trigger)
-{
-	int vector;
-	unsigned int dest = cpu_physical_id(smp_processor_id());
-
-	vector = isa_irq_to_vector(isa_irq);
-
-	register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
-
-	DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
-	    isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
-	    polarity == IOSAPIC_POL_HIGH ? "high" : "low",
-	    cpu_logical_id(dest), dest, vector);
-
-	set_rte(gsi, vector, dest, 1);
-}
-
-/*
- * One-time setup of the per-vector table: every vector starts out masked
- * with an empty RTE list.  Records @system_pcat_compat and, when it is set
- * and we are not running on Xen, masks all PC-AT compatible 8259
- * interrupts.
- */
-void __init
-iosapic_system_init (int system_pcat_compat)
-{
-	struct iosapic_intr_info *info;
-
-	for (info = iosapic_intr_info;
-	     info < iosapic_intr_info + IA64_NUM_VECTORS; ++info) {
-		info->low32 = IOSAPIC_MASK;
-		/* mark as unused */
-		INIT_LIST_HEAD(&info->rtes);
-	}
-
-	pcat_compat = system_pcat_compat;
-	if (is_running_on_xen() || !pcat_compat)
-		return;
-
-	/*
-	 * Disable the compatibility mode interrupts (8259 style),
-	 * needs IN/OUT support enabled.
-	 */
-	printk(KERN_INFO
-	       "%s: Disabling PC-AT compatible 8259 interrupts\n",
-	       __FUNCTION__);
-	outb(0xff, 0xA1);
-	outb(0xff, 0x21);
-}
-
-/*
- * Return the index of the first iosapic_lists[] slot whose addr is unset,
- * or -1 (after logging a warning) when every slot is taken.
- */
-static inline int
-iosapic_alloc (void)
-{
-	int slot = 0;
-
-	while (slot < NR_IOSAPICS) {
-		if (iosapic_lists[slot].addr == NULL)
-			return slot;
-		slot++;
-	}
-
-	printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
-	return -1;
-}
-
-static inline void
-iosapic_free (int index)
-{
-	memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));	/* addr == NULL makes the slot free for iosapic_alloc() */
-}
-
-/*
- * Check that the GSI range of a new IOSAPIC, starting at @gsi_base and
- * sized by the max_redir field of @ver, does not overlap the range of any
- * registered IOSAPIC.  Returns 0 if the range is free, -EBUSY on overlap.
- */
-static inline int
-iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
-{
-	unsigned int gsi_end = gsi_base + ((ver >> 16) & 0xff);
-	int index;
-
-	/* check gsi range */
-	for (index = 0; index < NR_IOSAPICS; index++) {
-		const struct iosapic *ioc = &iosapic_lists[index];
-		unsigned int base, end;
-
-		if (!ioc->addr)
-			continue;
-
-		base = ioc->gsi_base;
-		end  = base + ioc->num_rte - 1;
-
-		/* the two inclusive ranges intersect */
-		if (gsi_end >= base && end >= gsi_base)
-			return -EBUSY;
-	}
-
-	return 0;
-}
-
-/*
- * Register the IOSAPIC at physical address @phys_addr whose first GSI is
- * @gsi_base.  The controller is mapped, its version register read to learn
- * the number of RTEs, and a free iosapic_lists[] slot filled in.  For the
- * IOSAPIC at GSI base 0 on a PC-AT compatible system, the 16 legacy ISA
- * IRQs are additionally registered as edge-triggered, active-high.
- *
- * Returns 0 on success, -EBUSY if the GSI range overlaps a registered
- * IOSAPIC, or -ENOSPC if all NR_IOSAPICS slots are in use.
- */
-int __devinit
-iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
-{
-	int num_rte, err, index;
-	unsigned int isa_irq, ver;
-	char __iomem *addr;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		addr = ioremap(phys_addr, 0);
-		ver = iosapic_version(addr);
-
-		if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
-			iounmap(addr);
-			spin_unlock_irqrestore(&iosapic_lock, flags);
-			return err;
-		}
-
-		/*
-		 * The MAX_REDIR register holds the highest input pin
-		 * number (starting from 0).
-		 * We add 1 so that we can use it for number of pins (= RTEs)
-		 */
-		num_rte = ((ver >> 16) & 0xff) + 1;
-
-		/*
-		 * iosapic_alloc() returns -1 when the table is full; using
-		 * that as an index would write before iosapic_lists[].
-		 */
-		index = iosapic_alloc();
-		if (index < 0) {
-			iounmap(addr);
-			spin_unlock_irqrestore(&iosapic_lock, flags);
-			return -ENOSPC;
-		}
-
-		iosapic_lists[index].addr = addr;
-		iosapic_lists[index].gsi_base = gsi_base;
-		iosapic_lists[index].num_rte = num_rte;
-#ifdef CONFIG_NUMA
-		/* no node known yet; see map_iosapic_to_node() */
-		iosapic_lists[index].node = MAX_NUMNODES;
-#endif
-	}
-	spin_unlock_irqrestore(&iosapic_lock, flags);
-
-	if ((gsi_base == 0) && pcat_compat) {
-		/*
-		 * Map the legacy ISA devices into the IOSAPIC data.  Some of
-		 * these may get reprogrammed later on with data from the ACPI
-		 * Interrupt Source Override table.
-		 */
-		for (isa_irq = 0; isa_irq < 16; ++isa_irq)
-			iosapic_override_isa_irq(isa_irq, isa_irq,
-						 IOSAPIC_POL_HIGH,
-						 IOSAPIC_EDGE);
-	}
-	return 0;
-}
-
-#ifdef CONFIG_HOTPLUG
-int
-iosapic_remove (unsigned int gsi_base)
-{
-	int index, err = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iosapic_lock, flags);
-	{
-		index = find_iosapic(gsi_base);	/* matches any IOSAPIC whose GSI range contains gsi_base */
-		if (index < 0) {
-			printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
-			       __FUNCTION__, gsi_base);
-			goto out;	/* NOTE(review): err is still 0 here, so "not found" returns success - confirm intended */
-		}
-
-		if (iosapic_lists[index].rtes_inuse) {	/* refuse while any RTE is still registered */
-			err = -EBUSY;
-			printk(KERN_WARNING
-			       "%s: IOSAPIC for GSI base %u is busy\n",
-			       __FUNCTION__, gsi_base);
-			goto out;
-		}
-
-		iounmap(iosapic_lists[index].addr);
-		iosapic_free(index);	/* zeroes the slot so it can be reused */
-	}
- out:
-	spin_unlock_irqrestore(&iosapic_lock, flags);
-	return err;
-}
-#endif /* CONFIG_HOTPLUG */
-
-#ifdef CONFIG_NUMA
-/*
- * Record @node as the NUMA node of the IOSAPIC whose GSI range contains
- * @gsi_base.  Logs a warning and changes nothing if there is no such
- * IOSAPIC.
- */
-void __devinit
-map_iosapic_to_node(unsigned int gsi_base, int node)
-{
-	int index = find_iosapic(gsi_base);
-
-	if (index >= 0) {
-		iosapic_lists[index].node = node;
-		return;
-	}
-
-	printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
-	       __FUNCTION__, gsi_base);
-}
-#endif
-
-static int __init iosapic_enable_kmalloc (void)
-{
-	iosapic_kmalloc_ok = 1;	/* from now on iosapic_alloc_rte() stops refilling from bootmem */
-	return 0;
-}
-core_initcall (iosapic_enable_kmalloc);	/* registered as a core initcall */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c b/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c
deleted file mode 100644
index 5a9db93417..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c
+++ /dev/null
@@ -1,649 +0,0 @@
-/*
- * linux/arch/ia64/kernel/irq.c
- *
- * Copyright (C) 1998-2001 Hewlett-Packard Co
- *	Stephane Eranian <eranian@hpl.hp.com>
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- *  6/10/99: Updated to bring in sync with x86 version to facilitate
- *	     support for SMP and different interrupt controllers.
- *
- * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
- *                      PCI to vector allocation routine.
- * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
- *						Added CPU Hotplug handling for IPF.
- */
-
-#include <linux/module.h>
-
-#include <linux/jiffies.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/kernel_stat.h>
-#include <linux/slab.h>
-#include <linux/ptrace.h>
-#include <linux/random.h>	/* for rand_initialize_irq() */
-#include <linux/signal.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/threads.h>
-#include <linux/bitops.h>
-#ifdef CONFIG_XEN
-#include <linux/cpu.h>
-#endif
-
-#include <asm/delay.h>
-#include <asm/intrinsics.h>
-#include <asm/io.h>
-#include <asm/hw_irq.h>
-#include <asm/machvec.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_PERFMON
-# include <asm/perfmon.h>
-#endif
-
-#define IRQ_DEBUG	0
-
-/* These can be overridden in platform_irq_init */
-int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR;
-int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR;
-
-/* default base addr of IPI table */
-void __iomem *ipi_base_addr = ((void __iomem *)
-			       (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
-
-/*
- * Legacy IRQ to IA-64 vector translation table.
- */
-__u8 isa_irq_to_vector_map[16] = {
-	/* 8259 IRQ translation, first 16 entries */
-	0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
-	0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
-};
-EXPORT_SYMBOL(isa_irq_to_vector_map);
-
-static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)];
-
-int
-assign_irq_vector (int irq)
-{
-	int pos, vector;
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		extern int xen_assign_irq_vector(int);
-		return xen_assign_irq_vector(irq);
-	}
-#endif
- again:
-	pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
-	vector = IA64_FIRST_DEVICE_VECTOR + pos;
-	if (vector > IA64_LAST_DEVICE_VECTOR)
-		return -ENOSPC;
-	if (test_and_set_bit(pos, ia64_vector_mask))
-		goto again;
-	return vector;
-}
-
-void
-free_irq_vector (int vector)
-{
-	int pos;
-
-	if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR)
-		return;
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		extern void xen_free_irq_vector(int);
-		xen_free_irq_vector(vector);
-		return;
-	}
-#endif
-	pos = vector - IA64_FIRST_DEVICE_VECTOR;
-	if (!test_and_clear_bit(pos, ia64_vector_mask))
-		printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
-}
-
-int
-reserve_irq_vector (int vector)
-{
-	int pos;
-
-	if (vector < IA64_FIRST_DEVICE_VECTOR ||
-	    vector > IA64_LAST_DEVICE_VECTOR)
-		return -EINVAL;
-
-	pos = vector - IA64_FIRST_DEVICE_VECTOR;
-	return test_and_set_bit(pos, ia64_vector_mask);
-}
-
-#ifdef CONFIG_SMP
-#	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
-#else
-#	define IS_RESCHEDULE(vec)	(0)
-#endif
-/*
- * That's where the IVT branches when we get an external
- * interrupt. This branches to the correct hardware IRQ handler via
- * function ptr.
- */
-void
-ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
-{
-	unsigned long saved_tpr;
-
-#if IRQ_DEBUG
-	{
-		unsigned long bsp, sp;
-
-		/*
-		 * Note: if the interrupt happened while executing in
-		 * the context switch routine (ia64_switch_to), we may
-		 * get a spurious stack overflow here.  This is
-		 * because the register and the memory stack are not
-		 * switched atomically.
-		 */
-		bsp = ia64_getreg(_IA64_REG_AR_BSP);
-		sp = ia64_getreg(_IA64_REG_SP);
-
-		if ((sp - bsp) < 1024) {
-			static unsigned char count;
-			static long last_time;
-
-			if (jiffies - last_time > 5*HZ)
-				count = 0;
-			if (++count < 5) {
-				last_time = jiffies;
-				printk("ia64_handle_irq: DANGER: less than "
-				       "1KB of free stack space!!\n"
-				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
-			}
-		}
-	}
-#endif /* IRQ_DEBUG */
-
-	/*
-	 * Always set TPR to limit maximum interrupt nesting depth to
-	 * 16 (without this, it would be ~240, which could easily lead
-	 * to kernel stack overflows).
-	 */
-	irq_enter();
-	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
-	ia64_srlz_d();
-	while (vector != IA64_SPURIOUS_INT_VECTOR) {
-		if (!IS_RESCHEDULE(vector)) {
-			ia64_setreg(_IA64_REG_CR_TPR, vector);
-			ia64_srlz_d();
-
-			__do_IRQ(local_vector_to_irq(vector), regs);
-
-			/*
-			 * Disable interrupts and send EOI:
-			 */
-			local_irq_disable();
-			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
-		}
-		ia64_eoi();
-		vector = ia64_get_ivr();
-	}
-	/*
-	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
-	 * handler needs to be able to wait for further keyboard interrupts, which can't
-	 * come through until ia64_eoi() has been done.
-	 */
-	irq_exit();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * This function emulates a interrupt processing when a cpu is about to be
- * brought down.
- */
-void ia64_process_pending_intr(void)
-{
-	ia64_vector vector;
-	unsigned long saved_tpr;
-	extern unsigned int vectors_in_migration[NR_IRQS];
-
-	vector = ia64_get_ivr();
-
-	 irq_enter();
-	 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
-	 ia64_srlz_d();
-
-	 /*
-	  * Perform normal interrupt style processing
-	  */
-	while (vector != IA64_SPURIOUS_INT_VECTOR) {
-		if (!IS_RESCHEDULE(vector)) {
-			ia64_setreg(_IA64_REG_CR_TPR, vector);
-			ia64_srlz_d();
-
-			/*
-			 * Now try calling normal ia64_handle_irq as it would have got called
-			 * from a real intr handler. Try passing null for pt_regs, hopefully
-			 * it will work. I hope it works!.
-			 * Probably could shared code.
-			 */
-			vectors_in_migration[local_vector_to_irq(vector)]=0;
-			__do_IRQ(local_vector_to_irq(vector), NULL);
-
-			/*
-			 * Disable interrupts and send EOI
-			 */
-			local_irq_disable();
-			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
-		}
-		ia64_eoi();
-		vector = ia64_get_ivr();
-	}
-	irq_exit();
-}
-#endif
-
-
-#ifdef CONFIG_SMP
-extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
-
-static struct irqaction ipi_irqaction = {
-	.handler =	handle_IPI,
-	.flags =	IRQF_DISABLED,
-	.name =		"IPI"
-};
-#endif
-
-#ifdef CONFIG_XEN
-#include <xen/evtchn.h>
-#include <xen/interface/callback.h>
-
-static DEFINE_PER_CPU(int, timer_irq) = -1;
-static DEFINE_PER_CPU(int, ipi_irq) = -1;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, cmc_irq) = -1;
-static DEFINE_PER_CPU(int, cmcp_irq) = -1;
-static DEFINE_PER_CPU(int, cpep_irq) = -1;
-static char timer_name[NR_CPUS][15];
-static char ipi_name[NR_CPUS][15];
-static char resched_name[NR_CPUS][15];
-static char cmc_name[NR_CPUS][15];
-static char cmcp_name[NR_CPUS][15];
-static char cpep_name[NR_CPUS][15];
-
-struct saved_irq {
-	unsigned int irq;
-	struct irqaction *action;
-};
-/* 16 should be far optimistic value, since only several percpu irqs
- * are registered early.
- */
-#define MAX_LATE_IRQ	16
-static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ];
-static unsigned short late_irq_cnt = 0;
-static unsigned short saved_irq_cnt = 0;
-static int xen_slab_ready = 0;
-
-#ifdef CONFIG_SMP
-/* Dummy stub. Though we may check RESCHEDULE_VECTOR before __do_IRQ,
- * it ends up to issue several memory accesses upon percpu data and
- * thus adds unnecessary traffic to other paths.
- */
-static irqreturn_t
-handle_reschedule(int irq, void *dev_id, struct pt_regs *regs)
-{
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction resched_irqaction = {
-	.handler =	handle_reschedule,
-	.flags =	SA_INTERRUPT,
-	.name =		"RESCHED"
-};
-#endif
-
-/*
- * This is xen version percpu irq registration, which needs bind
- * to xen specific evtchn sub-system. One trick here is that xen
- * evtchn binding interface depends on kmalloc because related
- * port needs to be freed at device/cpu down. So we cache the
- * registration on BSP before slab is ready and then deal them
- * at later point. For rest instances happening after slab ready,
- * we hook them to xen evtchn immediately.
- *
- * FIXME: MCA is not supported by far, and thus "nomca" boot param is
- * required.
- */
-static void
-xen_register_percpu_irq(unsigned int cpu, unsigned int vec,
-			 struct irqaction *action, int save)
-{
-	irq_desc_t *desc;
-	int irq = 0;
-
-	if (xen_slab_ready) {
-		switch (vec) {
-		case IA64_TIMER_VECTOR:
-			sprintf(timer_name[cpu], "%s%d", action->name, cpu);
-			irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
-				action->handler, action->flags,
-				timer_name[cpu], action->dev_id);
-			per_cpu(timer_irq,cpu) = irq;
-			break;
-		case IA64_IPI_RESCHEDULE:
-			sprintf(resched_name[cpu], "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, cpu,
-				action->handler, action->flags,
-				resched_name[cpu], action->dev_id);
-			per_cpu(resched_irq,cpu) = irq;
-			break;
-		case IA64_IPI_VECTOR:
-			sprintf(ipi_name[cpu], "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(IPI_VECTOR, cpu,
-				action->handler, action->flags,
-				ipi_name[cpu], action->dev_id);
-			per_cpu(ipi_irq,cpu) = irq;
-			break;
-		case IA64_CMC_VECTOR:
-			sprintf(cmc_name[cpu], "%s%d", action->name, cpu);
-			irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu,
-			                              action->handler,
-			                              action->flags,
-			                              cmc_name[cpu],
-			                              action->dev_id);
-			per_cpu(cmc_irq,cpu) = irq;
-			break;
-		case IA64_CMCP_VECTOR:
-			sprintf(cmcp_name[cpu], "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(CMCP_VECTOR, cpu,
-			                             action->handler,
-			                             action->flags,
-			                             cmcp_name[cpu],
-			                             action->dev_id);
-			per_cpu(cmcp_irq,cpu) = irq;
-			break;
-		case IA64_CPEP_VECTOR:
-			sprintf(cpep_name[cpu], "%s%d", action->name, cpu);
-			irq = bind_ipi_to_irqhandler(CPEP_VECTOR, cpu,
-			                             action->handler,
-			                             action->flags,
-			                             cpep_name[cpu],
-			                             action->dev_id);
-			per_cpu(cpep_irq,cpu) = irq;
-			break;
-		case IA64_CPE_VECTOR:
-		case IA64_MCA_RENDEZ_VECTOR:
-		case IA64_PERFMON_VECTOR:
-		case IA64_MCA_WAKEUP_VECTOR:
-		case IA64_SPURIOUS_INT_VECTOR:
-			/* No need to complain, these aren't supported. */
-			break;
-		default:
-			printk(KERN_WARNING "Percpu irq %d is unsupported "
-			       "by xen!\n", vec);
-			break;
-		}
-		BUG_ON(irq < 0);
-
-		if (irq > 0) {
-			/*
-			 * Mark percpu.  Without this, migrate_irqs() will
-			 * mark the interrupt for migrations and trigger it
-			 * on cpu hotplug.
-			 */
-			desc = irq_desc + irq;
-			desc->status |= IRQ_PER_CPU;
-		}
-	} 
-
-	/* For BSP, we cache registered percpu irqs, and then re-walk
-	 * them when initializing APs
-	 */
-	if (!cpu && save) {
-		BUG_ON(saved_irq_cnt == MAX_LATE_IRQ);
-		saved_percpu_irqs[saved_irq_cnt].irq = vec;
-		saved_percpu_irqs[saved_irq_cnt].action = action;
-		saved_irq_cnt++;
-		if (!xen_slab_ready)
-			late_irq_cnt++;
-	}
-}
-
-static void
-xen_bind_early_percpu_irq (void)
-{
-	int i;
-
-	xen_slab_ready = 1;
-	/* There's no race when accessing this cached array, since only
-	 * BSP will face with such step shortly
-	 */
-	for (i = 0; i < late_irq_cnt; i++)
-		xen_register_percpu_irq(smp_processor_id(),
-					saved_percpu_irqs[i].irq,
-		                        saved_percpu_irqs[i].action, 0);
-}
-
-/* FIXME: There's no obvious point to check whether slab is ready. So
- * a hack is used here by utilizing a late time hook.
- */
-extern void (*late_time_init)(void);
-extern char xen_event_callback;
-extern void xen_init_IRQ(void);
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int __devinit
-unbind_evtchn_callback(struct notifier_block *nfb,
-                       unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-
-	if (action == CPU_DEAD) {
-		/* Unregister evtchn.  */
-		if (per_cpu(cpep_irq,cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(cpep_irq, cpu), NULL);
-			per_cpu(cpep_irq, cpu) = -1;
-		}
-		if (per_cpu(cmcp_irq,cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(cmcp_irq, cpu), NULL);
-			per_cpu(cmcp_irq, cpu) = -1;
-		}
-		if (per_cpu(cmc_irq,cpu) >= 0) {
-			unbind_from_irqhandler(per_cpu(cmc_irq, cpu), NULL);
-			per_cpu(cmc_irq, cpu) = -1;
-		}
-		if (per_cpu(ipi_irq,cpu) >= 0) {
-			unbind_from_irqhandler (per_cpu(ipi_irq, cpu), NULL);
-			per_cpu(ipi_irq, cpu) = -1;
-		}
-		if (per_cpu(resched_irq,cpu) >= 0) {
-			unbind_from_irqhandler (per_cpu(resched_irq, cpu),
-						NULL);
-			per_cpu(resched_irq, cpu) = -1;
-		}
-		if (per_cpu(timer_irq,cpu) >= 0) {
-			unbind_from_irqhandler (per_cpu(timer_irq, cpu), NULL);
-			per_cpu(timer_irq, cpu) = -1;
-		}
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block unbind_evtchn_notifier = {
-	.notifier_call = unbind_evtchn_callback,
-	.priority = 0
-};
-#endif
-
-DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
-void xen_smp_intr_init_early(unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	unsigned int i;
-
-	for (i = 0; i < saved_irq_cnt; i++)
-		xen_register_percpu_irq(cpu, saved_percpu_irqs[i].irq,
-		                        saved_percpu_irqs[i].action, 0);
-#endif
-}
-
-void xen_smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-	unsigned int cpu = smp_processor_id();
-	struct callback_register event = {
-		.type = CALLBACKTYPE_event,
-		.address = (unsigned long)&xen_event_callback,
-	};
-
-	if (cpu == 0) {
-		/* Initialization was already done for boot cpu.  */
-#ifdef CONFIG_HOTPLUG_CPU
-		/* Register the notifier only once.  */
-		register_cpu_notifier(&unbind_evtchn_notifier);
-#endif
-		return;
-	}
-
-	/* This should be piggyback when setup vcpu guest context */
-	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
-#endif /* CONFIG_SMP */
-}
-
-void
-xen_irq_init(void)
-{
-	struct callback_register event = {
-		.type = CALLBACKTYPE_event,
-		.address = (unsigned long)&xen_event_callback,
-	};
-
-	xen_init_IRQ();
-	BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
-	late_time_init = xen_bind_early_percpu_irq;
-#ifdef CONFIG_SMP
-	register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
-#endif
-}
-
-void
-xen_platform_send_ipi(int cpu, int vector, int delivery_mode, int redirect)
-{
-	int irq = -1;
-
-#ifdef CONFIG_SMP
-	/* TODO: we need to call vcpu_up here */
-	if (unlikely(vector == ap_wakeup_vector)) {
-		extern void xen_send_ipi (int cpu, int vec);
-
-		/* XXX
-		 * This should be in __cpu_up(cpu) in ia64 smpboot.c
-		 * like x86. But don't want to modify it,
-		 * keep it untouched.
-		 */
-		xen_smp_intr_init_early(cpu);
-
-		xen_send_ipi (cpu, vector);
-		//vcpu_prepare_and_up(cpu);
-		return;
-	}
-#endif
-
-	switch (vector) {
-		case IA64_IPI_VECTOR:
-			irq = per_cpu(ipi_to_irq, cpu)[IPI_VECTOR];
-			break;
-		case IA64_IPI_RESCHEDULE:
-			irq = per_cpu(ipi_to_irq, cpu)[RESCHEDULE_VECTOR];
-			break;
-		case IA64_CMCP_VECTOR:
-			irq = per_cpu(ipi_to_irq, cpu)[CMCP_VECTOR];
-			break;
-		case IA64_CPEP_VECTOR:
-			irq = per_cpu(ipi_to_irq, cpu)[CPEP_VECTOR];
-			break;
-		default:
-			printk(KERN_WARNING "Unsupported IPI type 0x%x\n",
-			       vector);
-			irq = 0;
-			break;
-	}		
-	
-	BUG_ON(irq < 0);
-	notify_remote_via_irq(irq);
-	return;
-}
-#endif /* CONFIG_XEN */
-
-void
-register_percpu_irq (ia64_vector vec, struct irqaction *action)
-{
-	irq_desc_t *desc;
-	unsigned int irq;
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen())
-		return xen_register_percpu_irq(smp_processor_id(), 
-					       vec, action, 1);
-#endif
-
-	for (irq = 0; irq < NR_IRQS; ++irq)
-		if (irq_to_vector(irq) == vec) {
-			desc = irq_desc + irq;
-			desc->status |= IRQ_PER_CPU;
-			desc->chip = &irq_type_ia64_lsapic;
-			if (action)
-				setup_irq(irq, action);
-		}
-}
-
-void __init
-init_IRQ (void)
-{
-	register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
-#ifdef CONFIG_SMP
-	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
-#endif
-#ifdef CONFIG_PERFMON
-	pfm_init_percpu();
-#endif
-	platform_irq_init();
-#ifdef CONFIG_XEN
-	if (is_running_on_xen() && !ia64_platform_is("xen"))
-		xen_irq_init();
-#endif
-}
-
-void
-ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
-{
-	void __iomem *ipi_addr;
-	unsigned long ipi_data;
-	unsigned long phys_cpu_id;
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		xen_platform_send_ipi(cpu, vector, delivery_mode, redirect);
-		return;
-	}
-#endif
-
-#ifdef CONFIG_SMP
-	phys_cpu_id = cpu_physical_id(cpu);
-#else
-	phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;
-#endif
-
-	/*
-	 * cpu number is in 8bit ID and 8bit EID
-	 */
-
-	ipi_data = (delivery_mode << 8) | (vector & 0xff);
-	ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
-
-	writeq(ipi_data, ipi_addr);
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S b/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S
deleted file mode 100644
index af5cc0bc41..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/pal.S
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * PAL Firmware support
- * IA-64 Processor Programmers Reference Vol 2
- *
- * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
- *	David Mosberger <davidm@hpl.hp.com>
- *	Stephane Eranian <eranian@hpl.hp.com>
- *
- * 05/22/2000 eranian Added support for stacked register calls
- * 05/24/2000 eranian Added support for physical mode static calls
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-
-	.data
-	.globl pal_entry_point
-pal_entry_point:
-	data8 ia64_pal_default_handler
-	.text
-
-/*
- * Set the PAL entry point address.  This could be written in C code, but we do it here
- * to keep it all in one module (besides, it's so trivial that it's
- * not a big deal).
- *
- * in0		Address of the PAL entry point (text address, NOT a function descriptor).
- */
-GLOBAL_ENTRY(ia64_pal_handler_init)
-	alloc r3=ar.pfs,1,0,0,0
-	movl r2=pal_entry_point
-	;;
-	st8 [r2]=in0
-	br.ret.sptk.many rp
-END(ia64_pal_handler_init)
-
-/*
- * Default PAL call handler.  This needs to be coded in assembly because it uses
- * the static calling convention, i.e., the RSE may not be used and calls are
- * done via "br.cond" (not "br.call").
- */
-GLOBAL_ENTRY(ia64_pal_default_handler)
-	mov r8=-1
-	br.cond.sptk.many rp
-END(ia64_pal_default_handler)
-
-/*
- * Make a PAL call using the static calling convention.
- *
- * in0         Index of PAL service
- * in1 - in3   Remaining PAL arguments
- * in4	       1 ==> clear psr.ic,  0 ==> don't clear psr.ic
- *
- */
-GLOBAL_ENTRY(__ia64_pal_call_static)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
-	alloc loc1 = ar.pfs,5,5,0,0
-	movl loc2 = pal_entry_point
-1:	{
-	  mov r28 = in0
-	  mov r29 = in1
-	  mov r8 = ip
-	}
-	;;
-	ld8 loc2 = [loc2]		// loc2 <- entry point
-	tbit.nz p6,p7 = in4, 0
-	adds r8 = 1f-1b,r8
-	mov loc4=ar.rsc			// save RSE configuration
-	;;
-	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
-	mov loc3 = psr
-	mov loc0 = rp
-	.body
-	mov r30 = in2
-
-(p6)	rsm psr.i | psr.ic
-	mov r31 = in3
-	mov b7 = loc2
-
-(p7)	rsm psr.i
-	;;
-(p6)	srlz.i
-	mov rp = r8
-	br.cond.sptk.many b7
-1:	mov psr.l = loc3
-	mov ar.rsc = loc4		// restore RSE configuration
-	mov ar.pfs = loc1
-	mov rp = loc0
-	;;
-	srlz.d				// seralize restoration of psr.l
-	br.ret.sptk.many b0
-END(__ia64_pal_call_static)
-
-/*
- * Make a PAL call using the stacked registers calling convention.
- *
- * Inputs:
- * 	in0         Index of PAL service
- * 	in2 - in3   Remaning PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_stacked)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
-	alloc loc1 = ar.pfs,4,4,4,0
-	movl loc2 = pal_entry_point
-
-	mov r28  = in0			// Index MUST be copied to r28
-	mov out0 = in0			// AND in0 of PAL function
-	mov loc0 = rp
-	.body
-	;;
-	ld8 loc2 = [loc2]		// loc2 <- entry point
-	mov out1 = in1
-	mov out2 = in2
-	mov out3 = in3
-	mov loc3 = psr
-	;;
-	rsm psr.i
-	mov b7 = loc2
-	;;
-	br.call.sptk.many rp=b7		// now make the call
-.ret0:	mov psr.l  = loc3
-	mov ar.pfs = loc1
-	mov rp = loc0
-	;;
-	srlz.d				// serialize restoration of psr.l
-	br.ret.sptk.many b0
-END(ia64_pal_call_stacked)
-
-/*
- * Make a physical mode PAL call using the static registers calling convention.
- *
- * Inputs:
- * 	in0         Index of PAL service
- * 	in2 - in3   Remaning PAL arguments
- *
- * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
- * So we don't need to clear them.
- */
-#define PAL_PSR_BITS_TO_CLEAR							\
-	(IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT  | IA64_PSR_DB | IA64_PSR_RT |	\
-	 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED |		\
-	 IA64_PSR_DFL | IA64_PSR_DFH)
-
-#define PAL_PSR_BITS_TO_SET							\
-	(IA64_PSR_BN)
-
-
-GLOBAL_ENTRY(ia64_pal_call_phys_static)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
-	alloc loc1 = ar.pfs,4,7,0,0
-	movl loc2 = pal_entry_point
-1:	{
-	  mov r28  = in0		// copy procedure index
-	  mov r8   = ip			// save ip to compute branch
-	  mov loc0 = rp			// save rp
-	}
-	.body
-	;;
-	ld8 loc2 = [loc2]		// loc2 <- entry point
-	mov r29  = in1			// first argument
-	mov r30  = in2			// copy arg2
-	mov r31  = in3			// copy arg3
-	;;
-	mov loc3 = psr			// save psr
-	adds r8  = 1f-1b,r8		// calculate return address for call
-	;;
-	mov loc4=ar.rsc			// save RSE configuration
-	dep.z loc2=loc2,0,61		// convert pal entry point to physical
-	tpa r8=r8			// convert rp to physical
-	;;
-	mov b7 = loc2			// install target to branch reg
-	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
-	movl r16=PAL_PSR_BITS_TO_CLEAR
-	movl r17=PAL_PSR_BITS_TO_SET
-	;;
-	or loc3=loc3,r17		// add in psr the bits to set
-	;;
-	andcm r16=loc3,r16		// removes bits to clear from psr
-	br.call.sptk.many rp=ia64_switch_mode_phys
-.ret1:	mov rp = r8			// install return address (physical)
-	mov loc5 = r19
-	mov loc6 = r20
-	br.cond.sptk.many b7
-1:
-	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
-	mov r16=loc3			// r16= original psr
-	mov r19=loc5
-	mov r20=loc6
-	br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
-.ret2:
-	mov psr.l = loc3		// restore init PSR
-
-	mov ar.pfs = loc1
-	mov rp = loc0
-	;;
-	mov ar.rsc=loc4			// restore RSE configuration
-	srlz.d				// seralize restoration of psr.l
-	br.ret.sptk.many b0
-END(ia64_pal_call_phys_static)
-
-/*
- * Make a PAL call using the stacked registers in physical mode.
- *
- * Inputs:
- * 	in0         Index of PAL service
- * 	in2 - in3   Remaning PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
-	alloc	loc1 = ar.pfs,5,7,4,0
-	movl	loc2 = pal_entry_point
-1:	{
-	  mov r28  = in0		// copy procedure index
-	  mov loc0 = rp		// save rp
-	}
-	.body
-	;;
-	ld8 loc2 = [loc2]		// loc2 <- entry point
-	mov loc3 = psr			// save psr
-	;;
-	mov loc4=ar.rsc			// save RSE configuration
-	dep.z loc2=loc2,0,61		// convert pal entry point to physical
-	;;
-	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
-	movl r16=PAL_PSR_BITS_TO_CLEAR
-	movl r17=PAL_PSR_BITS_TO_SET
-	;;
-	or loc3=loc3,r17		// add in psr the bits to set
-	mov b7 = loc2			// install target to branch reg
-	;;
-	andcm r16=loc3,r16		// removes bits to clear from psr
-	br.call.sptk.many rp=ia64_switch_mode_phys
-
-	mov out0 = in0			// first argument
-	mov out1 = in1			// copy arg2
-	mov out2 = in2			// copy arg3
-	mov out3 = in3			// copy arg3
-	mov loc5 = r19
-	mov loc6 = r20
-
-	br.call.sptk.many rp=b7		// now make the call
-
-	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
-	mov r16=loc3			// r16= original psr
-	mov r19=loc5
-	mov r20=loc6
-	br.call.sptk.many rp=ia64_switch_mode_virt	// return to virtual mode
-
-	mov psr.l  = loc3		// restore init PSR
-	mov ar.pfs = loc1
-	mov rp = loc0
-	;;
-	mov ar.rsc=loc4			// restore RSE configuration
-	srlz.d				// seralize restoration of psr.l
-	br.ret.sptk.many b0
-END(ia64_pal_call_phys_stacked)
-
-/*
- * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15).
- *
- * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
- * regs fp-low partition.
- *
- * Inputs:
- *      in0	Address of stack storage for fp regs
- */
-GLOBAL_ENTRY(ia64_save_scratch_fpregs)
-	alloc r3=ar.pfs,1,0,0,0
-	add r2=16,in0
-	;;
-	stf.spill [in0] = f10,32
-	stf.spill [r2]  = f11,32
-	;;
-	stf.spill [in0] = f12,32
-	stf.spill [r2]  = f13,32
-	;;
-	stf.spill [in0] = f14,32
-	stf.spill [r2]  = f15,32
-	br.ret.sptk.many rp
-END(ia64_save_scratch_fpregs)
-
-/*
- * Load scratch fp scratch regs (fp10-fp15)
- *
- * Inputs:
- *      in0	Address of stack storage for fp regs
- */
-GLOBAL_ENTRY(ia64_load_scratch_fpregs)
-	alloc r3=ar.pfs,1,0,0,0
-	add r2=16,in0
-	;;
-	ldf.fill  f10 = [in0],32
-	ldf.fill  f11 = [r2],32
-	;;
-	ldf.fill  f12 = [in0],32
-	ldf.fill  f13 = [r2],32
-	;;
-	ldf.fill  f14 = [in0],32
-	ldf.fill  f15 = [r2],32
-	br.ret.sptk.many rp
-END(ia64_load_scratch_fpregs)
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c b/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
deleted file mode 100644
index 73597d2866..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/patch.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Instruction-patching support.
- *
- * Copyright (C) 2003 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- */
-#include <linux/init.h>
-#include <linux/string.h>
-
-#include <asm/patch.h>
-#include <asm/processor.h>
-#include <asm/sections.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-
-/*
- * This was adapted from code written by Tony Luck:
- *
- * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
- * like this:
- *
- * 6  6         5         4         3         2         1
- * 3210987654321098765432109876543210987654321098765432109876543210
- * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
- *
- * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
- */
-static u64
-get_imm64 (u64 insn_addr)
-{
-	u64 *p = (u64 *) (insn_addr & -16);	/* mask out slot number */
-
-	return ( (p[1] & 0x0800000000000000UL) << 4)  | /*A*/
-		((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
-		((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
-		((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
-		((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
-		((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
-		((p[1] & 0x000007f000000000UL) >> 36);  /*G*/
-}
-
-/* Patch instruction with "val" where "mask" has 1 bits. */
-void
-ia64_patch (u64 insn_addr, u64 mask, u64 val)
-{
-	u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
-#	define insn_mask ((1UL << 41) - 1)
-	unsigned long shift;
-
-	b0 = b[0]; b1 = b[1];
-	shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
-	if (shift >= 64) {
-		m1 = mask << (shift - 64);
-		v1 = val << (shift - 64);
-	} else {
-		m0 = mask << shift; m1 = mask >> (64 - shift);
-		v0 = val  << shift; v1 = val >> (64 - shift);
-		b[0] = (b0 & ~m0) | (v0 & m0);
-	}
-	b[1] = (b1 & ~m1) | (v1 & m1);
-}
-
-void
-ia64_patch_imm64 (u64 insn_addr, u64 val)
-{
-	/* The assembler may generate offset pointing to either slot 1
-	   or slot 2 for a long (2-slot) instruction, occupying slots 1
-	   and 2.  */
-  	insn_addr &= -16UL;
-	ia64_patch(insn_addr + 2,
-		   0x01fffefe000UL, (  ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
-				     | ((val & 0x0000000000200000UL) <<  0) /* bit 21 -> 21 */
-				     | ((val & 0x00000000001f0000UL) <<  6) /* bit 16 -> 22 */
-				     | ((val & 0x000000000000ff80UL) << 20) /* bit  7 -> 27 */
-				     | ((val & 0x000000000000007fUL) << 13) /* bit  0 -> 13 */));
-	ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
-}
-
-void
-ia64_patch_imm60 (u64 insn_addr, u64 val)
-{
-	/* The assembler may generate offset pointing to either slot 1
-	   or slot 2 for a long (2-slot) instruction, occupying slots 1
-	   and 2.  */
-  	insn_addr &= -16UL;
-	ia64_patch(insn_addr + 2,
-		   0x011ffffe000UL, (  ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
-				     | ((val & 0x00000000000fffffUL) << 13) /* bit  0 -> 13 */));
-	ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18);
-}
-
-/*
- * We need sometimes to load the physical address of a kernel
- * object.  Often we can convert the virtual address to physical
- * at execution time, but sometimes (either for performance reasons
- * or during error recovery) we cannot to this.  Patch the marked
- * bundles to load the physical address.
- */
-void __init
-ia64_patch_vtop (unsigned long start, unsigned long end)
-{
-	s32 *offp = (s32 *) start;
-	u64 ip;
-
-	while (offp < (s32 *) end) {
-		ip = (u64) offp + *offp;
-
-		/* replace virtual address with corresponding physical address: */
-		ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));
-		ia64_fc((void *) ip);
-		++offp;
-	}
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-void __init
-ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
-{
-	static int first_time = 1;
-	int need_workaround;
-	s32 *offp = (s32 *) start;
-	u64 *wp;
-
-	need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);
-
-	if (first_time) {
-		first_time = 0;
-		if (need_workaround)
-			printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
-		else
-			printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
-			       "disabling it\n");
-	}
-	if (need_workaround)
-		return;
-
-	while (offp < (s32 *) end) {
-		wp = (u64 *) ia64_imva((char *) offp + *offp);
-		wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
-		wp[1] = 0x0004000000000200UL;
-		wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
-		wp[3] = 0x0084006880000200UL;
-		ia64_fc(wp); ia64_fc(wp + 2);
-		++offp;
-	}
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-static void __init
-patch_fsyscall_table (unsigned long start, unsigned long end)
-{
-	extern unsigned long fsyscall_table[NR_syscalls];
-	s32 *offp = (s32 *) start;
-	u64 ip;
-
-	while (offp < (s32 *) end) {
-		ip = (u64) ia64_imva((char *) offp + *offp);
-		ia64_patch_imm64(ip, (u64) fsyscall_table);
-		ia64_fc((void *) ip);
-		++offp;
-	}
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-static void __init
-patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
-{
-	extern char fsys_bubble_down[];
-	s32 *offp = (s32 *) start;
-	u64 ip;
-
-	while (offp < (s32 *) end) {
-		ip = (u64) offp + *offp;
-		ia64_patch_imm60((u64) ia64_imva((void *) ip),
-				 (u64) (fsys_bubble_down - (ip & -16)) / 16);
-		ia64_fc((void *) ip);
-		++offp;
-	}
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-extern char __start_gate_running_on_xen_patchlist[];
-extern char __end_gate_running_on_xen_patchlist[];
-
-void
-patch_running_on_xen(unsigned long start, unsigned long end)
-{
-	extern int running_on_xen;
-	s32 *offp = (s32 *)start;
-	u64 ip;
-
-	while (offp < (s32 *)end) {
-		ip = (u64)ia64_imva((char *)offp + *offp);
-		ia64_patch_imm64(ip, (u64)&running_on_xen);
-		ia64_fc((void *)ip);
-		++offp;
-	}
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-static void
-patch_brl_symaddr(unsigned long start, unsigned long end,
-                  unsigned long symaddr)
-{
-	s32 *offp = (s32 *)start;
-	u64 ip;
-
-	while (offp < (s32 *)end) {
-		ip = (u64)offp + *offp;
-		ia64_patch_imm60((u64)ia64_imva((void *)ip),
-				 (u64)(symaddr - (ip & -16)) / 16);
-		ia64_fc((void *)ip);
-		++offp;
-	}
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-#define EXTERN_PATCHLIST(name)					\
-	extern char __start_gate_brl_##name##_patchlist[];	\
-	extern char __end_gate_brl_##name##_patchlist[];	\
-	extern char name[]
-
-#define PATCH_BRL_SYMADDR(name)						\
-	patch_brl_symaddr((unsigned long)__start_gate_brl_##name##_patchlist, \
-	                  (unsigned long)__end_gate_brl_##name##_patchlist,   \
-	                  (unsigned long)name)
-
-static void
-patch_brl_in_vdso(void)
-{
-	EXTERN_PATCHLIST(xen_ssm_i_0);
-	EXTERN_PATCHLIST(xen_ssm_i_1);
-
-	PATCH_BRL_SYMADDR(xen_ssm_i_0);
-	PATCH_BRL_SYMADDR(xen_ssm_i_1);
-}
-#else
-#define patch_running_on_xen(start, end)	do { } while (0)
-#define patch_brl_in_vdso()			do { } while (0)
-#endif
-
-void __init
-ia64_patch_gate (void)
-{
-#	define START(name)	((unsigned long) __start_gate_##name##_patchlist)
-#	define END(name)	((unsigned long)__end_gate_##name##_patchlist)
-
-	patch_fsyscall_table(START(fsyscall), END(fsyscall));
-	patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
-#ifdef CONFIG_XEN
-	patch_running_on_xen(START(running_on_xen), END(running_on_xen));
-	patch_brl_in_vdso();
-#endif
-	ia64_patch_vtop(START(vtop), END(vtop));
-	ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c b/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c
deleted file mode 100644
index 59d277fb8b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/perfmon.c
+++ /dev/null
@@ -1,6943 +0,0 @@
-/*
- * This file implements the perfmon-2 subsystem which is used
- * to program the IA-64 Performance Monitoring Unit (PMU).
- *
- * The initial version of perfmon.c was written by
- * Ganesh Venkitachalam, IBM Corp.
- *
- * Then it was modified for perfmon-1.x by Stephane Eranian and
- * David Mosberger, Hewlett Packard Co.
- *
- * Version Perfmon-2.x is a rewrite of perfmon-1.x
- * by Stephane Eranian, Hewlett Packard Co.
- *
- * Copyright (C) 1999-2005  Hewlett Packard Co
- *               Stephane Eranian <eranian@hpl.hp.com>
- *               David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * More information about perfmon available at:
- * 	http://www.hpl.hp.com/research/linux/perfmon
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/smp_lock.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/list.h>
-#include <linux/file.h>
-#include <linux/poll.h>
-#include <linux/vfs.h>
-#include <linux/pagemap.h>
-#include <linux/mount.h>
-#include <linux/bitops.h>
-#include <linux/capability.h>
-#include <linux/rcupdate.h>
-#include <linux/completion.h>
-
-#include <asm/errno.h>
-#include <asm/intrinsics.h>
-#include <asm/page.h>
-#include <asm/perfmon.h>
-#include <asm/processor.h>
-#include <asm/signal.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/delay.h>
-
-#ifdef CONFIG_PERFMON
-#ifdef CONFIG_XEN
-//#include <xen/xenoprof.h>
-#include <xen/interface/xenoprof.h>
-
-static int xenoprof_is_primary = 0;
-#define init_xenoprof_primary(is_primary)  (xenoprof_is_primary = (is_primary))
-#define is_xenoprof_primary()	(xenoprof_is_primary)
-#define XEN_NOT_SUPPORTED_YET						\
-	do {								\
-		if (is_running_on_xen()) {				\
-			printk("%s is not supported yet under xen.\n",	\
-			       __func__);				\
-			return -ENOSYS;					\
-		}							\
-	} while (0)
-#else
-#define init_xenoprof_primary(is_primary)	do { } while (0)
-#define is_xenoprof_primary()			(0)
-#define XEN_NOT_SUPPORTED_YET			do { } while (0)
-#define HYPERVISOR_perfmon_op(cmd, arg, count)	do { } while (0)
-#endif
-
-/*
- * perfmon context state
- */
-#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
-#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
-#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
-#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */
-
-#define PFM_INVALID_ACTIVATION	(~0UL)
-
-/*
- * depth of message queue
- */
-#define PFM_MAX_MSGS		32
-#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
-
-/*
- * type of a PMU register (bitmask).
- * bitmask structure:
- * 	bit0   : register implemented
- * 	bit1   : end marker
- * 	bit2-3 : reserved
- * 	bit4   : pmc has pmc.pm
- * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
- * 	bit6-7 : register type
- * 	bit8-31: reserved
- */
-#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
-#define PFM_REG_IMPL		0x1 /* register implemented */
-#define PFM_REG_END		0x2 /* end marker */
-#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
-#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
-#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
-#define	PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
-#define PFM_REG_BUFFER	 	(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
-
-#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
-#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)
-
-#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
-
-/* i assumed unsigned */
-#define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
-#define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
-
-/* XXX: these assume that register i is implemented */
-#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
-#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
-#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
-#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)
-
-#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
-#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
-#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
-#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]
-
-#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
-#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS
-
-#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
-#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
-#define PFM_CTX_TASK(h)		(h)->ctx_task
-
-#define PMU_PMC_OI		5 /* position of pmc.oi bit */
-
-/* XXX: does not support more than 64 PMDs */
-#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
-#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
-
-#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
-
-#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
-#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
-#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
-#define PFM_CODE_RR	0	/* requesting code range restriction */
-#define PFM_DATA_RR	1	/* requestion data range restriction */
-
-#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
-#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
-#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)
-
-#define RDEP(x)	(1UL<<(x))
-
-/*
- * context protection macros
- * in SMP:
- * 	- we need to protect against CPU concurrency (spin_lock)
- * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
- * in UP:
- * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
- *
- * spin_lock_irqsave()/spin_lock_irqrestore():
- * 	in SMP: local_irq_disable + spin_lock
- * 	in UP : local_irq_disable
- *
- * spin_lock()/spin_lock():
- * 	in UP : removed automatically
- * 	in SMP: protect against context accesses from other CPU. interrupts
- * 	        are not masked. This is useful for the PMU interrupt handler
- * 	        because we know we will not get PMU concurrency in that code.
- */
-#define PROTECT_CTX(c, f) \
-	do {  \
-		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
-		spin_lock_irqsave(&(c)->ctx_lock, f); \
-		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
-	} while(0)
-
-#define UNPROTECT_CTX(c, f) \
-	do { \
-		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
-		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
-	} while(0)
-
-#define PROTECT_CTX_NOPRINT(c, f) \
-	do {  \
-		spin_lock_irqsave(&(c)->ctx_lock, f); \
-	} while(0)
-
-
-#define UNPROTECT_CTX_NOPRINT(c, f) \
-	do { \
-		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
-	} while(0)
-
-
-#define PROTECT_CTX_NOIRQ(c) \
-	do {  \
-		spin_lock(&(c)->ctx_lock); \
-	} while(0)
-
-#define UNPROTECT_CTX_NOIRQ(c) \
-	do { \
-		spin_unlock(&(c)->ctx_lock); \
-	} while(0)
-
-
-#ifdef CONFIG_SMP
-
-#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
-#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
-#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()
-
-#else /* !CONFIG_SMP */
-#define SET_ACTIVATION(t) 	do {} while(0)
-#define GET_ACTIVATION(t) 	do {} while(0)
-#define INC_ACTIVATION(t) 	do {} while(0)
-#endif /* CONFIG_SMP */
-
-#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
-#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
-#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)
-
-#define LOCK_PFS(g)	    	spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
-#define UNLOCK_PFS(g)	    	spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
-
-#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
-
-/*
- * cmp0 must be the value of pmc0
- */
-#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
-
-#define PFMFS_MAGIC 0xa0b4d889
-
-/*
- * debugging
- */
-#define PFM_DEBUGGING 1
-#ifdef PFM_DEBUGGING
-#define DPRINT(a) \
-	do { \
-		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
-	} while (0)
-
-#define DPRINT_ovfl(a) \
-	do { \
-		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
-	} while (0)
-#endif
-
-/*
- * 64-bit software counter structure
- *
- * the next_reset_type is applied to the next call to pfm_reset_regs()
- */
-typedef struct {
-	unsigned long	val;		/* virtual 64bit counter value */
-	unsigned long	lval;		/* last reset value */
-	unsigned long	long_reset;	/* reset value on sampling overflow */
-	unsigned long	short_reset;    /* reset value on overflow */
-	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
-	unsigned long	smpl_pmds[4];   /* which pmds are accessed when counter overflow */
-	unsigned long	seed;		/* seed for random-number generator */
-	unsigned long	mask;		/* mask for random-number generator */
-	unsigned int 	flags;		/* notify/do not notify */
-	unsigned long	eventid;	/* overflow event identifier */
-} pfm_counter_t;
-
-/*
- * context flags
- */
-typedef struct {
-	unsigned int block:1;		/* when 1, task will blocked on user notifications */
-	unsigned int system:1;		/* do system wide monitoring */
-	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
-	unsigned int is_sampling:1;	/* true if using a custom format */
-	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
-	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
-	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
-	unsigned int no_msg:1;		/* no message sent on overflow */
-	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
-	unsigned int reserved:22;
-} pfm_context_flags_t;
-
-#define PFM_TRAP_REASON_NONE		0x0	/* default value */
-#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
-#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */
-
-
-/*
- * perfmon context: encapsulates all the state of a monitoring session
- */
-
-typedef struct pfm_context {
-	spinlock_t		ctx_lock;		/* context protection */
-
-	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
-	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */
-
-	struct task_struct 	*ctx_task;		/* task to which context is attached */
-
-	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
-
-	struct completion	ctx_restart_done;  	/* use for blocking notification mode */
-
-	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
-	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
-	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */
-
-	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
-	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
-	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */
-
-	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */
-
-	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
-	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
-	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
-	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
-
-	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
-
-	u64			ctx_saved_psr_up;	/* only contains psr.up value */
-
-	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
-	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
-	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
-
-	int			ctx_fd;			/* file descriptor used my this context */
-	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */
-
-	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
-	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
-	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
-	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */
-
-	wait_queue_head_t 	ctx_msgq_wait;
-	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
-	int			ctx_msgq_head;
-	int			ctx_msgq_tail;
-	struct fasync_struct	*ctx_async_queue;
-
-	wait_queue_head_t 	ctx_zombieq;		/* termination cleanup wait queue */
-} pfm_context_t;
-
-/*
- * magic number used to verify that structure is really
- * a perfmon context
- */
-#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)
-
-#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)
-
-#ifdef CONFIG_SMP
-#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
-#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
-#else
-#define SET_LAST_CPU(ctx, v)	do {} while(0)
-#define GET_LAST_CPU(ctx)	do {} while(0)
-#endif
-
-
-#define ctx_fl_block		ctx_flags.block
-#define ctx_fl_system		ctx_flags.system
-#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
-#define ctx_fl_is_sampling	ctx_flags.is_sampling
-#define ctx_fl_excl_idle	ctx_flags.excl_idle
-#define ctx_fl_going_zombie	ctx_flags.going_zombie
-#define ctx_fl_trap_reason	ctx_flags.trap_reason
-#define ctx_fl_no_msg		ctx_flags.no_msg
-#define ctx_fl_can_restart	ctx_flags.can_restart
-
-#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0);
-#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking
-
-/*
- * global information about all sessions
- * mostly used to synchronize between system wide and per-process
- */
-typedef struct {
-	spinlock_t		pfs_lock;		   /* lock the structure */
-
-	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
-	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
-	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
-	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
-	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
-} pfm_session_t;
-
-/*
- * information about a PMC or PMD.
- * dep_pmd[]: a bitmask of dependent PMD registers
- * dep_pmc[]: a bitmask of dependent PMC registers
- */
-typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
-typedef struct {
-	unsigned int		type;
-	int			pm_pos;
-	unsigned long		default_value;	/* power-on default value */
-	unsigned long		reserved_mask;	/* bitmask of reserved bits */
-	pfm_reg_check_t		read_check;
-	pfm_reg_check_t		write_check;
-	unsigned long		dep_pmd[4];
-	unsigned long		dep_pmc[4];
-} pfm_reg_desc_t;
-
-/* assume cnum is a valid monitor */
-#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
-
-/*
- * This structure is initialized at boot time and contains
- * a description of the PMU main characteristics.
- *
- * If the probe function is defined, detection is based
- * on its return value: 
- * 	- 0 means recognized PMU
- * 	- anything else means not supported
- * When the probe function is not defined, then the pmu_family field
- * is used and it must match the host CPU family such that:
- * 	- cpu->family & config->pmu_family != 0
- */
-typedef struct {
-	unsigned long  ovfl_val;	/* overflow value for counters */
-
-	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
-	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */
-
-	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
-	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
-	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
-	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */
-
-	char	      *pmu_name;	/* PMU family name */
-	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
-	unsigned int  flags;		/* pmu specific flags */
-	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
-	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
-	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
-	int           (*probe)(void);   /* customized probe routine */
-	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
-} pmu_config_t;
-/*
- * PMU specific flags
- */
-#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */
-
-/*
- * debug register related type definitions
- */
-typedef struct {
-	unsigned long ibr_mask:56;
-	unsigned long ibr_plm:4;
-	unsigned long ibr_ig:3;
-	unsigned long ibr_x:1;
-} ibr_mask_reg_t;
-
-typedef struct {
-	unsigned long dbr_mask:56;
-	unsigned long dbr_plm:4;
-	unsigned long dbr_ig:2;
-	unsigned long dbr_w:1;
-	unsigned long dbr_r:1;
-} dbr_mask_reg_t;
-
-typedef union {
-	unsigned long  val;
-	ibr_mask_reg_t ibr;
-	dbr_mask_reg_t dbr;
-} dbreg_t;
-
-
-/*
- * perfmon command descriptions
- */
-typedef struct {
-	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-	char		*cmd_name;
-	int		cmd_flags;
-	unsigned int	cmd_narg;
-	size_t		cmd_argsize;
-	int		(*cmd_getsize)(void *arg, size_t *sz);
-} pfm_cmd_desc_t;
-
-#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
-#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
-#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
-#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */
-
-
-#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
-#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
-#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
-#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
-#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
-
-#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
-
-typedef struct {
-	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
-	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
-	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
-	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
-	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
-	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
-	unsigned long pfm_smpl_handler_calls;
-	unsigned long pfm_smpl_handler_cycles;
-	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
-} pfm_stats_t;
-
-/*
- * perfmon internal variables
- */
-static pfm_stats_t		pfm_stats[NR_CPUS];
-static pfm_session_t		pfm_sessions;	/* global sessions information */
-
-static DEFINE_SPINLOCK(pfm_alt_install_check);
-static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
-
-static struct proc_dir_entry 	*perfmon_dir;
-static pfm_uuid_t		pfm_null_uuid = {0,};
-
-static spinlock_t		pfm_buffer_fmt_lock;
-static LIST_HEAD(pfm_buffer_fmt_list);
-
-static pmu_config_t		*pmu_conf;
-
-/* sysctl() controls */
-pfm_sysctl_t pfm_sysctl;
-EXPORT_SYMBOL(pfm_sysctl);
-
-static ctl_table pfm_ctl_table[]={
-	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
-	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
-	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
-	{4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
-	{ 0, },
-};
-static ctl_table pfm_sysctl_dir[] = {
-	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
- 	{0,},
-};
-static ctl_table pfm_sysctl_root[] = {
-	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
- 	{0,},
-};
-static struct ctl_table_header *pfm_sysctl_header;
-
-static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-
-#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
-#define pfm_get_cpu_data(a,b)		per_cpu(a, b)
-
-static inline void
-pfm_put_task(struct task_struct *task)
-{
-	if (task != current) put_task_struct(task);
-}
-
-static inline void
-pfm_set_task_notify(struct task_struct *task)
-{
-	struct thread_info *info;
-
-	info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
-	set_bit(TIF_NOTIFY_RESUME, &info->flags);
-}
-
-static inline void
-pfm_clear_task_notify(void)
-{
-	clear_thread_flag(TIF_NOTIFY_RESUME);
-}
-
-static inline void
-pfm_reserve_page(unsigned long a)
-{
-	SetPageReserved(vmalloc_to_page((void *)a));
-}
-static inline void
-pfm_unreserve_page(unsigned long a)
-{
-	ClearPageReserved(vmalloc_to_page((void*)a));
-}
-
-static inline unsigned long
-pfm_protect_ctx_ctxsw(pfm_context_t *x)
-{
-	spin_lock(&(x)->ctx_lock);
-	return 0UL;
-}
-
-static inline void
-pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
-{
-	spin_unlock(&(x)->ctx_lock);
-}
-
-static inline unsigned int
-pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
-{
-	return do_munmap(mm, addr, len);
-}
-
-static inline unsigned long 
-pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
-{
-	return get_unmapped_area(file, addr, len, pgoff, flags);
-}
-
-
-static int
-pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data,
-	     struct vfsmount *mnt)
-{
-	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
-}
-
-static struct file_system_type pfm_fs_type = {
-	.name     = "pfmfs",
-	.get_sb   = pfmfs_get_sb,
-	.kill_sb  = kill_anon_super,
-};
-
-DEFINE_PER_CPU(unsigned long, pfm_syst_info);
-DEFINE_PER_CPU(struct task_struct *, pmu_owner);
-DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
-DEFINE_PER_CPU(unsigned long, pmu_activation_number);
-EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
-
-
-/* forward declaration */
-static struct file_operations pfm_file_ops;
-
-/*
- * forward declarations
- */
-#ifndef CONFIG_SMP
-static void pfm_lazy_save_regs (struct task_struct *ta);
-#endif
-
-void dump_pmu_state(const char *);
-static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-
-#include "perfmon_itanium.h"
-#include "perfmon_mckinley.h"
-#include "perfmon_montecito.h"
-#include "perfmon_generic.h"
-
-static pmu_config_t *pmu_confs[]={
-	&pmu_conf_mont,
-	&pmu_conf_mck,
-	&pmu_conf_ita,
-	&pmu_conf_gen, /* must be last */
-	NULL
-};
-
-
-static int pfm_end_notify_user(pfm_context_t *ctx);
-
-static inline void
-pfm_clear_psr_pp(void)
-{
-	ia64_rsm(IA64_PSR_PP);
-	ia64_srlz_i();
-}
-
-static inline void
-pfm_set_psr_pp(void)
-{
-	ia64_ssm(IA64_PSR_PP);
-	ia64_srlz_i();
-}
-
-static inline void
-pfm_clear_psr_up(void)
-{
-	ia64_rsm(IA64_PSR_UP);
-	ia64_srlz_i();
-}
-
-static inline void
-pfm_set_psr_up(void)
-{
-	ia64_ssm(IA64_PSR_UP);
-	ia64_srlz_i();
-}
-
-static inline unsigned long
-pfm_get_psr(void)
-{
-	unsigned long tmp;
-	tmp = ia64_getreg(_IA64_REG_PSR);
-	ia64_srlz_i();
-	return tmp;
-}
-
-static inline void
-pfm_set_psr_l(unsigned long val)
-{
-	ia64_setreg(_IA64_REG_PSR_L, val);
-	ia64_srlz_i();
-}
-
-static inline void
-pfm_freeze_pmu(void)
-{
-	ia64_set_pmc(0,1UL);
-	ia64_srlz_d();
-}
-
-static inline void
-pfm_unfreeze_pmu(void)
-{
-	ia64_set_pmc(0,0UL);
-	ia64_srlz_d();
-}
-
-static inline void
-pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
-{
-	int i;
-
-	for (i=0; i < nibrs; i++) {
-		ia64_set_ibr(i, ibrs[i]);
-		ia64_dv_serialize_instruction();
-	}
-	ia64_srlz_i();
-}
-
-static inline void
-pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
-{
-	int i;
-
-	for (i=0; i < ndbrs; i++) {
-		ia64_set_dbr(i, dbrs[i]);
-		ia64_dv_serialize_data();
-	}
-	ia64_srlz_d();
-}
-
-/*
- * PMD[i] must be a counter. no check is made
- */
-static inline unsigned long
-pfm_read_soft_counter(pfm_context_t *ctx, int i)
-{
-	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
-}
-
-/*
- * PMD[i] must be a counter. no check is made
- */
-static inline void
-pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
-{
-	unsigned long ovfl_val = pmu_conf->ovfl_val;
-
-	ctx->ctx_pmds[i].val = val  & ~ovfl_val;
-	/*
-	 * writing to unimplemented part is ignore, so we do not need to
-	 * mask off top part
-	 */
-	ia64_set_pmd(i, val & ovfl_val);
-}
-
-static pfm_msg_t *
-pfm_get_new_msg(pfm_context_t *ctx)
-{
-	int idx, next;
-
-	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
-
-	DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
-	if (next == ctx->ctx_msgq_head) return NULL;
-
- 	idx = 	ctx->ctx_msgq_tail;
-	ctx->ctx_msgq_tail = next;
-
-	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
-
-	return ctx->ctx_msgq+idx;
-}
-
-static pfm_msg_t *
-pfm_get_next_msg(pfm_context_t *ctx)
-{
-	pfm_msg_t *msg;
-
-	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
-
-	if (PFM_CTXQ_EMPTY(ctx)) return NULL;
-
-	/*
-	 * get oldest message
-	 */
-	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
-
-	/*
-	 * and move forward
-	 */
-	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
-
-	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
-
-	return msg;
-}
-
-static void
-pfm_reset_msgq(pfm_context_t *ctx)
-{
-	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
-	DPRINT(("ctx=%p msgq reset\n", ctx));
-}
-
-static void *
-pfm_rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long addr;
-
-	size = PAGE_ALIGN(size);
-	mem  = vmalloc(size);
-	if (mem) {
-		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
-		memset(mem, 0, size);
-		addr = (unsigned long)mem;
-		while (size > 0) {
-			pfm_reserve_page(addr);
-			addr+=PAGE_SIZE;
-			size-=PAGE_SIZE;
-		}
-	}
-	return mem;
-}
-
-static void
-pfm_rvfree(void *mem, unsigned long size)
-{
-	unsigned long addr;
-
-	if (mem) {
-		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
-		addr = (unsigned long) mem;
-		while ((long) size > 0) {
-			pfm_unreserve_page(addr);
-			addr+=PAGE_SIZE;
-			size-=PAGE_SIZE;
-		}
-		vfree(mem);
-	}
-	return;
-}
-
-static pfm_context_t *
-pfm_context_alloc(void)
-{
-	pfm_context_t *ctx;
-
-	/* 
-	 * allocate context descriptor 
-	 * must be able to free with interrupts disabled
-	 */
-	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
-	if (ctx) {
-		memset(ctx, 0, sizeof(pfm_context_t));
-		DPRINT(("alloc ctx @%p\n", ctx));
-	}
-	return ctx;
-}
-
-static void
-pfm_context_free(pfm_context_t *ctx)
-{
-	if (ctx) {
-		DPRINT(("free ctx @%p\n", ctx));
-		kfree(ctx);
-	}
-}
-
-static void
-pfm_mask_monitoring(struct task_struct *task)
-{
-	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
-	unsigned long mask, val, ovfl_mask;
-	int i;
-
-	DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
-
-	ovfl_mask = pmu_conf->ovfl_val;
-	/*
-	 * monitoring can only be masked as a result of a valid
-	 * counter overflow. In UP, it means that the PMU still
-	 * has an owner. Note that the owner can be different
-	 * from the current task. However the PMU state belongs
-	 * to the owner.
-	 * In SMP, a valid overflow only happens when task is
-	 * current. Therefore if we come here, we know that
-	 * the PMU state belongs to the current task, therefore
-	 * we can access the live registers.
-	 *
-	 * So in both cases, the live register contains the owner's
-	 * state. We can ONLY touch the PMU registers and NOT the PSR.
-	 *
-	 * As a consequence to this call, the thread->pmds[] array
-	 * contains stale information which must be ignored
-	 * when context is reloaded AND monitoring is active (see
-	 * pfm_restart).
-	 */
-	mask = ctx->ctx_used_pmds[0];
-	for (i = 0; mask; i++, mask>>=1) {
-		/* skip non used pmds */
-		if ((mask & 0x1) == 0) continue;
-		val = ia64_get_pmd(i);
-
-		if (PMD_IS_COUNTING(i)) {
-			/*
-		 	 * we rebuild the full 64 bit value of the counter
-		 	 */
-			ctx->ctx_pmds[i].val += (val & ovfl_mask);
-		} else {
-			ctx->ctx_pmds[i].val = val;
-		}
-		DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
-			i,
-			ctx->ctx_pmds[i].val,
-			val & ovfl_mask));
-	}
-	/*
-	 * mask monitoring by setting the privilege level to 0
-	 * we cannot use psr.pp/psr.up for this, it is controlled by
-	 * the user
-	 *
-	 * if task is current, modify actual registers, otherwise modify
-	 * thread save state, i.e., what will be restored in pfm_load_regs()
-	 */
-	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
-	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
-		if ((mask & 0x1) == 0UL) continue;
-		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
-		th->pmcs[i] &= ~0xfUL;
-		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
-	}
-	/*
-	 * make all of this visible
-	 */
-	ia64_srlz_d();
-}
-
-/*
- * must always be done with task == current
- *
- * context must be in MASKED state when calling
- */
-static void
-pfm_restore_monitoring(struct task_struct *task)
-{
-	pfm_context_t *ctx = PFM_GET_CTX(task);
-	struct thread_struct *th = &task->thread;
-	unsigned long mask, ovfl_mask;
-	unsigned long psr, val;
-	int i, is_system;
-
-	is_system = ctx->ctx_fl_system;
-	ovfl_mask = pmu_conf->ovfl_val;
-
-	if (task != current) {
-		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
-		return;
-	}
-	if (ctx->ctx_state != PFM_CTX_MASKED) {
-		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
-			task->pid, current->pid, ctx->ctx_state);
-		return;
-	}
-	psr = pfm_get_psr();
-	/*
-	 * monitoring is masked via the PMC.
-	 * As we restore their value, we do not want each counter to
-	 * restart right away. We stop monitoring using the PSR,
-	 * restore the PMC (and PMD) and then re-establish the psr
-	 * as it was. Note that there can be no pending overflow at
-	 * this point, because monitoring was MASKED.
-	 *
-	 * system-wide session are pinned and self-monitoring
-	 */
-	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
-		/* disable dcr pp */
-		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
-		pfm_clear_psr_pp();
-	} else {
-		pfm_clear_psr_up();
-	}
-	/*
-	 * first, we restore the PMD
-	 */
-	mask = ctx->ctx_used_pmds[0];
-	for (i = 0; mask; i++, mask>>=1) {
-		/* skip non used pmds */
-		if ((mask & 0x1) == 0) continue;
-
-		if (PMD_IS_COUNTING(i)) {
-			/*
-			 * we split the 64bit value according to
-			 * counter width
-			 */
-			val = ctx->ctx_pmds[i].val & ovfl_mask;
-			ctx->ctx_pmds[i].val &= ~ovfl_mask;
-		} else {
-			val = ctx->ctx_pmds[i].val;
-		}
-		ia64_set_pmd(i, val);
-
-		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
-			i,
-			ctx->ctx_pmds[i].val,
-			val));
-	}
-	/*
-	 * restore the PMCs
-	 */
-	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
-	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
-		if ((mask & 0x1) == 0UL) continue;
-		th->pmcs[i] = ctx->ctx_pmcs[i];
-		ia64_set_pmc(i, th->pmcs[i]);
-		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
-	}
-	ia64_srlz_d();
-
-	/*
-	 * must restore DBR/IBR because could be modified while masked
-	 * XXX: need to optimize 
-	 */
-	if (ctx->ctx_fl_using_dbreg) {
-		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
-		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
-	}
-
-	/*
-	 * now restore PSR
-	 */
-	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
-		/* enable dcr pp */
-		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
-		ia64_srlz_i();
-	}
-	pfm_set_psr_l(psr);
-}
-
-static inline void
-pfm_save_pmds(unsigned long *pmds, unsigned long mask)
-{
-	int i;
-
-	ia64_srlz_d();
-
-	for (i=0; mask; i++, mask>>=1) {
-		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
-	}
-}
-
-/*
- * reload from thread state (used for ctxw only)
- */
-static inline void
-pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
-{
-	int i;
-	unsigned long val, ovfl_val = pmu_conf->ovfl_val;
-
-	for (i=0; mask; i++, mask>>=1) {
-		if ((mask & 0x1) == 0) continue;
-		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
-		ia64_set_pmd(i, val);
-	}
-	ia64_srlz_d();
-}
-
-/*
- * propagate PMD from context to thread-state
- */
-static inline void
-pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
-{
-	struct thread_struct *thread = &task->thread;
-	unsigned long ovfl_val = pmu_conf->ovfl_val;
-	unsigned long mask = ctx->ctx_all_pmds[0];
-	unsigned long val;
-	int i;
-
-	DPRINT(("mask=0x%lx\n", mask));
-
-	for (i=0; mask; i++, mask>>=1) {
-
-		val = ctx->ctx_pmds[i].val;
-
-		/*
-		 * We break up the 64 bit value into 2 pieces
-		 * the lower bits go to the machine state in the
-		 * thread (will be reloaded on ctxsw in).
-		 * The upper part stays in the soft-counter.
-		 */
-		if (PMD_IS_COUNTING(i)) {
-			ctx->ctx_pmds[i].val = val & ~ovfl_val;
-			 val &= ovfl_val;
-		}
-		thread->pmds[i] = val;
-
-		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
-			i,
-			thread->pmds[i],
-			ctx->ctx_pmds[i].val));
-	}
-}
-
-/*
- * propagate PMC from context to thread-state
- */
-static inline void
-pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
-{
-	struct thread_struct *thread = &task->thread;
-	unsigned long mask = ctx->ctx_all_pmcs[0];
-	int i;
-
-	DPRINT(("mask=0x%lx\n", mask));
-
-	for (i=0; mask; i++, mask>>=1) {
-		/* masking 0 with ovfl_val yields 0 */
-		thread->pmcs[i] = ctx->ctx_pmcs[i];
-		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
-	}
-}
-
-
-
-static inline void
-pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
-{
-	int i;
-
-	for (i=0; mask; i++, mask>>=1) {
-		if ((mask & 0x1) == 0) continue;
-		ia64_set_pmc(i, pmcs[i]);
-	}
-	ia64_srlz_d();
-}
-
-static inline int
-pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
-{
-	return memcmp(a, b, sizeof(pfm_uuid_t));
-}
-
-static inline int
-pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
-{
-	int ret = 0;
-	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
-	return ret;
-}
-
-static inline int
-pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
-{
-	int ret = 0;
-	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
-	return ret;
-}
-
-
-static inline int
-pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
-		     int cpu, void *arg)
-{
-	int ret = 0;
-	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
-	return ret;
-}
-
-static inline int
-pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
-		     int cpu, void *arg)
-{
-	int ret = 0;
-	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
-	return ret;
-}
-
-static inline int
-pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
-{
-	int ret = 0;
-	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
-	return ret;
-}
-
-static inline int
-pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
-{
-	int ret = 0;
-	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
-	return ret;
-}
-
-static pfm_buffer_fmt_t *
-__pfm_find_buffer_fmt(pfm_uuid_t uuid)
-{
-	struct list_head * pos;
-	pfm_buffer_fmt_t * entry;
-
-	list_for_each(pos, &pfm_buffer_fmt_list) {
-		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
-		if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
-			return entry;
-	}
-	return NULL;
-}
- 
-/*
- * find a buffer format based on its uuid
- */
-static pfm_buffer_fmt_t *
-pfm_find_buffer_fmt(pfm_uuid_t uuid)
-{
-	pfm_buffer_fmt_t * fmt;
-	spin_lock(&pfm_buffer_fmt_lock);
-	fmt = __pfm_find_buffer_fmt(uuid);
-	spin_unlock(&pfm_buffer_fmt_lock);
-	return fmt;
-}
- 
-int
-pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
-{
-	int ret = 0;
-
-	/* some sanity checks */
-	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;
-
-	/* we need at least a handler */
-	if (fmt->fmt_handler == NULL) return -EINVAL;
-
-	/*
-	 * XXX: need check validity of fmt_arg_size
-	 */
-
-	spin_lock(&pfm_buffer_fmt_lock);
-
-	if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
-		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
-		ret = -EBUSY;
-		goto out;
-	} 
-	list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
-	printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
-
-out:
-	spin_unlock(&pfm_buffer_fmt_lock);
- 	return ret;
-}
-EXPORT_SYMBOL(pfm_register_buffer_fmt);
-
-int
-pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
-{
-	pfm_buffer_fmt_t *fmt;
-	int ret = 0;
-
-	spin_lock(&pfm_buffer_fmt_lock);
-
-	fmt = __pfm_find_buffer_fmt(uuid);
-	if (!fmt) {
-		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
-		ret = -EINVAL;
-		goto out;
-	}
-	list_del_init(&fmt->fmt_list);
-	printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
-
-out:
-	spin_unlock(&pfm_buffer_fmt_lock);
-	return ret;
-
-}
-EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
-
-extern void update_pal_halt_status(int);
-
-static int
-pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
-{
-	unsigned long flags;
-	/*
-	 * validy checks on cpu_mask have been done upstream
-	 */
-	LOCK_PFS(flags);
-
-	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-	if (is_syswide) {
-		/*
-		 * cannot mix system wide and per-task sessions
-		 */
-		if (pfm_sessions.pfs_task_sessions > 0UL) {
-			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
-			  	pfm_sessions.pfs_task_sessions));
-			goto abort;
-		}
-
-		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
-
-		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
-
-		pfm_sessions.pfs_sys_session[cpu] = task;
-
-		pfm_sessions.pfs_sys_sessions++ ;
-
-	} else {
-		if (pfm_sessions.pfs_sys_sessions) goto abort;
-		pfm_sessions.pfs_task_sessions++;
-	}
-
-	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-	/*
-	 * disable default_idle() to go to PAL_HALT
-	 */
-	update_pal_halt_status(0);
-
-	UNLOCK_PFS(flags);
-
-	return 0;
-
-error_conflict:
-	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
-  		pfm_sessions.pfs_sys_session[cpu]->pid,
-		cpu));
-abort:
-	UNLOCK_PFS(flags);
-
-	return -EBUSY;
-
-}
-
-static int
-pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
-{
-	unsigned long flags;
-	/*
-	 * validy checks on cpu_mask have been done upstream
-	 */
-	LOCK_PFS(flags);
-
-	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-
-	if (is_syswide) {
-		pfm_sessions.pfs_sys_session[cpu] = NULL;
-		/*
-		 * would not work with perfmon+more than one bit in cpu_mask
-		 */
-		if (ctx && ctx->ctx_fl_using_dbreg) {
-			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
-				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
-			} else {
-				pfm_sessions.pfs_sys_use_dbregs--;
-			}
-		}
-		pfm_sessions.pfs_sys_sessions--;
-	} else {
-		pfm_sessions.pfs_task_sessions--;
-	}
-	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
-		pfm_sessions.pfs_sys_sessions,
-		pfm_sessions.pfs_task_sessions,
-		pfm_sessions.pfs_sys_use_dbregs,
-		is_syswide,
-		cpu));
-
-	/*
-	 * if possible, enable default_idle() to go into PAL_HALT
-	 */
-	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
-		update_pal_halt_status(1);
-
-	UNLOCK_PFS(flags);
-
-	return 0;
-}
-
-/*
- * removes virtual mapping of the sampling buffer.
- * IMPORTANT: cannot be called with interrupts disable, e.g. inside
- * a PROTECT_CTX() section.
- */
-static int
-pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
-{
-	int r;
-
-	/* sanity checks */
-	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
-		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
-		return -EINVAL;
-	}
-
-	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
-
-	/*
-	 * does the actual unmapping
-	 */
-	down_write(&task->mm->mmap_sem);
-
-	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
-
-	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
-
-	up_write(&task->mm->mmap_sem);
-	if (r !=0) {
-		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
-	}
-
-	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
-
-	return 0;
-}
-
-/*
- * free actual physical storage used by sampling buffer
- */
-#if 0
-static int
-pfm_free_smpl_buffer(pfm_context_t *ctx)
-{
-	pfm_buffer_fmt_t *fmt;
-
-	if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
-
-	/*
-	 * we won't use the buffer format anymore
-	 */
-	fmt = ctx->ctx_buf_fmt;
-
-	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
-		ctx->ctx_smpl_hdr,
-		ctx->ctx_smpl_size,
-		ctx->ctx_smpl_vaddr));
-
-	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
-
-	/*
-	 * free the buffer
-	 */
-	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
-
-	ctx->ctx_smpl_hdr  = NULL;
-	ctx->ctx_smpl_size = 0UL;
-
-	return 0;
-
-invalid_free:
-	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
-	return -EINVAL;
-}
-#endif
-
-static inline void
-pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
-{
-	if (fmt == NULL) return;
-
-	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
-
-}
-
-/*
- * pfmfs should _never_ be mounted by userland - too much of security hassle,
- * no real gain from having the whole whorehouse mounted. So we don't need
- * any operations on the root directory. However, we need a non-trivial
- * d_name - pfm: will go nicely and kill the special-casing in procfs.
- */
-static struct vfsmount *pfmfs_mnt;
-
-static int __init
-init_pfm_fs(void)
-{
-	int err = register_filesystem(&pfm_fs_type);
-	if (!err) {
-		pfmfs_mnt = kern_mount(&pfm_fs_type);
-		err = PTR_ERR(pfmfs_mnt);
-		if (IS_ERR(pfmfs_mnt))
-			unregister_filesystem(&pfm_fs_type);
-		else
-			err = 0;
-	}
-	return err;
-}
-
-static void __exit
-exit_pfm_fs(void)
-{
-	unregister_filesystem(&pfm_fs_type);
-	mntput(pfmfs_mnt);
-}
-
-static ssize_t
-pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
-{
-	pfm_context_t *ctx;
-	pfm_msg_t *msg;
-	ssize_t ret;
-	unsigned long flags;
-  	DECLARE_WAITQUEUE(wait, current);
-	XEN_NOT_SUPPORTED_YET;
-	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
-		return -EINVAL;
-	}
-
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
-		return -EINVAL;
-	}
-
-	/*
-	 * check even when there is no message
-	 */
-	if (size < sizeof(pfm_msg_t)) {
-		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
-		return -EINVAL;
-	}
-
-	PROTECT_CTX(ctx, flags);
-
-  	/*
-	 * put ourselves on the wait queue
-	 */
-  	add_wait_queue(&ctx->ctx_msgq_wait, &wait);
-
-
-  	for(;;) {
-		/*
-		 * check wait queue
-		 */
-
-  		set_current_state(TASK_INTERRUPTIBLE);
-
-		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
-
-		ret = 0;
-		if(PFM_CTXQ_EMPTY(ctx) == 0) break;
-
-		UNPROTECT_CTX(ctx, flags);
-
-		/*
-		 * check non-blocking read
-		 */
-      		ret = -EAGAIN;
-		if(filp->f_flags & O_NONBLOCK) break;
-
-		/*
-		 * check pending signals
-		 */
-		if(signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-      		/*
-		 * no message, so wait
-		 */
-      		schedule();
-
-		PROTECT_CTX(ctx, flags);
-	}
-	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
-  	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
-
-	if (ret < 0) goto abort;
-
-	ret = -EINVAL;
-	msg = pfm_get_next_msg(ctx);
-	if (msg == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
-		goto abort_locked;
-	}
-
-	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
-
-	ret = -EFAULT;
-  	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
-
-abort_locked:
-	UNPROTECT_CTX(ctx, flags);
-abort:
-	return ret;
-}
-
-static ssize_t
-pfm_write(struct file *file, const char __user *ubuf,
-			  size_t size, loff_t *ppos)
-{
-	DPRINT(("pfm_write called\n"));
-	return -EINVAL;
-}
-
-static unsigned int
-pfm_poll(struct file *filp, poll_table * wait)
-{
-	pfm_context_t *ctx;
-	unsigned long flags;
-	unsigned int mask = 0;
-
-	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
-		return 0;
-	}
-
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
-		return 0;
-	}
-
-
-	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
-
-	poll_wait(filp, &ctx->ctx_msgq_wait, wait);
-
-	PROTECT_CTX(ctx, flags);
-
-	if (PFM_CTXQ_EMPTY(ctx) == 0)
-		mask =  POLLIN | POLLRDNORM;
-
-	UNPROTECT_CTX(ctx, flags);
-
-	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));
-
-	return mask;
-}
-
-static int
-pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
-{
-	DPRINT(("pfm_ioctl called\n"));
-	return -EINVAL;
-}
-
-/*
- * interrupt cannot be masked when coming here
- */
-static inline int
-pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
-{
-	int ret;
-
-	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
-
-	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-		current->pid,
-		fd,
-		on,
-		ctx->ctx_async_queue, ret));
-
-	return ret;
-}
-
-static int
-pfm_fasync(int fd, struct file *filp, int on)
-{
-	pfm_context_t *ctx;
-	int ret;
-
-	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
-		return -EBADF;
-	}
-
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
-		return -EBADF;
-	}
-	/*
-	 * we cannot mask interrupts during this call because this may
-	 * may go to sleep if memory is not readily avalaible.
-	 *
-	 * We are protected from the conetxt disappearing by the get_fd()/put_fd()
-	 * done in caller. Serialization of this function is ensured by caller.
-	 */
-	ret = pfm_do_fasync(fd, filp, ctx, on);
-
-
-	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-		fd,
-		on,
-		ctx->ctx_async_queue, ret));
-
-	return ret;
-}
-
-#ifdef CONFIG_SMP
-/*
- * this function is exclusively called from pfm_close().
- * The context is not protected at that time, nor are interrupts
- * on the remote CPU. That's necessary to avoid deadlocks.
- */
-static void
-pfm_syswide_force_stop(void *info)
-{
-	pfm_context_t   *ctx = (pfm_context_t *)info;
-	struct pt_regs *regs = task_pt_regs(current);
-	struct task_struct *owner;
-	unsigned long flags;
-	int ret;
-
-	if (ctx->ctx_cpu != smp_processor_id()) {
-		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d  but on CPU%d\n",
-			ctx->ctx_cpu,
-			smp_processor_id());
-		return;
-	}
-	owner = GET_PMU_OWNER();
-	if (owner != ctx->ctx_task) {
-		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
-			smp_processor_id(),
-			owner->pid, ctx->ctx_task->pid);
-		return;
-	}
-	if (GET_PMU_CTX() != ctx) {
-		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
-			smp_processor_id(),
-			GET_PMU_CTX(), ctx);
-		return;
-	}
-
-	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));	
-	/*
-	 * the context is already protected in pfm_close(), we simply
-	 * need to mask interrupts to avoid a PMU interrupt race on
-	 * this CPU
-	 */
-	local_irq_save(flags);
-
-	ret = pfm_context_unload(ctx, NULL, 0, regs);
-	if (ret) {
-		DPRINT(("context_unload returned %d\n", ret));
-	}
-
-	/*
-	 * unmask interrupts, PMU interrupts are now spurious here
-	 */
-	local_irq_restore(flags);
-}
-
-static void
-pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
-{
-	int ret;
-
-	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
-	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
-	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
-}
-#endif /* CONFIG_SMP */
-
-/*
- * called for each close(). Partially free resources.
- * When caller is self-monitoring, the context is unloaded.
- */
-static int
-pfm_flush(struct file *filp, fl_owner_t id)
-{
-	pfm_context_t *ctx;
-	struct task_struct *task;
-	struct pt_regs *regs;
-	unsigned long flags;
-	unsigned long smpl_buf_size = 0UL;
-	void *smpl_buf_vaddr = NULL;
-	int state, is_system;
-
-	if (PFM_IS_FILE(filp) == 0) {
-		DPRINT(("bad magic for\n"));
-		return -EBADF;
-	}
-
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
-		return -EBADF;
-	}
-
-	/*
-	 * remove our file from the async queue, if we use this mode.
-	 * This can be done without the context being protected. We come
-	 * here when the context has become unreacheable by other tasks.
-	 *
-	 * We may still have active monitoring at this point and we may
-	 * end up in pfm_overflow_handler(). However, fasync_helper()
-	 * operates with interrupts disabled and it cleans up the
-	 * queue. If the PMU handler is called prior to entering
-	 * fasync_helper() then it will send a signal. If it is
-	 * invoked after, it will find an empty queue and no
-	 * signal will be sent. In both case, we are safe
-	 */
-	if (filp->f_flags & FASYNC) {
-		DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
-		pfm_do_fasync (-1, filp, ctx, 0);
-	}
-
-	PROTECT_CTX(ctx, flags);
-
-	state     = ctx->ctx_state;
-	is_system = ctx->ctx_fl_system;
-
-	task = PFM_CTX_TASK(ctx);
-	regs = task_pt_regs(task);
-
-	DPRINT(("ctx_state=%d is_current=%d\n",
-		state,
-		task == current ? 1 : 0));
-
-	/*
-	 * if state == UNLOADED, then task is NULL
-	 */
-
-	/*
-	 * we must stop and unload because we are losing access to the context.
-	 */
-	if (task == current) {
-#ifdef CONFIG_SMP
-		/*
-		 * the task IS the owner but it migrated to another CPU: that's bad
-		 * but we must handle this cleanly. Unfortunately, the kernel does
-		 * not provide a mechanism to block migration (while the context is loaded).
-		 *
-		 * We need to release the resource on the ORIGINAL cpu.
-		 */
-		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
-
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			/*
-			 * keep context protected but unmask interrupt for IPI
-			 */
-			local_irq_restore(flags);
-
-			pfm_syswide_cleanup_other_cpu(ctx);
-
-			/*
-			 * restore interrupt masking
-			 */
-			local_irq_save(flags);
-
-			/*
-			 * context is unloaded at this point
-			 */
-		} else
-#endif /* CONFIG_SMP */
-		{
-
-			DPRINT(("forcing unload\n"));
-			/*
-		 	* stop and unload, returning with state UNLOADED
-		 	* and session unreserved.
-		 	*/
-			pfm_context_unload(ctx, NULL, 0, regs);
-
-			DPRINT(("ctx_state=%d\n", ctx->ctx_state));
-		}
-	}
-
-	/*
-	 * remove virtual mapping, if any, for the calling task.
-	 * cannot reset ctx field until last user is calling close().
-	 *
-	 * ctx_smpl_vaddr must never be cleared because it is needed
-	 * by every task with access to the context
-	 *
-	 * When called from do_exit(), the mm context is gone already, therefore
-	 * mm is NULL, i.e., the VMA is already gone  and we do not have to
-	 * do anything here
-	 */
-	if (ctx->ctx_smpl_vaddr && current->mm) {
-		smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
-		smpl_buf_size  = ctx->ctx_smpl_size;
-	}
-
-	UNPROTECT_CTX(ctx, flags);
-
-	/*
-	 * if there was a mapping, then we systematically remove it
-	 * at this point. Cannot be done inside critical section
-	 * because some VM function reenables interrupts.
-	 *
-	 */
-	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
-
-	return 0;
-}
-/*
- * called either on explicit close() or from exit_files(). 
- * Only the LAST user of the file gets to this point, i.e., it is
- * called only ONCE.
- *
- * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 
- * (fput()),i.e, last task to access the file. Nobody else can access the 
- * file at this point.
- *
- * When called from exit_files(), the VMA has been freed because exit_mm()
- * is executed before exit_files().
- *
- * When called from exit_files(), the current task is not yet ZOMBIE but we
- * flush the PMU state to the context. 
- */
-static int
-pfm_close(struct inode *inode, struct file *filp)
-{
-	pfm_context_t *ctx;
-	struct task_struct *task;
-	struct pt_regs *regs;
-  	DECLARE_WAITQUEUE(wait, current);
-	unsigned long flags;
-	unsigned long smpl_buf_size = 0UL;
-	void *smpl_buf_addr = NULL;
-	int free_possible = 1;
-	int state, is_system;
-
-	DPRINT(("pfm_close called private=%p\n", filp->private_data));
-
-	if (PFM_IS_FILE(filp) == 0) {
-		DPRINT(("bad magic\n"));
-		return -EBADF;
-	}
-	
-	ctx = (pfm_context_t *)filp->private_data;
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
-		return -EBADF;
-	}
-
-	PROTECT_CTX(ctx, flags);
-
-	state     = ctx->ctx_state;
-	is_system = ctx->ctx_fl_system;
-
-	task = PFM_CTX_TASK(ctx);
-	regs = task_pt_regs(task);
-
-	DPRINT(("ctx_state=%d is_current=%d\n", 
-		state,
-		task == current ? 1 : 0));
-
-	/*
-	 * if task == current, then pfm_flush() unloaded the context
-	 */
-	if (state == PFM_CTX_UNLOADED) goto doit;
-
-	/*
-	 * context is loaded/masked and task != current, we need to
-	 * either force an unload or go zombie
-	 */
-
-	/*
-	 * The task is currently blocked or will block after an overflow.
-	 * we must force it to wakeup to get out of the
-	 * MASKED state and transition to the unloaded state by itself.
-	 *
-	 * This situation is only possible for per-task mode
-	 */
-	if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {
-
-		/*
-		 * set a "partial" zombie state to be checked
-		 * upon return from down() in pfm_handle_work().
-		 *
-		 * We cannot use the ZOMBIE state, because it is checked
-		 * by pfm_load_regs() which is called upon wakeup from down().
-		 * In such case, it would free the context and then we would
-		 * return to pfm_handle_work() which would access the
-		 * stale context. Instead, we set a flag invisible to pfm_load_regs()
-		 * but visible to pfm_handle_work().
-		 *
-		 * For some window of time, we have a zombie context with
-		 * ctx_state = MASKED  and not ZOMBIE
-		 */
-		ctx->ctx_fl_going_zombie = 1;
-
-		/*
-		 * force task to wake up from MASKED state
-		 */
-		complete(&ctx->ctx_restart_done);
-
-		DPRINT(("waking up ctx_state=%d\n", state));
-
-		/*
-		 * put ourself to sleep waiting for the other
-		 * task to report completion
-		 *
-		 * the context is protected by mutex, therefore there
-		 * is no risk of being notified of completion before
-		 * begin actually on the waitq.
-		 */
-  		set_current_state(TASK_INTERRUPTIBLE);
-  		add_wait_queue(&ctx->ctx_zombieq, &wait);
-
-		UNPROTECT_CTX(ctx, flags);
-
-		/*
-		 * XXX: check for signals :
-		 * 	- ok for explicit close
-		 * 	- not ok when coming from exit_files()
-		 */
-      		schedule();
-
-
-		PROTECT_CTX(ctx, flags);
-
-
-		remove_wait_queue(&ctx->ctx_zombieq, &wait);
-  		set_current_state(TASK_RUNNING);
-
-		/*
-		 * context is unloaded at this point
-		 */
-		DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
-	}
-	else if (task != current) {
-#ifdef CONFIG_SMP
-		/*
-	 	 * switch context to zombie state
-	 	 */
-		ctx->ctx_state = PFM_CTX_ZOMBIE;
-
-		DPRINT(("zombie ctx for [%d]\n", task->pid));
-		/*
-		 * cannot free the context on the spot. deferred until
-		 * the task notices the ZOMBIE state
-		 */
-		free_possible = 0;
-#else
-		pfm_context_unload(ctx, NULL, 0, regs);
-#endif
-	}
-
-doit:
-	/* reload state, may have changed during  opening of critical section */
-	state = ctx->ctx_state;
-
-	/*
-	 * the context is still attached to a task (possibly current)
-	 * we cannot destroy it right now
-	 */
-
-	/*
-	 * we must free the sampling buffer right here because
-	 * we cannot rely on it being cleaned up later by the
-	 * monitored task. It is not possible to free vmalloc'ed
-	 * memory in pfm_load_regs(). Instead, we remove the buffer
-	 * now. should there be subsequent PMU overflow originally
-	 * meant for sampling, the will be converted to spurious
-	 * and that's fine because the monitoring tools is gone anyway.
-	 */
-	if (ctx->ctx_smpl_hdr) {
-		smpl_buf_addr = ctx->ctx_smpl_hdr;
-		smpl_buf_size = ctx->ctx_smpl_size;
-		/* no more sampling */
-		ctx->ctx_smpl_hdr = NULL;
-		ctx->ctx_fl_is_sampling = 0;
-	}
-
-	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
-		state,
-		free_possible,
-		smpl_buf_addr,
-		smpl_buf_size));
-
-	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);
-
-	/*
-	 * UNLOADED that the session has already been unreserved.
-	 */
-	if (state == PFM_CTX_ZOMBIE) {
-		pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
-	}
-
-	/*
-	 * disconnect file descriptor from context must be done
-	 * before we unlock.
-	 */
-	filp->private_data = NULL;
-
-	/*
-	 * if we free on the spot, the context is now completely unreacheable
-	 * from the callers side. The monitored task side is also cut, so we
-	 * can freely cut.
-	 *
-	 * If we have a deferred free, only the caller side is disconnected.
-	 */
-	UNPROTECT_CTX(ctx, flags);
-
-	/*
-	 * All memory free operations (especially for vmalloc'ed memory)
-	 * MUST be done with interrupts ENABLED.
-	 */
-	if (smpl_buf_addr)  pfm_rvfree(smpl_buf_addr, smpl_buf_size);
-
-	/*
-	 * return the memory used by the context
-	 */
-	if (free_possible) pfm_context_free(ctx);
-
-	if (is_running_on_xen()) {
-		if (is_xenoprof_primary()) {
-			int ret = HYPERVISOR_perfmon_op(PFM_DESTROY_CONTEXT,
-			                                NULL, 0);
-			if (ret)
-				printk("%s:%d PFM_DESTROY_CONTEXT hypercall "
-				       "failed\n", __func__, __LINE__);
-		}
-	}
-	return 0;
-}
-
-static int
-pfm_no_open(struct inode *irrelevant, struct file *dontcare)
-{
-	DPRINT(("pfm_no_open called\n"));
-	return -ENXIO;
-}
-
-
-
-static struct file_operations pfm_file_ops = {
-	.llseek   = no_llseek,
-	.read     = pfm_read,
-	.write    = pfm_write,
-	.poll     = pfm_poll,
-	.ioctl    = pfm_ioctl,
-	.open     = pfm_no_open,	/* special open code to disallow open via /proc */
-	.fasync   = pfm_fasync,
-	.release  = pfm_close,
-	.flush	  = pfm_flush
-};
-
-static int
-pfmfs_delete_dentry(struct dentry *dentry)
-{
-	return 1;
-}
-
-static struct dentry_operations pfmfs_dentry_operations = {
-	.d_delete = pfmfs_delete_dentry,
-};
-
-
-static int
-pfm_alloc_fd(struct file **cfile)
-{
-	int fd, ret = 0;
-	struct file *file = NULL;
-	struct inode * inode;
-	char name[32];
-	struct qstr this;
-
-	fd = get_unused_fd();
-	if (fd < 0) return -ENFILE;
-
-	ret = -ENFILE;
-
-	file = get_empty_filp();
-	if (!file) goto out;
-
-	/*
-	 * allocate a new inode
-	 */
-	inode = new_inode(pfmfs_mnt->mnt_sb);
-	if (!inode) goto out;
-
-	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
-
-	inode->i_mode = S_IFCHR|S_IRUGO;
-	inode->i_uid  = current->fsuid;
-	inode->i_gid  = current->fsgid;
-
-	sprintf(name, "[%lu]", inode->i_ino);
-	this.name = name;
-	this.len  = strlen(name);
-	this.hash = inode->i_ino;
-
-	ret = -ENOMEM;
-
-	/*
-	 * allocate a new dcache entry
-	 */
-	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
-	if (!file->f_dentry) goto out;
-
-	file->f_dentry->d_op = &pfmfs_dentry_operations;
-
-	d_add(file->f_dentry, inode);
-	file->f_vfsmnt = mntget(pfmfs_mnt);
-	file->f_mapping = inode->i_mapping;
-
-	file->f_op    = &pfm_file_ops;
-	file->f_mode  = FMODE_READ;
-	file->f_flags = O_RDONLY;
-	file->f_pos   = 0;
-
-	/*
-	 * may have to delay until context is attached?
-	 */
-	fd_install(fd, file);
-
-	/*
-	 * the file structure we will use
-	 */
-	*cfile = file;
-
-	return fd;
-out:
-	if (file) put_filp(file);
-	put_unused_fd(fd);
-	return ret;
-}
-
-static void
-pfm_free_fd(int fd, struct file *file)
-{
-	struct files_struct *files = current->files;
-	struct fdtable *fdt;
-
-	/* 
-	 * there ie no fd_uninstall(), so we do it here
-	 */
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	rcu_assign_pointer(fdt->fd[fd], NULL);
-	spin_unlock(&files->file_lock);
-
-	if (file)
-		put_filp(file);
-	put_unused_fd(fd);
-}
-
-static int
-pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
-{
-	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
-
-	while (size > 0) {
-		unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT;
-
-
-		if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY))
-			return -ENOMEM;
-
-		addr  += PAGE_SIZE;
-		buf   += PAGE_SIZE;
-		size  -= PAGE_SIZE;
-	}
-	return 0;
-}
-
-/*
- * Allocate a sampling buffer and remap it read-only into the user address
- * space of the task.
- *
- * @task:       task whose mm receives the mapping (and whose RLIMIT_MEMLOCK
- *              bounds the buffer size)
- * @ctx:        context which records the kernel address and size of the buffer
- * @rsize:      requested size in bytes, rounded up to a multiple of PAGE_SIZE
- * @user_vaddr: on success, receives the user level address of the mapping
- *
- * Returns 0 on success and -ENOMEM on any failure. On failure the buffer and
- * the vma are released and @ctx is left untouched, so the context never
- * refers to a buffer which has already been freed.
- */
-static int
-pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
-{
-	struct mm_struct *mm = task->mm;
-	struct vm_area_struct *vma = NULL;
-	unsigned long size;
-	void *smpl_buf;
-
-
-	/*
-	 * the fixed header + requested size and align to page boundary
-	 */
-	size = PAGE_ALIGN(rsize);
-
-	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
-
-	/*
-	 * check requested size to avoid Denial-of-service attacks
-	 * XXX: may have to refine this test
-	 * Check against address space limit.
-	 *
-	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
-	 * 	return -ENOMEM;
-	 */
-	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
-		return -ENOMEM;
-
-	/*
-	 * We do the easy to undo allocations first.
-	 *
-	 * pfm_rvmalloc(), clears the buffer, so there is no leak
-	 */
-	smpl_buf = pfm_rvmalloc(size);
-	if (smpl_buf == NULL) {
-		DPRINT(("Can't allocate sampling buffer\n"));
-		return -ENOMEM;
-	}
-
-	DPRINT(("smpl_buf @%p\n", smpl_buf));
-
-	/* allocate vma */
-	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
-	if (!vma) {
-		DPRINT(("Cannot allocate vma\n"));
-		goto error_kmem;
-	}
-	memset(vma, 0, sizeof(*vma));
-
-	/*
-	 * partially initialize the vma for the sampling buffer
-	 */
-	vma->vm_mm	     = mm;
-	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
-	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
-
-	/*
-	 * Let's do the difficult operations next.
-	 *
-	 * now we atomically find some area in the address space and
-	 * remap the buffer in it.
-	 */
-	down_write(&mm->mmap_sem);
-
-	/*
-	 * find some free area in address space, must have mmap sem held
-	 *
-	 * NOTE(review): only a return value of 0 is treated as failure here;
-	 * confirm that pfm_get_unmapped_area() cannot return an error-encoded
-	 * address instead.
-	 */
-	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
-	if (vma->vm_start == 0UL) {
-		DPRINT(("Cannot find unmapped area for size %ld\n", size));
-		goto error_unlock;
-	}
-	vma->vm_end = vma->vm_start + size;
-	vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
-
-	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, smpl_buf, vma->vm_start));
-
-	/* can only be applied to current task, need to have the mm semaphore held when called */
-	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
-		DPRINT(("Can't remap buffer\n"));
-		goto error_unlock;
-	}
-
-	/*
-	 * now insert the vma in the vm list for the process, must be
-	 * done with mmap lock held
-	 *
-	 * NOTE(review): the return value of insert_vm_struct() is ignored.
-	 */
-	insert_vm_struct(mm, vma);
-
-	mm->total_vm  += size >> PAGE_SHIFT;
-	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
-							vma_pages(vma));
-	up_write(&mm->mmap_sem);
-
-	/*
-	 * Everything succeeded: only now connect the buffer to the context,
-	 * so that a failure above never leaves ctx pointing at freed memory.
-	 */
-	ctx->ctx_smpl_hdr   = smpl_buf;
-	ctx->ctx_smpl_size  = size; /* aligned size */
-
-	/*
-	 * keep track of user level virtual address
-	 */
-	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
-	*(unsigned long *)user_vaddr = vma->vm_start;
-
-	return 0;
-
-error_unlock:
-	up_write(&mm->mmap_sem);
-	kmem_cache_free(vm_area_cachep, vma);
-error_kmem:
-	pfm_rvfree(smpl_buf, size);
-
-	return -ENOMEM;
-}
-
-/*
- * Ownership check inspired by ptrace_attach().
- *
- * Returns non-zero when the caller's uid/gid do not match every one of the
- * target task's real, effective and saved ids and the caller does not hold
- * CAP_SYS_PTRACE. Returns 0 when the caller is allowed to operate on task.
- *
- * XXX: do something better here
- */
-static int
-pfm_bad_permissions(struct task_struct *task)
-{
-	int uid_mismatch, gid_mismatch;
-
-	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
-		current->uid,
-		current->gid,
-		task->euid,
-		task->suid,
-		task->uid,
-		task->egid,
-		task->sgid));
-
-	uid_mismatch = current->uid != task->euid
-		    || current->uid != task->suid
-		    || current->uid != task->uid;
-
-	gid_mismatch = current->gid != task->egid
-		    || current->gid != task->sgid
-		    || current->gid != task->gid;
-
-	/* all ids match: no need to consult the capability */
-	if (!uid_mismatch && !gid_mismatch)
-		return 0;
-
-	return !capable(CAP_SYS_PTRACE);
-}
-
-/*
- * Sanity check of the context creation arguments.
- *
- * Returns -EINVAL when a system-wide session also requests blocking
- * notification, 0 otherwise. The task argument is currently unused.
- */
-static int
-pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
-{
-	int flags = pfx->ctx_flags;
-
-	/*
-	 * a system-wide session cannot block
-	 */
-	if ((flags & PFM_FL_SYSTEM_WIDE) && (flags & PFM_FL_NOTIFY_BLOCK)) {
-		DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
-		return -EINVAL;
-	}
-
-	/* probably more to add here */
-	return 0;
-}
-
-/*
- * Look up the sampling buffer format requested in arg, validate the request,
- * link the format to the context and, when the format asks for a non-zero
- * size, allocate and map the sampling buffer into the caller's address space.
- *
- * Returns 0 on success, -EINVAL when the format cannot be found, otherwise
- * the error reported by the format callbacks or by the buffer allocation.
- */
-static int
-pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
-		     unsigned int cpu, pfarg_context_t *arg)
-{
-	pfm_buffer_fmt_t *buf_fmt;
-	unsigned long buf_size = 0UL;
-	void *user_addr = NULL;
-	void *extra_arg = NULL;
-	int err;
-#define PFM_CTXARG_BUF_ARG(a)	(pfm_buffer_fmt_t *)(a+1)
-
-	/* invoke and lock buffer format, if found */
-	buf_fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
-	if (buf_fmt == NULL) {
-		DPRINT(("[%d] cannot find buffer format\n", task->pid));
-		return -EINVAL;
-	}
-
-	/*
-	 * the format specific argument, when there is one, MUST immediately
-	 * follow the pfarg_context_t in memory
-	 */
-	if (buf_fmt->fmt_arg_size)
-		extra_arg = PFM_CTXARG_BUF_ARG(arg);
-
-	err = pfm_buf_fmt_validate(buf_fmt, task, ctx_flags, cpu, extra_arg);
-
-	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, extra_arg, err));
-
-	if (err)
-		return err;
-
-	/* link buffer format and context */
-	ctx->ctx_buf_fmt = buf_fmt;
-
-	/*
-	 * a non-zero size means the format wants to use the perfmon buffer
-	 * allocation/mapping service
-	 */
-	err = pfm_buf_fmt_getsize(buf_fmt, task, ctx_flags, cpu, extra_arg, &buf_size);
-	if (err)
-		return err;
-
-	if (buf_size) {
-		/*
-		 * buffer is always remapped into the caller's address space
-		 */
-		err = pfm_smpl_buffer_alloc(current, ctx, buf_size, &user_addr);
-		if (err)
-			return err;
-
-		/* keep track of user address of buffer */
-		arg->ctx_smpl_vaddr = user_addr;
-	}
-
-	return pfm_buf_fmt_init(buf_fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, extra_arg);
-}
-
-/*
- * Put the software PMU state of a context back to its defaults: default
- * values for the PMCs, the masks of accessible PMCs/PMDs, and no debug
- * registers in use.
- */
-static void
-pfm_reset_pmu_state(pfm_context_t *ctx)
-{
-	int pmc;
-
-	/*
-	 * useful in case of re-enable after disable
-	 */
-	ctx->ctx_used_ibrs[0] = 0UL;
-	ctx->ctx_used_dbrs[0] = 0UL;
-
-	/*
-	 * Install the reset value of every implemented PMC, starting at PMC1.
-	 *
-	 * PMD registers need no work here: they are set to 0UL when the
-	 * context is memset().
-	 */
-	for (pmc = 1; PMC_IS_LAST(pmc) == 0; pmc++) {
-		if (PMC_IS_IMPL(pmc) == 0)
-			continue;
-
-		ctx->ctx_pmcs[pmc] = PMC_DFL_VAL(pmc);
-
-		DPRINT(("pmc[%d]=0x%lx\n", pmc, ctx->ctx_pmcs[pmc]));
-	}
-
-	/*
-	 * On context switch restore, ALL pmc and ALL pmd must be restored,
-	 * even those not actively used by the task. In UP, the incoming
-	 * process may otherwise pick up left over PMC, PMD state from the
-	 * previous process. Stale PMC are harmful because they may change
-	 * what is being measured, hence the entire PMC state is
-	 * systematically reinstalled. In SMP, the same can happen on one CPU
-	 * but also between 2 CPUs.
-	 *
-	 * For PMD the problem is information leaking, especially to user
-	 * level when psr.sp=0.
-	 *
-	 * There is unfortunately no easy way to avoid this on either UP or
-	 * SMP, and it slows down the pfm_load_regs() function.
-	 */
-
-	/*
-	 * bitmask of all PMCs accessible to this context; PMC0 is treated
-	 * differently and is therefore excluded
-	 */
-	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
-
-	/*
-	 * bitmask of all PMDs that are accessible to this context
-	 */
-	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
-
-	DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
-}
-
-/*
- * Report in *sz how many extra bytes of format specific argument follow
- * the pfarg_context_t passed in arg.
- *
- * *sz is 0 when the null uuid is given (no sampling). Returns -EINVAL when
- * the requested buffer format is unknown, 0 otherwise.
- */
-static int
-pfm_ctx_getsize(void *arg, size_t *sz)
-{
-	pfarg_context_t *ctx_arg = (pfarg_context_t *)arg;
-	pfm_buffer_fmt_t *buf_fmt;
-
-	*sz = 0;
-
-	/* null uuid: nothing to copy beyond the fixed argument */
-	if (!pfm_uuid_cmp(ctx_arg->ctx_smpl_buf_id, pfm_null_uuid))
-		return 0;
-
-	buf_fmt = pfm_find_buffer_fmt(ctx_arg->ctx_smpl_buf_id);
-	if (buf_fmt != NULL) {
-		/* get just enough to copy in user parameters */
-		*sz = buf_fmt->fmt_arg_size;
-		DPRINT(("arg_size=%lu\n", *sz));
-		return 0;
-	}
-
-	DPRINT(("cannot find buffer format\n"));
-	return -EINVAL;
-}
-
-
-
-/*
- * Check whether the context can be attached to the task.
- *
- * Attaching is refused for:
- * 	- a kernel thread (no mm)                        -> -EPERM
- * 	- a task not owned by the caller                 -> -EPERM
- * 	- a blocking context loaded on the caller itself -> -EINVAL
- * 	- a zombie task                                  -> -EBUSY
- * 	- another task that is neither stopped nor traced -> -EBUSY
- *
- * Returns 0 when the task is acceptable. For a task other than the caller,
- * the function waits until the task is off any CPU before returning.
- */
-static int
-pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
-{
-	int is_self = (task == current);
-
-	/*
-	 * no kernel task
-	 */
-	if (task->mm == NULL) {
-		DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
-		return -EPERM;
-	}
-
-	/*
-	 * no task not owned by caller
-	 */
-	if (pfm_bad_permissions(task)) {
-		DPRINT(("no permission to attach to  [%d]\n", task->pid));
-		return -EPERM;
-	}
-
-	/*
-	 * cannot block in self-monitoring mode
-	 */
-	if (CTX_OVFL_NOBLOCK(ctx) == 0 && is_self) {
-		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
-		return -EINVAL;
-	}
-
-	if (task->exit_state == EXIT_ZOMBIE) {
-		DPRINT(("cannot attach to  zombie task [%d]\n", task->pid));
-		return -EBUSY;
-	}
-
-	/*
-	 * always ok for self
-	 */
-	if (is_self)
-		return 0;
-
-	/*
-	 * any other task must be stopped or traced
-	 */
-	if (!((task->state == TASK_STOPPED) || (task->state == TASK_TRACED))) {
-		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
-		return -EBUSY;
-	}
-
-	/*
-	 * make sure the task is off any CPU
-	 */
-	wait_task_inactive(task);
-
-	/* more to come... */
-
-	return 0;
-}
-
-/*
- * Resolve pid into a task the context may be attached to.
- *
- * On success (return 0), *task is set. When the task is not the caller, a
- * reference taken with get_task_struct() is still held on it at that point.
- * Returns -EPERM for pid < 2, -ESRCH when no such task exists, or the error
- * from pfm_task_incompatible(), in which case the reference is dropped.
- */
-static int
-pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
-{
-	struct task_struct *target;
-	int err;
-
-	/* XXX: need to add more checks here */
-	if (pid < 2)
-		return -EPERM;
-
-	if (pid == current->pid) {
-		target = current;
-	} else {
-		read_lock(&tasklist_lock);
-
-		target = find_task_by_pid(pid);
-
-		/* make sure task cannot go away while we operate on it */
-		if (target != NULL)
-			get_task_struct(target);
-
-		read_unlock(&tasklist_lock);
-
-		if (target == NULL)
-			return -ESRCH;
-	}
-
-	err = pfm_task_incompatible(ctx, target);
-	if (err != 0) {
-		/* drop the reference taken above, none is held on current */
-		if (target != current)
-			pfm_put_task(target);
-		return err;
-	}
-
-	*task = target;
-	return 0;
-}
-
-
-
-/*
- * Create a new perfmon context and the file descriptor that designates it.
- *
- * @ctx:   ignored on entry; it is overwritten by pfm_context_alloc() below
- * @arg:   pfarg_context_t request; ctx_fd is filled in with the new file
- *         descriptor (ctx_smpl_vaddr is set by pfm_setup_buffer_fmt() when a
- *         sampling buffer is mapped)
- * @count: unused in this function
- * @regs:  only passed to pfm_buf_fmt_exit() on the error path
- *
- * Returns 0 on success or a negative error code. When running on Xen, the
- * request is additionally forwarded to the hypervisor if this domain is
- * the xenoprof primary.
- */
-static int
-pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	pfarg_context_t *req = (pfarg_context_t *)arg;
-	struct file *filp;
-	int ctx_flags;
-	int ret;
-
-	/* let's check the arguments first */
-	ret = pfarg_is_sane(current, req);
-	if (ret < 0) return ret;
-
-	ctx_flags = req->ctx_flags;
-
-	/* default error for the two allocations below */
-	ret = -ENOMEM;
-
-	ctx = pfm_context_alloc();
-	if (!ctx) goto error;
-
-	/* on success, ret is the new file descriptor */
-	ret = pfm_alloc_fd(&filp);
-	if (ret < 0) goto error_file;
-
-	req->ctx_fd = ctx->ctx_fd = ret;
-
-	/*
-	 * attach context to file
-	 */
-	filp->private_data = ctx;
-
-	/*
-	 * does the user want to sample?
-	 * (a non-null uuid selects a sampling buffer format)
-	 */
-	if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
-		ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
-		if (ret) goto buffer_error;
-	}
-
-	/*
-	 * init context protection lock
-	 */
-	spin_lock_init(&ctx->ctx_lock);
-
-	/*
-	 * context is unloaded
-	 */
-	ctx->ctx_state = PFM_CTX_UNLOADED;
-
-	/*
-	 * initialization of context's flags
-	 */
-	ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
-	ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
-	ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
-	ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
-	/*
-	 * will move to set properties
-	 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
-	 */
-
-	/*
-	 * init restart semaphore to locked
-	 */
-	init_completion(&ctx->ctx_restart_done);
-
-	/*
-	 * activation is used in SMP only
-	 */
-	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
-	SET_LAST_CPU(ctx, -1);
-
-	/*
-	 * initialize notification message queue
-	 */
-	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
-	init_waitqueue_head(&ctx->ctx_msgq_wait);
-	init_waitqueue_head(&ctx->ctx_zombieq);
-
-	DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
-		ctx,
-		ctx_flags,
-		ctx->ctx_fl_system,
-		ctx->ctx_fl_block,
-		ctx->ctx_fl_excl_idle,
-		ctx->ctx_fl_no_msg,
-		ctx->ctx_fd));
-
-	/*
-	 * initialize soft PMU state
-	 */
-	pfm_reset_pmu_state(ctx);
-
-	if (is_running_on_xen()) {
-		/*
-		 * kludge to get xenoprof.is_primary.
-		 * XENOPROF_init/ia64 is nop. so it is safe to call it here.
-		 *
-		 * NOTE(review): if a sampling buffer was set up above and one
-		 * of the hypercalls below fails, the buffer_error path does
-		 * not visibly release that buffer or its user mapping --
-		 * confirm whether pfm_buf_fmt_exit()/pfm_context_free() do.
-		 */
-		struct xenoprof_init init;
-		ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
-		if (ret)
-			goto buffer_error;
-		init_xenoprof_primary(init.is_primary);
-
-		if (is_xenoprof_primary()) {
-			ret = HYPERVISOR_perfmon_op(PFM_CREATE_CONTEXT, arg, 0);
-			if (ret)
-				goto buffer_error;
-		}
-	}
-	return 0;
-
-	/* unwind in reverse order of acquisition: fd/file, buffer format, context */
-buffer_error:
-	pfm_free_fd(ctx->ctx_fd, filp);
-
-	if (ctx->ctx_buf_fmt) {
-		pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
-	}
-error_file:
-	pfm_context_free(ctx);
-
-error:
-	return ret;
-}
-
-/*
- * Compute the value a counter is reloaded with after an overflow.
- *
- * The base is the long or short reset value of the register. When the
- * register has PFM_REGFL_RANDOM set, the current seed masked by reg->mask
- * is subtracted from it and a new seed is generated for the next call.
- * The result is also remembered in reg->lval.
- */
-static inline unsigned long
-pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
-{
-	extern unsigned long carta_random32 (unsigned long seed);
-	unsigned long reset_val, cur_seed, rnd_mask, next_seed;
-
-	if (is_long_reset)
-		reset_val = reg->long_reset;
-	else
-		reset_val = reg->short_reset;
-
-	cur_seed = reg->seed;
-	rnd_mask = reg->mask;
-
-	if (reg->flags & PFM_REGFL_RANDOM) {
-		next_seed = carta_random32(cur_seed);
-
-		/* a mask wider than 32 bits needs a full 64-bit random value */
-		if ((rnd_mask >> 32) != 0)
-			next_seed |= carta_random32(cur_seed >> 32) << 32;
-
-		reg->seed = next_seed;
-
-		/* counter values are negative numbers! */
-		reset_val -= (cur_seed & rnd_mask);
-	}
-
-	reg->lval = reset_val;
-
-	return reset_val;
-}
-
-/*
- * Reset the overflowed counters, and the registers they depend on, in the
- * software state of the context only. Used by pfm_reset_regs() when the
- * context is in the PFM_CTX_MASKED state.
- */
-static void
-pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
-{
-	unsigned long pending = ovfl_regs[0] >> PMU_FIRST_COUNTER;
-	unsigned long dependents = 0UL;
-	unsigned long new_val;
-	int idx;
-
-	/*
-	 * first pass: reload every overflowed counter and collect the set of
-	 * other registers which must be reset along with it
-	 */
-	for (idx = PMU_FIRST_COUNTER; pending; idx++, pending >>= 1) {
-
-		if (pending & 0x1UL) {
-			new_val = pfm_new_counter_value(ctx->ctx_pmds + idx, is_long_reset);
-
-			ctx->ctx_pmds[idx].val = new_val;
-			dependents            |= ctx->ctx_pmds[idx].reset_pmds[0];
-
-			DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", idx, new_val));
-		}
-	}
-
-	/*
-	 * second pass: reset the collected registers
-	 */
-	for (idx = 0; dependents; idx++, dependents >>= 1) {
-
-		if (dependents & 0x1) {
-			new_val = pfm_new_counter_value(ctx->ctx_pmds + idx, is_long_reset);
-
-			ctx->ctx_pmds[idx].val = new_val;
-
-			DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
-				  is_long_reset ? "long" : "short", idx, new_val));
-		}
-	}
-}
-
-/*
- * Reset the overflowed counters and the registers they depend on.
- *
- * A masked context is handled entirely by pfm_reset_regs_masked().
- * Otherwise counters are rewritten with pfm_write_soft_counter() and
- * non-counting registers with ia64_set_pmd(), followed by a data
- * serialization.
- */
-static void
-pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
-{
-	unsigned long pending;
-	unsigned long dependents = 0UL;
-	unsigned long new_val;
-	int idx;
-
-	DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
-
-	if (ctx->ctx_state == PFM_CTX_MASKED) {
-		pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
-		return;
-	}
-
-	/*
-	 * first pass: reload every overflowed counter and collect the set of
-	 * other registers which must be reset along with it
-	 */
-	pending = ovfl_regs[0] >> PMU_FIRST_COUNTER;
-
-	for (idx = PMU_FIRST_COUNTER; pending; idx++, pending >>= 1) {
-
-		if (pending & 0x1UL) {
-			new_val     = pfm_new_counter_value(ctx->ctx_pmds + idx, is_long_reset);
-			dependents |= ctx->ctx_pmds[idx].reset_pmds[0];
-
-			DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", idx, new_val));
-
-			pfm_write_soft_counter(ctx, idx, new_val);
-		}
-	}
-
-	/*
-	 * second pass: reset the collected registers
-	 */
-	for (idx = 0; dependents; idx++, dependents >>= 1) {
-
-		if (dependents & 0x1) {
-			new_val = pfm_new_counter_value(ctx->ctx_pmds + idx, is_long_reset);
-
-			if (PMD_IS_COUNTING(idx)) {
-				pfm_write_soft_counter(ctx, idx, new_val);
-			} else {
-				ia64_set_pmd(idx, new_val);
-			}
-
-			DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
-				  is_long_reset ? "long" : "short", idx, new_val));
-		}
-	}
-	ia64_srlz_d();
-}
-
-/*
- * Program a set of PMC registers for a context.
- *
- * @ctx:   context to operate on
- * @arg:   array of pfarg_reg_t requests
- * @count: number of entries in the array
- * @regs:  passed through to the per-register write checker
- *
- * When running on Xen the whole request is forwarded to the hypervisor if
- * this domain is the xenoprof primary, otherwise nothing is done and 0 is
- * returned.
- *
- * Returns 0 on success, -EINVAL for a zombie context, -EBUSY when a loaded
- * system-wide context is accessed from the wrong CPU. On a per-register
- * error, the failing request gets PFM_REG_RETFL_EINVAL set in its reg_flags
- * and the function returns -EINVAL or the write checker's error. Requests
- * processed before the failing one have already been committed.
- */
-static int
-pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct thread_struct *thread = NULL;
-	struct task_struct *task;
-	pfarg_reg_t *req = (pfarg_reg_t *)arg;
-	unsigned long value, pmc_pm;
-	unsigned long smpl_pmds, reset_pmds, impl_pmds;
-	unsigned int cnum, reg_flags, flags, pmc_type;
-	int i, can_access_pmu = 0, is_loaded, is_system, expert_mode;
-	int is_monitor, is_counting, state;
-	int ret = -EINVAL;
-	pfm_reg_check_t	wr_func;
-#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
-
-	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_WRITE_PMCS,
-			                             arg, count);
-		return 0;
-	}
-	state     = ctx->ctx_state;
-	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
-	is_system = ctx->ctx_fl_system;
-	task      = ctx->ctx_task;
-	impl_pmds = pmu_conf->impl_pmds[0];
-
-	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
-
-	if (is_loaded) {
-		thread = &task->thread;
-		/*
-		 * In system wide and when the context is loaded, access can only happen
-		 * when the caller is running on the CPU being monitored by the session.
-		 * It does not have to be the owner (ctx_task) of the context per se.
-		 */
-		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			return -EBUSY;
-		}
-		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-	}
-	expert_mode = pfm_sysctl.expert_mode;
-
-	for (i = 0; i < count; i++, req++) {
-
-		cnum       = req->reg_num;
-		reg_flags  = req->reg_flags;
-		value      = req->reg_value;
-		smpl_pmds  = req->reg_smpl_pmds[0];
-		reset_pmds = req->reg_reset_pmds[0];
-		flags      = 0;
-
-
-		if (cnum >= PMU_MAX_PMCS) {
-			DPRINT(("pmc%u is invalid\n", cnum));
-			goto error;
-		}
-
-		pmc_type   = pmu_conf->pmc_desc[cnum].type;
-		pmc_pm     = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
-		is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
-		is_monitor  = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
-
-		/*
-		 * we reject all non implemented PMC as well
-		 * as attempts to modify PMC[0-3] which are used
-		 * as status registers by the PMU
-		 */
-		if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
-			DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
-			goto error;
-		}
-		wr_func = pmu_conf->pmc_desc[cnum].write_check;
-		/*
-		 * If the PMC is a monitor, then if the value is not the default:
-		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
-		 * 	- per-task           : PMCx.pm=0 (user monitor)
-		 */
-		if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
-			DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
-				cnum,
-				pmc_pm,
-				is_system));
-			goto error;
-		}
-
-		if (is_counting) {
-			/*
-			 * enforce generation of overflow interrupt. Necessary on all
-			 * CPUs.
-			 */
-			value |= 1 << PMU_PMC_OI;
-
-			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
-				flags |= PFM_REGFL_OVFL_NOTIFY;
-			}
-
-			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
-
-			/* verify validity of smpl_pmds */
-			if ((smpl_pmds & impl_pmds) != smpl_pmds) {
-				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
-				goto error;
-			}
-
-			/* verify validity of reset_pmds */
-			if ((reset_pmds & impl_pmds) != reset_pmds) {
-				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
-				goto error;
-			}
-		} else {
-			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
-				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
-				goto error;
-			}
-			/* eventid on non-counting monitors are ignored */
-		}
-
-		/*
-		 * execute write checker, if any
-		 */
-		if (likely(expert_mode == 0 && wr_func)) {
-			ret = (*wr_func)(task, ctx, cnum, &value, regs);
-			if (ret) goto error;
-			/* restore the default error code for the checks of the next register */
-			ret = -EINVAL;
-		}
-
-		/*
-		 * no error on this register
-		 */
-		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
-
-		/*
-		 * Now we commit the changes to the software state
-		 */
-
-		/*
-		 * update overflow information
-		 */
-		if (is_counting) {
-			/*
-			 * full flag update each time a register is programmed
-			 */
-			ctx->ctx_pmds[cnum].flags = flags;
-
-			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
-			ctx->ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
-			ctx->ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;
-
-			/*
-			 * Mark all PMDS to be accessed as used.
-			 *
-			 * We do not keep track of PMC because we have to
-			 * systematically restore ALL of them.
-			 *
-			 * We do not update the used_monitors mask, because
-			 * if we have not programmed them, then will be in
-			 * a quiescent state, therefore we will not need to
-			 * mask/restore then when context is MASKED.
-			 */
-			CTX_USED_PMD(ctx, reset_pmds);
-			CTX_USED_PMD(ctx, smpl_pmds);
-			/*
-			 * make sure we do not try to reset on
-			 * restart because we have established new values
-			 *
-			 * Only the bit of this register must be cleared. The
-			 * parentheses matter: ~1UL << cnum would parse as
-			 * (~1UL) << cnum and also clear the pending overflow
-			 * bits of every lower-numbered register.
-			 */
-			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
-		}
-		/*
-		 * Needed in case the user does not initialize the equivalent
-		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
-		 * possible leak here.
-		 */
-		CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
-
-		/*
-		 * keep track of the monitor PMC that we are using.
-		 * we save the value of the pmc in ctx_pmcs[] and if
-		 * the monitoring is not stopped for the context we also
-		 * place it in the saved state area so that it will be
-		 * picked up later by the context switch code.
-		 *
-		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
-		 *
-		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
-		 * monitoring needs to be stopped.
-		 */
-		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
-
-		/*
-		 * update context state
-		 */
-		ctx->ctx_pmcs[cnum] = value;
-
-		if (is_loaded) {
-			/*
-			 * write thread state
-			 */
-			if (is_system == 0) thread->pmcs[cnum] = value;
-
-			/*
-			 * write hardware register if we can
-			 */
-			if (can_access_pmu) {
-				ia64_set_pmc(cnum, value);
-			}
-#ifdef CONFIG_SMP
-			else {
-				/*
-				 * per-task SMP only here
-				 *
-				 * we are guaranteed that the task is not running on the other CPU,
-				 * we indicate that this PMC will need to be reloaded if the task
-				 * is rescheduled on the CPU it ran last on.
-				 */
-				ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
-			}
-#endif
-		}
-
-		DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
-			  cnum,
-			  value,
-			  is_loaded,
-			  can_access_pmu,
-			  flags,
-			  ctx->ctx_all_pmcs[0],
-			  ctx->ctx_used_pmds[0],
-			  ctx->ctx_pmds[cnum].eventid,
-			  smpl_pmds,
-			  reset_pmds,
-			  ctx->ctx_reload_pmcs[0],
-			  ctx->ctx_used_monitors[0],
-			  ctx->ctx_ovfl_regs[0]));
-	}
-
-	/*
-	 * make sure the changes are visible
-	 */
-	if (can_access_pmu) ia64_srlz_d();
-
-	return 0;
-error:
-	/* req still points at the request which failed */
-	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
-	return ret;
-}
-
-/*
- * Program a set of PMD registers for a context.
- *
- * @ctx:   context to operate on
- * @arg:   array of pfarg_reg_t requests
- * @count: number of entries in the array
- * @regs:  passed through to the per-register write checker
- *
- * When running on Xen the whole request is forwarded to the hypervisor if
- * this domain is the xenoprof primary, otherwise nothing is done and 0 is
- * returned.
- *
- * Returns 0 on success, -EINVAL for a zombie context, -EBUSY when a loaded
- * system-wide context is accessed from the wrong CPU. On a per-register
- * error, the failing request gets PFM_REG_RETFL_EINVAL set in its reg_flags
- * and the function returns -EINVAL or the write checker's error. Requests
- * processed before the failing one have already been committed.
- */
-static int
-pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct thread_struct *thread = NULL;
-	struct task_struct *task;
-	pfarg_reg_t *req = (pfarg_reg_t *)arg;
-	unsigned long value, hw_value, ovfl_mask;
-	unsigned int cnum;
-	int i, can_access_pmu = 0, state;
-	int is_counting, is_loaded, is_system, expert_mode;
-	int ret = -EINVAL;
-	pfm_reg_check_t wr_func;
-
-	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_WRITE_PMDS,
-			                             arg, count);
-		return 0;
-	}
-
-	state     = ctx->ctx_state;
-	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
-	is_system = ctx->ctx_fl_system;
-	ovfl_mask = pmu_conf->ovfl_val;
-	task      = ctx->ctx_task;
-
-	if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL;
-
-	/*
-	 * on both UP and SMP, we can only write to the PMD when the task is
-	 * the owner of the local PMU.
-	 */
-	if (likely(is_loaded)) {
-		thread = &task->thread;
-		/*
-		 * In system wide and when the context is loaded, access can only happen
-		 * when the caller is running on the CPU being monitored by the session.
-		 * It does not have to be the owner (ctx_task) of the context per se.
-		 */
-		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			return -EBUSY;
-		}
-		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-	}
-	expert_mode = pfm_sysctl.expert_mode;
-
-	for (i = 0; i < count; i++, req++) {
-
-		cnum  = req->reg_num;
-		value = req->reg_value;
-
-		if (!PMD_IS_IMPL(cnum)) {
-			DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
-			goto abort_mission;
-		}
-		is_counting = PMD_IS_COUNTING(cnum);
-		wr_func     = pmu_conf->pmd_desc[cnum].write_check;
-
-		/*
-		 * execute write checker, if any
-		 */
-		if (unlikely(expert_mode == 0 && wr_func)) {
-			unsigned long v = value;
-
-			ret = (*wr_func)(task, ctx, cnum, &v, regs);
-			if (ret) goto abort_mission;
-
-			value = v;
-			/* restore the default error code for the checks of the next register */
-			ret   = -EINVAL;
-		}
-
-		/*
-		 * no error on this register
-		 */
-		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
-
-		/*
-		 * now commit changes to software state
-		 */
-		hw_value = value;
-
-		/*
-		 * update virtualized (64bits) counter
-		 */
-		if (is_counting) {
-			/*
-			 * write context state
-			 */
-			ctx->ctx_pmds[cnum].lval = value;
-
-			/*
-			 * when context is loaded we use the split value:
-			 * the bits covered by ovfl_mask go to the hardware
-			 * register, the remaining upper bits stay in the
-			 * software counter
-			 */
-			if (is_loaded) {
-				hw_value = value &  ovfl_mask;
-				value    = value & ~ovfl_mask;
-			}
-		}
-		/*
-		 * update reset values (not just for counters)
-		 */
-		ctx->ctx_pmds[cnum].long_reset  = req->reg_long_reset;
-		ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
-
-		/*
-		 * update randomization parameters (not just for counters)
-		 */
-		ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
-		ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
-
-		/*
-		 * update context value
-		 */
-		ctx->ctx_pmds[cnum].val  = value;
-
-		/*
-		 * Keep track of what we use
-		 *
-		 * We do not keep track of PMC because we have to
-		 * systematically restore ALL of them.
-		 */
-		CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
-
-		/*
-		 * mark this PMD register used as well
-		 */
-		CTX_USED_PMD(ctx, RDEP(cnum));
-
-		/*
-		 * make sure we do not try to reset on
-		 * restart because we have established new values
-		 *
-		 * Only the bit of this register must be cleared. The
-		 * parentheses matter: ~1UL << cnum would parse as
-		 * (~1UL) << cnum and also clear the pending overflow
-		 * bits of every lower-numbered register.
-		 */
-		if (is_counting && state == PFM_CTX_MASKED) {
-			ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
-		}
-
-		if (is_loaded) {
-			/*
-			 * write thread state
-			 */
-			if (is_system == 0) thread->pmds[cnum] = hw_value;
-
-			/*
-			 * write hardware register if we can
-			 */
-			if (can_access_pmu) {
-				ia64_set_pmd(cnum, hw_value);
-			} else {
-#ifdef CONFIG_SMP
-				/*
-				 * we are guaranteed that the task is not running on the other CPU,
-				 * we indicate that this PMD will need to be reloaded if the task
-				 * is rescheduled on the CPU it ran last on.
-				 */
-				ctx->ctx_reload_pmds[0] |= 1UL << cnum;
-#endif
-			}
-		}
-
-		DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx  short_reset=0x%lx "
-			  "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
-			cnum,
-			value,
-			is_loaded,
-			can_access_pmu,
-			hw_value,
-			ctx->ctx_pmds[cnum].val,
-			ctx->ctx_pmds[cnum].short_reset,
-			ctx->ctx_pmds[cnum].long_reset,
-			PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
-			ctx->ctx_pmds[cnum].seed,
-			ctx->ctx_pmds[cnum].mask,
-			ctx->ctx_used_pmds[0],
-			ctx->ctx_pmds[cnum].reset_pmds[0],
-			ctx->ctx_reload_pmds[0],
-			ctx->ctx_all_pmds[0],
-			ctx->ctx_ovfl_regs[0]));
-	}
-
-	/*
-	 * make changes visible
-	 */
-	if (can_access_pmu) ia64_srlz_d();
-
-	return 0;
-
-abort_mission:
-	/*
-	 * for now, we have only one possibility for error;
-	 * req still points at the request which failed
-	 */
-	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
-	return ret;
-}
-
-/*
- * Read a set of PMD registers of a context.
- *
- * @ctx:   context to operate on
- * @arg:   array of pfarg_reg_t requests; for each entry reg_value,
- *         reg_flags and reg_last_reset_val are filled in
- * @count: number of entries in the array
- * @regs:  passed through to the per-register read checker
- *
- * Returns 0 on success, -EINVAL for a zombie context, -EBUSY when a loaded
- * system-wide context is accessed from the wrong CPU. On a per-register
- * error (unimplemented register, register not used by the context, or read
- * checker failure) the failing request gets PFM_REG_RETFL_EINVAL set in its
- * reg_flags and the function returns -EINVAL or the read checker's error.
- *
- * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function.
- * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an
- * interrupt is delivered during the call, it will be kept pending until we leave, making
- * it appear as if it had been generated at the UNPROTECT_CONTEXT(). At least we are
- * guaranteed to return consistent data to the user, it may simply be old. It is not
- * trivial to treat the overflow while inside the call because you may end up in
- * some module sampling buffer code causing deadlocks.
- */
-static int
-pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct thread_struct *thread = NULL;
-	struct task_struct *task;
-	unsigned long val = 0UL, lval, ovfl_mask, sval;
-	pfarg_reg_t *req = (pfarg_reg_t *)arg;
-	unsigned int cnum, reg_flags = 0;
-	int i, can_access_pmu = 0, state;
-	int is_loaded, is_system, is_counting, expert_mode;
-	int ret = -EINVAL;
-	pfm_reg_check_t rd_func;
-	XEN_NOT_SUPPORTED_YET;
-
-	/*
-	 * access is possible when loaded only for
-	 * self-monitoring tasks or in UP mode
-	 */
-
-	state     = ctx->ctx_state;
-	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
-	is_system = ctx->ctx_fl_system;
-	ovfl_mask = pmu_conf->ovfl_val;
-	task      = ctx->ctx_task;
-
-	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
-
-	if (likely(is_loaded)) {
-		thread = &task->thread;
-		/*
-		 * In system wide and when the context is loaded, access can only happen
-		 * when the caller is running on the CPU being monitored by the session.
-		 * It does not have to be the owner (ctx_task) of the context per se.
-		 */
-		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			return -EBUSY;
-		}
-		/*
-		 * this can be true when not self-monitoring only in UP
-		 */
-		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-
-		if (can_access_pmu) ia64_srlz_d();
-	}
-	expert_mode = pfm_sysctl.expert_mode; 
-
-	DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
-		is_loaded,
-		can_access_pmu,
-		state));
-
-	/*
-	 * on both UP and SMP, we can only read the PMD from the hardware register when
-	 * the task is the owner of the local PMU.
-	 */
-
-	for (i = 0; i < count; i++, req++) {
-
-		cnum        = req->reg_num;
-		reg_flags   = req->reg_flags;
-
-		if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
-		/*
-		 * we can only read the registers that we use. That includes
-		 * the ones we explicitly initialize AND the ones we want included
-		 * in the sampling buffer (smpl_regs).
-		 *
-		 * Having this restriction allows optimization in the ctxsw routine
-		 * without compromising security (leaks)
-		 */
-		if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
-
-		/* software part of the counter and last reset value */
-		sval        = ctx->ctx_pmds[cnum].val;
-		lval        = ctx->ctx_pmds[cnum].lval;
-		is_counting = PMD_IS_COUNTING(cnum);
-
-		/*
-		 * If the task is not the current one, then we check if the
-		 * PMU state is still in the local live register due to lazy ctxsw.
-		 * If true, then we read directly from the registers.
-		 */
-		if (can_access_pmu){
-			val = ia64_get_pmd(cnum);
-		} else {
-			/*
-			 * context has been saved
-			 * if context is zombie, then task does not exist anymore.
-			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
-			 */
-			val = is_loaded ? thread->pmds[cnum] : 0UL;
-		}
-		rd_func = pmu_conf->pmd_desc[cnum].read_check;
-
-		if (is_counting) {
-			/*
-			 * combine the hardware bits (masked by ovfl_mask) with
-			 * the software maintained part of the counter
-			 *
-			 * XXX: need to check for overflow when loaded
-			 */
-			val &= ovfl_mask;
-			val += sval;
-		}
-
-		/*
-		 * execute read checker, if any
-		 */
-		if (unlikely(expert_mode == 0 && rd_func)) {
-			unsigned long v = val;
-			ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
-			if (ret) goto error;
-			val = v;
-			/* restore the default error code for the checks of the next register */
-			ret = -EINVAL;
-		}
-
-		PFM_REG_RETFLAG_SET(reg_flags, 0);
-
-		DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
-
-		/*
-		 * update register return value, abort all if problem during copy.
-		 * we only modify the reg_flags field. no check mode is fine because
-		 * access has been verified upfront in sys_perfmonctl().
-		 */
-		req->reg_value            = val;
-		req->reg_flags            = reg_flags;
-		req->reg_last_reset_val   = lval;
-	}
-
-	return 0;
-
-error:
-	/* req still points at the request which failed */
-	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
-	return ret;
-}
-
-/*
- * Exported entry point to write PMCs of the context currently owning the
- * PMU (GET_PMU_CTX()).
- *
- * Returns -EINVAL when req is NULL or no such context exists, -EBUSY when
- * task is not the caller and the context is not system-wide, otherwise the
- * result of pfm_write_pmcs().
- */
-int
-pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *ctx;
-
-	if (!req)
-		return -EINVAL;
-
-	ctx = GET_PMU_CTX();
-	if (!ctx)
-		return -EINVAL;
-
-	/*
-	 * for now limit to current task, which is enough when calling
-	 * from overflow handler
-	 */
-	if (ctx->ctx_fl_system == 0 && task != current)
-		return -EBUSY;
-
-	return pfm_write_pmcs(ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_write_pmcs);
-
-/*
- * Exported entry point to read PMDs of the context currently owning the
- * PMU (GET_PMU_CTX()).
- *
- * Returns -EINVAL when req is NULL or no such context exists, -EBUSY when
- * task is not the caller and the context is not system-wide, otherwise the
- * result of pfm_read_pmds().
- */
-int
-pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *ctx;
-
-	if (!req)
-		return -EINVAL;
-
-	ctx = GET_PMU_CTX();
-	if (!ctx)
-		return -EINVAL;
-
-	/*
-	 * for now limit to current task, which is enough when calling
-	 * from overflow handler
-	 */
-	if (ctx->ctx_fl_system == 0 && task != current)
-		return -EBUSY;
-
-	return pfm_read_pmds(ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_read_pmds);
-
-/*
- * Only call this function when a process is trying to
- * write the debug registers (reading is always allowed).
- *
- * Returns 0 when the task may use the debug registers, in which case the
- * count of ptrace users is incremented unless the PMU does not use the
- * debug registers or the task was already accounted for. Returns -1 when
- * the task's own context, or a system-wide session, is using them.
- */
-int
-pfm_use_debug_registers(struct task_struct *task)
-{
-	pfm_context_t *ctx = task->thread.pfm_context;
-	unsigned long flags;
-	int ret;
-
-	/* nothing to arbitrate when the PMU does not use the debug registers */
-	if (pmu_conf->use_rr_dbregs == 0)
-		return 0;
-
-	DPRINT(("called for [%d]\n", task->pid));
-
-	/*
-	 * do it only once
-	 */
-	if (task->thread.flags & IA64_THREAD_DBG_VALID)
-		return 0;
-
-	/*
-	 * Even on SMP, no atomic is needed here: the only way in is via
-	 * ptrace(), which is possible only when the process is stopped.
-	 * Even if the ctxsw out is not totally completed by the time we come
-	 * here, the 'stopped' process cannot be in the middle of the
-	 * pfm_write_ibr_dbr() routine. So this is always safe.
-	 */
-	if (ctx != NULL && ctx->ctx_fl_using_dbreg == 1)
-		return -1;
-
-	LOCK_PFS(flags);
-
-	/*
-	 * We cannot allow setting breakpoints when system wide monitoring
-	 * sessions are using the debug registers.
-	 */
-	if (pfm_sessions.pfs_sys_use_dbregs > 0) {
-		ret = -1;
-	} else {
-		pfm_sessions.pfs_ptrace_use_dbregs++;
-		ret = 0;
-	}
-
-	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
-		  pfm_sessions.pfs_ptrace_use_dbregs,
-		  pfm_sessions.pfs_sys_use_dbregs,
-		  task->pid, ret));
-
-	UNLOCK_PFS(flags);
-
-	return ret;
-}
-
-/*
- * Called for every exiting task that has IA64_THREAD_DBG_VALID set,
- * i.e. a task that was allowed to use the debug registers for
- * debugging through ptrace(). Such a task was therefore not using
- * them for performance monitoring, so the only bookkeeping needed is
- * to drop one "ptraced" debug register user.
- *
- * Returns 0 on success, -1 if the user count was already zero.
- */
-int
-pfm_release_debug_registers(struct task_struct *task)
-{
-	unsigned long flags;
-	int ret = 0;
-
-	if (!pmu_conf->use_rr_dbregs)
-		return 0;
-
-	LOCK_PFS(flags);
-	if (pfm_sessions.pfs_ptrace_use_dbregs != 0) {
-		pfm_sessions.pfs_ptrace_use_dbregs--;
-	} else {
-		/* unbalanced release: report it and leave the counter alone */
-		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
-		ret = -1;
-	}
-	UNLOCK_PFS(flags);
-
-	return ret;
-}
-
-/*
- * Restart command handler (registered in slot 10 of pfm_cmd_tab).
- *
- * Accepted context states:
- *   - PFM_CTX_MASKED;
- *   - PFM_CTX_LOADED, but only when the context has a sampling buffer
- *     whose format provides fmt_restart_active.
- * Any other known state yields -EBUSY, an unknown state -EINVAL.
- *
- * For a system-wide context the caller must run on ctx->ctx_cpu
- * (-EBUSY otherwise). When the monitored task is current, or the
- * context is system-wide, the restart is carried out inline; otherwise
- * the monitored task is either unblocked or flagged so that it performs
- * the reset itself.
- *
- * arg and count are not referenced by this function.
- *
- * Returns 0 on success, -EBUSY or -EINVAL as described above.
- */
-static int
-pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct task_struct *task;
-	pfm_buffer_fmt_t *fmt;
-	pfm_ovfl_ctrl_t rst_ctrl;
-	int state, is_system;
-	int ret = 0;
-	XEN_NOT_SUPPORTED_YET;
-
-	state     = ctx->ctx_state;
-	fmt       = ctx->ctx_buf_fmt;
-	is_system = ctx->ctx_fl_system;
-	task      = PFM_CTX_TASK(ctx);
-
-	switch(state) {
-		case PFM_CTX_MASKED:
-			break;
-		case PFM_CTX_LOADED: 
-			if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
-			/* fall through */
-		case PFM_CTX_UNLOADED:
-		case PFM_CTX_ZOMBIE:
-			DPRINT(("invalid state=%d\n", state));
-			return -EBUSY;
-		default:
-			DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
-			return -EINVAL;
-	}
-
-	/*
-	 * In system wide and when the context is loaded, access can only happen
-	 * when the caller is running on the CPU being monitored by the session.
-	 * It does not have to be the owner (ctx_task) of the context per se.
-	 */
-	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
-		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-		return -EBUSY;
-	}
-
-	/* sanity check */
-	if (unlikely(task == NULL)) {
-		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
-		return -EINVAL;
-	}
-
-	if (task == current || is_system) {
-
-		fmt = ctx->ctx_buf_fmt;
-
-		DPRINT(("restarting self %d ovfl=0x%lx\n",
-			task->pid,
-			ctx->ctx_ovfl_regs[0]));
-
-		if (CTX_HAS_SMPL(ctx)) {
-
-			prefetch(ctx->ctx_smpl_hdr);
-
-			/* defaults handed to the format's restart handler */
-			rst_ctrl.bits.mask_monitoring = 0;
-			rst_ctrl.bits.reset_ovfl_pmds = 0;
-
-			if (state == PFM_CTX_LOADED)
-				ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
-			else
-				ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
-		} else {
-			/* no sampling buffer: always reset the overflowed PMDs */
-			rst_ctrl.bits.mask_monitoring = 0;
-			rst_ctrl.bits.reset_ovfl_pmds = 1;
-		}
-
-		/*
-		 * A non-zero ret from the format handler only skips the
-		 * reset/resume steps below; this branch still returns 0.
-		 */
-		if (ret == 0) {
-			if (rst_ctrl.bits.reset_ovfl_pmds)
-				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
-
-			if (rst_ctrl.bits.mask_monitoring == 0) {
-				DPRINT(("resuming monitoring for [%d]\n", task->pid));
-
-				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
-			} else {
-				DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
-
-				// cannot use pfm_stop_monitoring(task, regs);
-			}
-		}
-		/*
-		 * clear overflowed PMD mask to remove any stale information
-		 */
-		ctx->ctx_ovfl_regs[0] = 0UL;
-
-		/*
-		 * back to LOADED state
-		 */
-		ctx->ctx_state = PFM_CTX_LOADED;
-
-		/*
-		 * XXX: not really useful for self monitoring
-		 */
-		ctx->ctx_fl_can_restart = 0;
-
-		return 0;
-	}
-
-	/*
-	 * restart another task
-	 */
-
-	/*
-	 * When PFM_CTX_MASKED, we cannot issue a restart before the previous
-	 * one is seen by the task.
-	 */
-	if (state == PFM_CTX_MASKED) {
-		if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
-		/*
-		 * will prevent subsequent restart before this one is
-		 * seen by other task
-		 */
-		ctx->ctx_fl_can_restart = 0;
-	}
-
-	/*
-	 * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
-	 * the task is blocked or on its way to block. That's the normal
-	 * restart path. If the monitoring is not masked, then the task
-	 * can be actively monitoring and we cannot directly intervene.
-	 * Therefore we use the trap mechanism to catch the task and
-	 * force it to reset the buffer/reset PMDs.
-	 *
-	 * if non-blocking, then we ensure that the task will go into
-	 * pfm_handle_work() before returning to user mode.
-	 *
-	 * We cannot explicitly reset another task, it MUST always
-	 * be done by the task itself. This works for system wide because
-	 * the tool that is controlling the session is logically doing
-	 * "self-monitoring".
-	 */
-	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
-		DPRINT(("unblocking [%d] \n", task->pid));
-		complete(&ctx->ctx_restart_done);
-	} else {
-		DPRINT(("[%d] armed exit trap\n", task->pid));
-
-		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
-
-		PFM_SET_WORK_PENDING(task, 1);
-
-		pfm_set_task_notify(task);
-
-		/*
-		 * XXX: send reschedule if task runs on another CPU
-		 */
-	}
-	return 0;
-}
-
-/*
- * Debug command handler: arg points to an unsigned int; zero switches
- * perfmon debugging off, any other value switches it on. Switching it
- * off also wipes the per-CPU statistics and re-seeds each minimum
- * overflow-interrupt cycle count with ~0UL. Always returns 0.
- */
-static int
-pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	unsigned int m = *(unsigned int *)arg;
-	unsigned int cpu;
-	XEN_NOT_SUPPORTED_YET;
-
-	pfm_sysctl.debug = (m != 0);
-
-	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
-
-	/* statistics are only reset when debugging is being turned off */
-	if (m != 0)
-		return 0;
-
-	memset(pfm_stats, 0, sizeof(pfm_stats));
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		pfm_stats[cpu].pfm_ovfl_intr_cycles_min = ~0UL;
-
-	return 0;
-}
-
-/*
- * Common implementation behind pfm_write_ibrs() and pfm_write_dbrs().
- *
- * mode:  PFM_CODE_RR to program instruction breakpoint registers (IBR),
- *        PFM_DATA_RR to program data breakpoint registers (DBR).
- * ctx:   perfmon context receiving the values.
- * arg:   vector of pfarg_dbreg_t requests; may be NULL when count is 0.
- * count: number of requests in the vector; may be zero.
- * regs:  not referenced here and may be NULL.
- *
- * Returns 0 on success, -EINVAL when range restrictions are not
- * supported by the PMU, the context is a zombie or a register number is
- * out of range, and -EBUSY when the debug registers are unavailable
- * (wrong CPU for a system-wide session, task being debugged, or ptrace
- * users present). On an invalid register number the offending request
- * is tagged with PFM_REG_RETFL_EINVAL.
- */
-static int
-pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct thread_struct *thread = NULL;
-	struct task_struct *task;
-	pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
-	unsigned long flags;
-	dbreg_t dbreg;
-	unsigned int rnum;
-	int first_time;
-	int ret = 0, state;
-	int i, can_access_pmu = 0;
-	int is_system, is_loaded;
-	int sys_dbregs_claimed = 0;	/* set once this call bumped pfs_sys_use_dbregs */
-
-	if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
-
-	state     = ctx->ctx_state;
-	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
-	is_system = ctx->ctx_fl_system;
-	task      = ctx->ctx_task;
-
-	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
-
-	/*
-	 * on both UP and SMP, we can only write to the PMC when the task is
-	 * the owner of the local PMU.
-	 */
-	if (is_loaded) {
-		thread = &task->thread;
-		/*
-		 * In system wide and when the context is loaded, access can only happen
-		 * when the caller is running on the CPU being monitored by the session.
-		 * It does not have to be the owner (ctx_task) of the context per se.
-		 */
-		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
-			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-			return -EBUSY;
-		}
-		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
-	}
-
-	/*
-	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
-	 * ensuring that no real breakpoint can be installed via this call.
-	 *
-	 * IMPORTANT: regs can be NULL in this function
-	 */
-
-	first_time = ctx->ctx_fl_using_dbreg == 0;
-
-	/*
-	 * don't bother if we are loaded and task is being debugged
-	 */
-	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
-		DPRINT(("debug registers already in use for [%d]\n", task->pid));
-		return -EBUSY;
-	}
-
-	/*
-	 * check for debug registers in system wide mode
-	 *
-	 * Even though a check is done in pfm_context_load(),
-	 * we must repeat it here, in case the registers are
-	 * written after the context is loaded
-	 */
-	if (is_loaded) {
-		LOCK_PFS(flags);
-
-		if (first_time && is_system) {
-			if (pfm_sessions.pfs_ptrace_use_dbregs)
-				ret = -EBUSY;
-			else {
-				pfm_sessions.pfs_sys_use_dbregs++;
-				sys_dbregs_claimed = 1;
-			}
-		}
-		UNLOCK_PFS(flags);
-	}
-
-	if (ret != 0) return ret;
-
-	/*
-	 * mark ourself as user of the debug registers for
-	 * perfmon purposes.
-	 */
-	ctx->ctx_fl_using_dbreg = 1;
-
-	/*
-	 * clear hardware registers to make sure we don't
-	 * pick up stale state.
-	 *
-	 * for a system wide session, we do not use
-	 * thread.dbr, thread.ibr because this process
-	 * never leaves the current CPU and the state
-	 * is shared by all processes running on it
-	 */
-	if (first_time && can_access_pmu) {
-		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
-		for (i=0; i < pmu_conf->num_ibrs; i++) {
-			ia64_set_ibr(i, 0UL);
-			ia64_dv_serialize_instruction();
-		}
-		ia64_srlz_i();
-		for (i=0; i < pmu_conf->num_dbrs; i++) {
-			ia64_set_dbr(i, 0UL);
-			ia64_dv_serialize_data();
-		}
-		ia64_srlz_d();
-	}
-
-	/*
-	 * Now install the values into the registers
-	 */
-	for (i = 0; i < count; i++, req++) {
-
-		rnum      = req->dbreg_num;
-		dbreg.val = req->dbreg_value;
-
-		ret = -EINVAL;
-
-		if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
-			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
-				  rnum, dbreg.val, mode, i, count));
-
-			goto abort_mission;
-		}
-
-		/*
-		 * make sure we do not install enabled breakpoint
-		 * (only odd-numbered registers have their enable bits cleared)
-		 */
-		if (rnum & 0x1) {
-			if (mode == PFM_CODE_RR)
-				dbreg.ibr.ibr_x = 0;
-			else
-				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
-		}
-
-		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);
-
-		/*
-		 * Debug registers, just like PMC, can only be modified
-		 * by a kernel call. Moreover, perfmon() access to those
-		 * registers are centralized in this routine. The hardware
-		 * does not modify the value of these registers, therefore,
-		 * if we save them as they are written, we can avoid having
-		 * to save them on context switch out. This is made possible
-		 * by the fact that when perfmon uses debug registers, ptrace()
-		 * won't be able to modify them concurrently.
-		 */
-		if (mode == PFM_CODE_RR) {
-			CTX_USED_IBR(ctx, rnum);
-
-			if (can_access_pmu) {
-				ia64_set_ibr(rnum, dbreg.val);
-				ia64_dv_serialize_instruction();
-			}
-
-			ctx->ctx_ibrs[rnum] = dbreg.val;
-
-			DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n",
-				rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
-		} else {
-			CTX_USED_DBR(ctx, rnum);
-
-			if (can_access_pmu) {
-				ia64_set_dbr(rnum, dbreg.val);
-				ia64_dv_serialize_data();
-			}
-			ctx->ctx_dbrs[rnum] = dbreg.val;
-
-			DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n",
-				rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
-		}
-	}
-
-	return 0;
-
-abort_mission:
-	/*
-	 * in case it was our first attempt, we undo the global modifications.
-	 *
-	 * The system-wide user count is only dropped when this very call
-	 * raised it: for a context that is not loaded the count is never
-	 * incremented above, so decrementing it here unconditionally would
-	 * underflow pfs_sys_use_dbregs.
-	 */
-	if (first_time) {
-		if (sys_dbregs_claimed) {
-			LOCK_PFS(flags);
-			pfm_sessions.pfs_sys_use_dbregs--;
-			UNLOCK_PFS(flags);
-		}
-		ctx->ctx_fl_using_dbreg = 0;
-	}
-	/*
-	 * install error return flag
-	 */
-	PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);
-
-	return ret;
-}
-
-/*
- * Command handler for writing instruction breakpoint registers:
- * delegates to pfm_write_ibr_dbr() in PFM_CODE_RR mode and hands its
- * result back unchanged.
- */
-static int
-pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	int ret;
-
-	ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
-
-	return ret;
-}
-
-/*
- * Command handler for writing data breakpoint registers: delegates to
- * pfm_write_ibr_dbr() in PFM_DATA_RR mode and hands its result back
- * unchanged.
- */
-static int
-pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	int ret;
-
-	ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
-
-	return ret;
-}
-
-int
-pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *ctx;
-
-	/* a request vector is mandatory */
-	if (!req)
-		return -EINVAL;
-
-	/* operate on whatever context GET_PMU_CTX() reports; none means nothing to program */
-	ctx = GET_PMU_CTX();
-	if (!ctx)
-		return -EINVAL;
-
-	/*
-	 * A per-task context may only be driven on behalf of the calling
-	 * task (sufficient for calls made from an overflow handler);
-	 * system-wide contexts are exempt from that restriction.
-	 */
-	if (!ctx->ctx_fl_system && task != current)
-		return -EBUSY;
-
-	return pfm_write_ibrs(ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_write_ibrs);
-
-int
-pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
-{
-	pfm_context_t *ctx;
-
-	/* a request vector is mandatory */
-	if (!req)
-		return -EINVAL;
-
-	/* operate on whatever context GET_PMU_CTX() reports; none means nothing to program */
-	ctx = GET_PMU_CTX();
-	if (!ctx)
-		return -EINVAL;
-
-	/*
-	 * A per-task context may only be driven on behalf of the calling
-	 * task (sufficient for calls made from an overflow handler);
-	 * system-wide contexts are exempt from that restriction.
-	 */
-	if (!ctx->ctx_fl_system && task != current)
-		return -EBUSY;
-
-	return pfm_write_dbrs(ctx, req, nreq, regs);
-}
-EXPORT_SYMBOL(pfm_mod_write_dbrs);
-
-
-/*
- * PFM_GET_FEATURES handler: reports the perfmon interface version.
- *
- * arg points to a pfarg_features_t that receives the answer. When
- * running on Xen the request is forwarded to the hypervisor and its
- * return value is handed back; otherwise ft_version is set to
- * PFM_VERSION and 0 is returned. ctx, count and regs are not
- * referenced.
- */
-static int
-pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	pfarg_features_t *req = (pfarg_features_t *)arg;
-
-	/*
-	 * Pass the request buffer itself, the same way pfm_context_load()
-	 * forwards its argument. Passing &arg would hand the hypervisor
-	 * the address of this function's local pointer, so the caller's
-	 * pfarg_features_t would never be filled in.
-	 */
-	if (is_running_on_xen())
-		return HYPERVISOR_perfmon_op(PFM_GET_FEATURES, arg, 0);
-	req->ft_version = PFM_VERSION;
-	return 0;
-}
-
-/*
- * PFM_STOP handler: stops monitoring for the context.
- *
- * On Xen the request is forwarded to the hypervisor when
- * is_xenoprof_primary() is true; otherwise nothing is done and 0 is
- * returned.
- *
- * Natively:
- *   - returns -EINVAL if the context is PFM_CTX_UNLOADED;
- *   - returns -EBUSY for a system-wide context when the caller is not
- *     running on ctx->ctx_cpu;
- *   - system-wide: clears DCR.pp, the PFM_CPUINFO_DCR_PP cpuinfo bit,
- *     the live psr.pp and psr.pp in the caller's saved regs;
- *   - per-task: clears psr.up, live and in regs when the task is
- *     current, otherwise in the task's saved pt_regs and in
- *     ctx_saved_psr_up.
- *
- * arg and count are not referenced. Returns 0 on success.
- */
-static int
-pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct pt_regs *tregs;
-	struct task_struct *task = PFM_CTX_TASK(ctx);
-	int state, is_system;
-
-  	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_STOP, NULL, 0);
-		return 0;
-  	}
-
-	state     = ctx->ctx_state;
-	is_system = ctx->ctx_fl_system;
-
-	/*
-	 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
-	 */
-	if (state == PFM_CTX_UNLOADED) return -EINVAL;
-
-	/*
-	 * In system wide and when the context is loaded, access can only happen
-	 * when the caller is running on the CPU being monitored by the session.
-	 * It does not have to be the owner (ctx_task) of the context per se.
-	 */
-	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
-		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-		return -EBUSY;
-	}
-	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
-		PFM_CTX_TASK(ctx)->pid,
-		state,
-		is_system));
-	/*
-	 * in system mode, we need to update the PMU directly
-	 * and the user level state of the caller, which may not
-	 * necessarily be the creator of the context.
-	 */
-	if (is_system) {
-		/*
-		 * Update local PMU first
-		 *
-		 * disable dcr pp
-		 */
-		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
-		ia64_srlz_i();
-
-		/*
-		 * update local cpuinfo
-		 */
-		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
-
-		/*
-		 * stop monitoring, does srlz.i
-		 */
-		pfm_clear_psr_pp();
-
-		/*
-		 * stop monitoring in the caller
-		 */
-		ia64_psr(regs)->pp = 0;
-
-		return 0;
-	}
-	/*
-	 * per-task mode
-	 */
-
-	if (task == current) {
-		/* stop monitoring  at kernel level */
-		pfm_clear_psr_up();
-
-		/*
-	 	 * stop monitoring at the user level
-	 	 */
-		ia64_psr(regs)->up = 0;
-	} else {
-		/* the monitored task is not running here: edit its saved state */
-		tregs = task_pt_regs(task);
-
-		/*
-	 	 * stop monitoring at the user level
-	 	 */
-		ia64_psr(tregs)->up = 0;
-
-		/*
-		 * monitoring disabled in kernel at next reschedule
-		 */
-		ctx->ctx_saved_psr_up = 0;
-		DPRINT(("task=[%d]\n", task->pid));
-	}
-	return 0;
-}
-
-
-/*
- * PFM_START handler: starts monitoring for the context.
- *
- * On Xen the request is forwarded to the hypervisor when
- * is_xenoprof_primary() is true; otherwise nothing is done and 0 is
- * returned.
- *
- * Natively:
- *   - returns -EINVAL unless the context is PFM_CTX_LOADED;
- *   - returns -EBUSY for a system-wide context when the caller is not
- *     running on ctx->ctx_cpu;
- *   - system-wide: sets psr.pp in the caller's saved regs, the
- *     PFM_CPUINFO_DCR_PP cpuinfo bit, the live psr.pp and DCR.pp;
- *   - per-task: sets psr.up, live and in regs when ctx_task is
- *     current, otherwise in the task's saved pt_regs and in
- *     ctx_saved_psr_up.
- *
- * arg and count are not referenced. Returns 0 on success.
- */
-static int
-pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct pt_regs *tregs;
-	int state, is_system;
-
-  	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_START, NULL, 0);
-		return 0;
-  	}
-	state     = ctx->ctx_state;
-	is_system = ctx->ctx_fl_system;
-
-	if (state != PFM_CTX_LOADED) return -EINVAL;
-
-	/*
-	 * In system wide and when the context is loaded, access can only happen
-	 * when the caller is running on the CPU being monitored by the session.
-	 * It does not have to be the owner (ctx_task) of the context per se.
-	 */
-	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
-		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
-		return -EBUSY;
-	}
-
-	/*
-	 * in system mode, we need to update the PMU directly
-	 * and the user level state of the caller, which may not
-	 * necessarily be the creator of the context.
-	 */
-	if (is_system) {
-
-		/*
-		 * set user level psr.pp for the caller
-		 */
-		ia64_psr(regs)->pp = 1;
-
-		/*
-		 * now update the local PMU and cpuinfo
-		 */
-		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
-
-		/*
-		 * start monitoring at kernel level
-		 */
-		pfm_set_psr_pp();
-
-		/* enable dcr pp */
-		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
-		ia64_srlz_i();
-
-		return 0;
-	}
-
-	/*
-	 * per-process mode
-	 */
-
-	if (ctx->ctx_task == current) {
-
-		/* start monitoring at kernel level */
-		pfm_set_psr_up();
-
-		/*
-		 * activate monitoring at user level
-		 */
-		ia64_psr(regs)->up = 1;
-
-	} else {
-		/* the monitored task is not running here: edit its saved state */
-		tregs = task_pt_regs(ctx->ctx_task);
-
-		/*
-		 * start monitoring at the kernel level the next
-		 * time the task is scheduled
-		 */
-		ctx->ctx_saved_psr_up = IA64_PSR_UP;
-
-		/*
-		 * activate monitoring at user level
-		 */
-		ia64_psr(tregs)->up = 1;
-	}
-	return 0;
-}
-
-/*
- * Fills each request of the vector with the default value of the PMC
- * it names (PMC_DFL_VAL) and clears its return flag.
- *
- * Processing stops at the first register that is not implemented:
- * that request is tagged with PFM_REG_RETFL_EINVAL and -EINVAL is
- * returned. Returns 0 when every request was served.
- */
-static int
-pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	pfarg_reg_t *req = (pfarg_reg_t *)arg;
-	unsigned int pmc;
-	int idx;
-	XEN_NOT_SUPPORTED_YET;
-
-	for (idx = 0; idx < count; idx++, req++) {
-
-		pmc = req->reg_num;
-
-		if (!PMC_IS_IMPL(pmc)) {
-			/* flag the offending request and give up */
-			PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
-			return -EINVAL;
-		}
-
-		req->reg_value = PMC_DFL_VAL(pmc);
-
-		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
-
-		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", pmc, req->reg_value));
-	}
-
-	return 0;
-}
-
-/*
- * Walks every thread, under tasklist_lock held for reading, looking
- * for one whose thread.pfm_context is ctx.
- *
- * Returns 0 if such a thread exists, -ESRCH otherwise.
- */
-static int
-pfm_check_task_exist(pfm_context_t *ctx)
-{
-	struct task_struct *leader, *thr;
-	int status = -ESRCH;
-
-	read_lock(&tasklist_lock);
-
-	do_each_thread (leader, thr) {
-		if (thr->thread.pfm_context != ctx)
-			continue;
-
-		status = 0;
-		break;
-	} while_each_thread (leader, thr);
-
-	read_unlock(&tasklist_lock);
-
-	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", status, ctx));
-
-	return status;
-}
-
-/*
- * PFM_LOAD_CONTEXT handler: attaches an unloaded context to the task
- * named by req->load_pid.
- *
- * On Xen the request is forwarded to the hypervisor when
- * is_xenoprof_primary() is true; otherwise nothing is done and 0 is
- * returned.
- *
- * Natively the function:
- *   - rejects contexts that are not PFM_CTX_UNLOADED (-EBUSY) and
- *     blocking-mode contexts being loaded onto the caller (-EINVAL);
- *   - resolves the target task with pfm_get_task();
- *   - requires system-wide contexts to target current (-EINVAL);
- *   - accounts for debug register usage when the context uses them;
- *   - reserves a session, links context and task, and either programs
- *     the PMU (task == current) or prepares the saved state of the
- *     stopped task.
- *
- * Returns 0 on success or a negative errno; on failure the session
- * reservation and the system-wide debug register count taken here are
- * released again.
- */
-static int
-pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct task_struct *task;
-	struct thread_struct *thread;
-	struct pfm_context_t *old;
-	unsigned long flags;
-#ifndef CONFIG_SMP
-	struct task_struct *owner_task = NULL;
-#endif
-	pfarg_load_t *req = (pfarg_load_t *)arg;
-	unsigned long *pmcs_source, *pmds_source;
-	int the_cpu;
-	int ret = 0;
-	int state, is_system, set_dbregs = 0;
-
-  	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_LOAD_CONTEXT, arg, 0);
-		return 0;
-  	}
-	state     = ctx->ctx_state;
-	is_system = ctx->ctx_fl_system;
-	/*
-	 * can only load from unloaded or terminated state
-	 */
-	if (state != PFM_CTX_UNLOADED) {
-		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
-			req->load_pid,
-			ctx->ctx_state));
-		return -EBUSY;
-	}
-
-	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
-
-	if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
-		DPRINT(("cannot use blocking mode on self\n"));
-		return -EINVAL;
-	}
-
-	ret = pfm_get_task(ctx, req->load_pid, &task);
-	if (ret) {
-		DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
-		return ret;
-	}
-
-	ret = -EINVAL;
-
-	/*
-	 * system wide is self monitoring only
-	 */
-	if (is_system && task != current) {
-		DPRINT(("system wide is self monitoring only load_pid=%d\n",
-			req->load_pid));
-		goto error;
-	}
-
-	thread = &task->thread;
-
-	ret = 0;
-	/*
-	 * cannot load a context which is using range restrictions,
-	 * into a task that is being debugged.
-	 */
-	if (ctx->ctx_fl_using_dbreg) {
-		if (thread->flags & IA64_THREAD_DBG_VALID) {
-			ret = -EBUSY;
-			DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
-			goto error;
-		}
-		LOCK_PFS(flags);
-
-		if (is_system) {
-			if (pfm_sessions.pfs_ptrace_use_dbregs) {
-				DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
-				ret = -EBUSY;
-			} else {
-				pfm_sessions.pfs_sys_use_dbregs++;
-				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
-				/* remember the increment so the error path can undo it */
-				set_dbregs = 1;
-			}
-		}
-
-		UNLOCK_PFS(flags);
-
-		if (ret) goto error;
-	}
-
-	/*
-	 * SMP system-wide monitoring implies self-monitoring.
-	 *
-	 * The programming model expects the task to
-	 * be pinned on a CPU throughout the session.
-	 * Here we take note of the current CPU at the
-	 * time the context is loaded. No call from
-	 * another CPU will be allowed.
-	 *
-	 * The pinning via sched_setaffinity()
-	 * must be done by the calling task prior
-	 * to this call.
-	 *
-	 * systemwide: keep track of CPU this session is supposed to run on
-	 */
-	the_cpu = ctx->ctx_cpu = smp_processor_id();
-
-	/* this value is overwritten by the call right below */
-	ret = -EBUSY;
-	/*
-	 * now reserve the session
-	 */
-	ret = pfm_reserve_session(current, is_system, the_cpu);
-	if (ret) goto error;
-
-	/*
-	 * task is necessarily stopped at this point.
-	 *
-	 * If the previous context was zombie, then it got removed in
-	 * pfm_save_regs(). Therefore we should not see it here.
-	 * If we see a context, then this is an active context
-	 *
-	 * XXX: needs to be atomic
-	 */
-	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
-		thread->pfm_context, ctx));
-
-	ret = -EBUSY;
-	/* install ctx only if the task has no context attached yet */
-	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
-	if (old != NULL) {
-		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
-		goto error_unres;
-	}
-
-	pfm_reset_msgq(ctx);
-
-	ctx->ctx_state = PFM_CTX_LOADED;
-
-	/*
-	 * link context to task
-	 */
-	ctx->ctx_task = task;
-
-	if (is_system) {
-		/*
-		 * we load as stopped
-		 */
-		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
-		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
-
-		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
-	} else {
-		thread->flags |= IA64_THREAD_PM_VALID;
-	}
-
-	/*
-	 * propagate into thread-state
-	 */
-	pfm_copy_pmds(task, ctx);
-	pfm_copy_pmcs(task, ctx);
-
-	pmcs_source = thread->pmcs;
-	pmds_source = thread->pmds;
-
-	/*
-	 * always the case for system-wide
-	 */
-	if (task == current) {
-
-		if (is_system == 0) {
-
-			/* allow user level control */
-			ia64_psr(regs)->sp = 0;
-			DPRINT(("clearing psr.sp for [%d]\n", task->pid));
-
-			SET_LAST_CPU(ctx, smp_processor_id());
-			INC_ACTIVATION();
-			SET_ACTIVATION(ctx);
-#ifndef CONFIG_SMP
-			/*
-			 * push the other task out, if any
-			 */
-			owner_task = GET_PMU_OWNER();
-			if (owner_task) pfm_lazy_save_regs(owner_task);
-#endif
-		}
-		/*
-		 * load all PMD from ctx to PMU (as opposed to thread state)
-		 * restore all PMC from ctx to PMU
-		 */
-		pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
-		pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);
-
-		ctx->ctx_reload_pmcs[0] = 0UL;
-		ctx->ctx_reload_pmds[0] = 0UL;
-
-		/*
-		 * guaranteed safe by earlier check against DBG_VALID
-		 */
-		if (ctx->ctx_fl_using_dbreg) {
-			pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
-			pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
-		}
-		/*
-		 * set new ownership
-		 */
-		SET_PMU_OWNER(task, ctx);
-
-		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
-	} else {
-		/*
-		 * when not current, task MUST be stopped, so this is safe
-		 */
-		regs = task_pt_regs(task);
-
-		/* force a full reload */
-		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
-		SET_LAST_CPU(ctx, -1);
-
-		/* initial saved psr (stopped) */
-		ctx->ctx_saved_psr_up = 0UL;
-		ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
-	}
-
-	ret = 0;
-
-	/* the success path falls through both labels with ret == 0 */
-error_unres:
-	if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
-error:
-	/*
-	 * we must undo the dbregs setting (for system-wide)
-	 */
-	if (ret && set_dbregs) {
-		LOCK_PFS(flags);
-		pfm_sessions.pfs_sys_use_dbregs--;
-		UNLOCK_PFS(flags);
-	}
-	/*
-	 * release task, there is now a link with the context
-	 */
-	if (is_system == 0 && task != current) {
-		pfm_put_task(task);
-
-		if (ret == 0) {
-			/* make sure the task still carries this context */
-			ret = pfm_check_task_exist(ctx);
-			if (ret) {
-				ctx->ctx_state = PFM_CTX_UNLOADED;
-				ctx->ctx_task  = NULL;
-			}
-		}
-	}
-	return ret;
-}
-
-/*
- * in this function, we do not need to increase the use count
- * for the task via get_task_struct(), because we hold the
- * context lock. If the task were to disappear while having
- * a context attached, it would go through pfm_exit_thread()
- * which also grabs the context lock  and would therefore be blocked
- * until we are here.
- */
-/* forward declaration: pfm_flush_pmds() is called below before its definition */
-static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);
-
-/*
- * PFM_UNLOAD_CONTEXT handler: detaches the context from its task.
- *
- * On Xen the request is forwarded to the hypervisor when
- * is_xenoprof_primary() is true; otherwise nothing is done and 0 is
- * returned.
- *
- * Natively: returns 0 right away if the context is already
- * PFM_CTX_UNLOADED; otherwise stops monitoring through pfm_stop()
- * (propagating its error, if any), saves the PMDs with
- * pfm_flush_pmds(), releases the session unless the previous state was
- * PFM_CTX_ZOMBIE, and breaks the links between task and context.
- *
- * arg and count are not referenced. Returns 0 on success.
- */
-static int
-pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
-{
-	struct task_struct *task = PFM_CTX_TASK(ctx);
-	struct pt_regs *tregs;
-	int prev_state, is_system;
-	int ret;
-
-  	if (is_running_on_xen()) {
-		if (is_xenoprof_primary())
-			return HYPERVISOR_perfmon_op(PFM_UNLOAD_CONTEXT,
-			                             NULL, 0);
-		return 0;
-  	}
-	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
-
-	prev_state = ctx->ctx_state;
-	is_system  = ctx->ctx_fl_system;
-
-	/*
-	 * unload only when necessary
-	 */
-	if (prev_state == PFM_CTX_UNLOADED) {
-		DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
-		return 0;
-	}
-
-	/*
-	 * clear psr and dcr bits
-	 */
-	ret = pfm_stop(ctx, NULL, 0, regs);
-	if (ret) return ret;
-
-	ctx->ctx_state = PFM_CTX_UNLOADED;
-
-	/*
-	 * in system mode, we need to update the PMU directly
-	 * and the user level state of the caller, which may not
-	 * necessarily be the creator of the context.
-	 */
-	if (is_system) {
-
-		/*
-		 * Update cpuinfo
-		 *
-		 * local PMU is taken care of in pfm_stop()
-		 */
-		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
-		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
-
-		/*
-		 * save PMDs in context
-		 * release ownership
-		 */
-		pfm_flush_pmds(current, ctx);
-
-		/*
-		 * at this point we are done with the PMU
-		 * so we can unreserve the resource.
-		 */
-		if (prev_state != PFM_CTX_ZOMBIE) 
-			pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);
-
-		/*
-		 * disconnect context from task
-		 */
-		task->thread.pfm_context = NULL;
-		/*
-		 * disconnect task from context
-		 */
-		ctx->ctx_task = NULL;
-
-		/*
-		 * There is nothing more to cleanup here.
-		 */
-		return 0;
-	}
-
-	/*
-	 * per-task mode
-	 */
-	/* tregs is assigned here but not read again in this function */
-	tregs = task == current ? regs : task_pt_regs(task);
-
-	if (task == current) {
-		/*
-		 * cancel user level control
-		 */
-		ia64_psr(regs)->sp = 1;
-
-		DPRINT(("setting psr.sp for [%d]\n", task->pid));
-	}
-	/*
-	 * save PMDs to context
-	 * release ownership
-	 */
-	pfm_flush_pmds(task, ctx);
-
-	/*
-	 * at this point we are done with the PMU
-	 * so we can unreserve the resource.
-	 *
-	 * when state was ZOMBIE, we have already unreserved.
-	 */
-	if (prev_state != PFM_CTX_ZOMBIE) 
-		pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);
-
-	/*
-	 * reset activation counter and psr
-	 */
-	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
-	SET_LAST_CPU(ctx, -1);
-
-	/*
-	 * PMU state will not be restored
-	 */
-	task->thread.flags &= ~IA64_THREAD_PM_VALID;
-
-	/*
-	 * break links between context and task
-	 */
-	task->thread.pfm_context  = NULL;
-	ctx->ctx_task             = NULL;
-
-	PFM_SET_WORK_PENDING(task, 0);
-
-	ctx->ctx_fl_trap_reason  = PFM_TRAP_REASON_NONE;
-	ctx->ctx_fl_can_restart  = 0;
-	ctx->ctx_fl_going_zombie = 0;
-
-	DPRINT(("disconnected [%d] from context\n", task->pid));
-
-	return 0;
-}
-
-
-/*
- * called only from exit_thread(): task == current
- * we come here only if current has a context attached (loaded or masked)
- *
- * Unloads the context under the context lock. For a LOADED or MASKED
- * context the user is then notified through pfm_end_notify_user();
- * for a ZOMBIE context the context itself is freed once the lock has
- * been dropped. Afterwards the function asserts (BUG_ON) that no
- * monitoring bit is left set in the live psr or in the task's saved
- * regs and that the PMU has no owner.
- */
-void
-pfm_exit_thread(struct task_struct *task)
-{
-	pfm_context_t *ctx;
-	unsigned long flags;
-	struct pt_regs *regs = task_pt_regs(task);
-	int ret, state;
-	int free_ok = 0;
-
-	ctx = PFM_GET_CTX(task);
-
-	PROTECT_CTX(ctx, flags);
-
-	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
-
-	state = ctx->ctx_state;
-	switch(state) {
-		case PFM_CTX_UNLOADED:
-			/*
-	 		 * only comes to this function if pfm_context is not NULL, i.e., cannot
-			 * be in unloaded state
-	 		 */
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
-			break;
-		case PFM_CTX_LOADED:
-		case PFM_CTX_MASKED:
-			ret = pfm_context_unload(ctx, NULL, 0, regs);
-			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
-			}
-			DPRINT(("ctx unloaded for current state was %d\n", state));
-
-			pfm_end_notify_user(ctx);
-			break;
-		case PFM_CTX_ZOMBIE:
-			ret = pfm_context_unload(ctx, NULL, 0, regs);
-			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
-			}
-			/* defer the free until the context lock has been released */
-			free_ok = 1;
-			break;
-		default:
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
-			break;
-	}
-	UNPROTECT_CTX(ctx, flags);
-
-	/* sanity checks: monitoring must be fully off at this point */
-	{ u64 psr = pfm_get_psr();
-	  BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
-	  BUG_ON(GET_PMU_OWNER());
-	  BUG_ON(ia64_psr(regs)->up);
-	  BUG_ON(ia64_psr(regs)->pp);
-	}
-
-	/*
-	 * All memory free operations (especially for vmalloc'ed memory)
-	 * MUST be done with interrupts ENABLED.
-	 */
-	if (free_ok) pfm_context_free(ctx);
-}
-
-/*
- * functions MUST be listed in the increasing order of their index (see perfmon.h)
- *
- * Each pfm_cmd_tab entry is initialized, in this order, with: the
- * handler, the handler name as a string, the command flags, the
- * argument count, the size of one argument and an optional "getsz"
- * function pointer.
- *
- *   PFM_CMD      - command taking arguments of type arg_type
- *   PFM_CMD_S    - command taking no argument (count 0, size 0, no getsz)
- *   PFM_CMD_NONE - unused slot (NULL handler)
- *
- * PFM_CMD_PCLRW and PFM_CMD_PCLRWS are shorthands for commonly used
- * flag combinations; the latter additionally carries PFM_CMD_STOP.
- */
-#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
-#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
-#define PFM_CMD_PCLRWS	(PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
-#define PFM_CMD_PCLRW	(PFM_CMD_FD|PFM_CMD_ARG_RW)
-#define PFM_CMD_NONE	{ NULL, "no-cmd", 0, 0, 0, NULL}
-
-/* command dispatch table; the leading comment on each line is the entry's index */
-static pfm_cmd_desc_t pfm_cmd_tab[]={
-/* 0  */PFM_CMD_NONE,
-/* 1  */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 2  */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 3  */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 4  */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
-/* 5  */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
-/* 6  */PFM_CMD_NONE,
-/* 7  */PFM_CMD_NONE,
-/* 8  */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
-/* 9  */PFM_CMD_NONE,
-/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
-/* 11 */PFM_CMD_NONE,
-/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
-/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
-/* 14 */PFM_CMD_NONE,
-/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
-/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
-/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
-/* 18 */PFM_CMD_NONE,
-/* 19 */PFM_CMD_NONE,
-/* 20 */PFM_CMD_NONE,
-/* 21 */PFM_CMD_NONE,
-/* 22 */PFM_CMD_NONE,
-/* 23 */PFM_CMD_NONE,
-/* 24 */PFM_CMD_NONE,
-/* 25 */PFM_CMD_NONE,
-/* 26 */PFM_CMD_NONE,
-/* 27 */PFM_CMD_NONE,
-/* 28 */PFM_CMD_NONE,
-/* 29 */PFM_CMD_NONE,
-/* 30 */PFM_CMD_NONE,
-/* 31 */PFM_CMD_NONE,
-/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
-/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
-};
-/* number of entries in pfm_cmd_tab */
-#define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
-/* Gate perfmonctl command cmd on the state of ctx and its task: 0 = go ahead, else -EINVAL or -EBUSY. */
-static int
-pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
-{
-	struct task_struct *task;
-	int state, old_state;
-
-recheck:
-	state = ctx->ctx_state;
-	task  = ctx->ctx_task;
-
-	if (task == NULL) {
-		DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
-		return 0;
-	}
-
-	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
-		ctx->ctx_fd,
-		state,
-		task->pid,
-		task->state, PFM_CMD_STOPPED(cmd)));
-
-	/*
-	 * self-monitoring always ok.
-	 *
-	 * for system-wide the caller can either be the creator of the
-	 * context (the one to which the context is attached) OR
-	 * a task running on the same CPU as the session.
-	 */
-	if (task == current || ctx->ctx_fl_system) return 0;
-
-	/*
-	 * we are monitoring another thread
-	 */
-	switch(state) {
-		case PFM_CTX_UNLOADED:
-			/*
-			 * if context is UNLOADED we are safe to go
-			 */
-			return 0;
-		case PFM_CTX_ZOMBIE:
-			/*
-			 * no command can operate on a zombie context
-			 */
-			DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
-			return -EINVAL;
-		case PFM_CTX_MASKED:
-			/*
-			 * PMU state has been saved to software even though
-			 * the thread may still be running.
-			 */
-			if (cmd != PFM_UNLOAD_CONTEXT) return 0;	/* only UNLOAD goes on to the stop check below */
-	}
-
-	/*
-	 * context is LOADED or MASKED. Some commands may need to have
-	 * the task stopped.
-	 *
-	 * We could lift this restriction for UP but it would mean that
-	 * the user has no guarantee the task would not run between
-	 * two successive calls to perfmonctl(). That's probably OK.
-	 * If this user wants to ensure the task does not run, then
-	 * the task must be stopped.
-	 */
-	if (PFM_CMD_STOPPED(cmd)) {
-		if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-			DPRINT(("[%d] task not in stopped state\n", task->pid));
-			return -EBUSY;
-		}
-		/*
-		 * task is now stopped, wait for ctxsw out
-		 *
-		 * This is an interesting point in the code.
-		 * We need to unprotect the context because
-		 * the pfm_save_regs() routine needs to grab
-		 * the same lock. There is a danger in doing
-		 * this because it leaves a window open for
-		 * another task to get access to the context
-		 * and possibly change its state. The one thing
-		 * that is not possible is for the context to disappear
-		 * because we are protected by the VFS layer, i.e.,
-		 * fget()/fput() in sys_perfmonctl().
-		 */
-		old_state = state;
-
-		UNPROTECT_CTX(ctx, flags);
-
-		wait_task_inactive(task);
-
-		PROTECT_CTX(ctx, flags);
-
-		/*
-		 * we must recheck to verify if state has changed
-		 */
-		if (ctx->ctx_state != old_state) {
-			DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
-			goto recheck;
-		}
-	}
-	return 0;
-}
-
-/*
- * system-call entry point (must return long): validate cmd/count, copy args in, run the handler, copy args back
- */
-asmlinkage long
-sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
-{
-	struct file *file = NULL;
-	pfm_context_t *ctx = NULL;
-	unsigned long flags = 0UL;
-	void *args_k = NULL;
-	long ret; /* will expand int return types */
-	size_t base_sz, sz, xtra_sz = 0;
-	int narg, completed_args = 0, call_made = 0, cmd_flags;
-	int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
-	int (*getsize)(void *arg, size_t *sz);
-#define PFM_MAX_ARGSIZE	4096
-
-	/*
-	 * reject any call if perfmon was disabled at initialization
-	 */
-	if (unlikely(pmu_conf == NULL)) return -ENOSYS;
-
-	if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
-		DPRINT(("invalid cmd=%d\n", cmd));
-		return -EINVAL;
-	}
-
-	func      = pfm_cmd_tab[cmd].cmd_func;
-	narg      = pfm_cmd_tab[cmd].cmd_narg;
-	base_sz   = pfm_cmd_tab[cmd].cmd_argsize;
-	getsize   = pfm_cmd_tab[cmd].cmd_getsize;
-	cmd_flags = pfm_cmd_tab[cmd].cmd_flags;
-
-	if (unlikely(func == NULL)) {
-		DPRINT(("invalid cmd=%d\n", cmd));
-		return -EINVAL;
-	}
-
-	DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
-		PFM_CMD_NAME(cmd),
-		cmd,
-		narg,
-		base_sz,
-		count));
-
-	/*
-	 * check if number of arguments matches what the command expects
-	 */
-	if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
-		return -EINVAL;
-
-restart_args:
-	sz = xtra_sz + base_sz*count;
-	/*
-	 * limit abuse to min page size
-	 */
-	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
-		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
-		return -E2BIG;
-	}
-
-	/*
-	 * allocate default-sized (PFM_MAX_ARGSIZE) argument buffer, once
-	 */
-	if (likely(count && args_k == NULL)) {
-		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
-		if (args_k == NULL) return -ENOMEM;
-	}
-
-	ret = -EFAULT;
-
-	/*
-	 * copy arguments
-	 *
-	 * assume sz = 0 for command without parameters
-	 */
-	if (sz && copy_from_user(args_k, arg, sz)) {
-		DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
-		goto error_args;
-	}
-
-	/*
-	 * check if command supports extra parameters
-	 */
-	if (completed_args == 0 && getsize) {
-		/*
-		 * get extra parameters size (based on main argument)
-		 */
-		ret = (*getsize)(args_k, &xtra_sz);
-		if (ret) goto error_args;
-
-		completed_args = 1;
-
-		DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));
-
-		/* redo the size check and the copy, this time including xtra_sz */
-		if (likely(xtra_sz)) goto restart_args;
-	}
-
-	if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;	/* fd is not looked up, ctx stays NULL */
-
-	ret = -EBADF;
-
-	file = fget(fd);
-	if (unlikely(file == NULL)) {
-		DPRINT(("invalid fd %d\n", fd));
-		goto error_args;
-	}
-	if (unlikely(PFM_IS_FILE(file) == 0)) {
-		DPRINT(("fd %d not related to perfmon\n", fd));
-		goto error_args;
-	}
-
-	ctx = (pfm_context_t *)file->private_data;
-	if (unlikely(ctx == NULL)) {
-		DPRINT(("no context for fd %d\n", fd));
-		goto error_args;
-	}
-	prefetch(&ctx->ctx_state);
-
-	PROTECT_CTX(ctx, flags);
-
-	/*
-	 * check that the context and task state allow this command
-	 */
-	ret = pfm_check_task_state(ctx, cmd, flags);
-	if (unlikely(ret)) goto abort_locked;
-
-skip_fd:
-	ret = (*func)(ctx, args_k, count, task_pt_regs(current));
-
-	call_made = 1;
-
-abort_locked:
-	if (likely(ctx)) {
-		DPRINT(("context unlocked\n"));
-		UNPROTECT_CTX(ctx, flags);
-	}
-
-	/* copy argument back to user, if needed (extra bytes from xtra_sz are not copied back) */
-	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
-
-error_args:
-	if (file)
-		fput(file);
-
-	kfree(args_k);
-
-	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
-
-	return ret;
-}
-/* Resume after an overflow: ask the sampling format (if any), reset overflowed PMDs, mark ctx LOADED. */
-static void
-pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
-{
-	pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
-	pfm_ovfl_ctrl_t rst_ctrl;
-	int state;
-	int ret = 0;
-
-	state = ctx->ctx_state;
-	/*
-	 * Unlock sampling buffer and reset index atomically
-	 * XXX: not really needed when blocking
-	 */
-	if (CTX_HAS_SMPL(ctx)) {
-
-		rst_ctrl.bits.mask_monitoring = 0;
-		rst_ctrl.bits.reset_ovfl_pmds = 0;
-
-		if (state == PFM_CTX_LOADED)
-			ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
-		else
-			ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
-	} else {
-		rst_ctrl.bits.mask_monitoring = 0;
-		rst_ctrl.bits.reset_ovfl_pmds = 1;
-	}
-
-	if (ret == 0) {
-		if (rst_ctrl.bits.reset_ovfl_pmds) {
-			pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
-		}
-		if (rst_ctrl.bits.mask_monitoring == 0) {
-			DPRINT(("resuming monitoring\n"));
-			if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
-		} else {
-			DPRINT(("stopping monitoring\n"));
-			//pfm_stop_monitoring(current, regs);
-		}
-		ctx->ctx_state = PFM_CTX_LOADED;
-	}
-}
-
-/*
- * unload ctx and wake up waiters on ctx_zombieq; context MUST BE LOCKED when calling
- * can only be called for current
- */
-static void
-pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
-{
-	int ret;
-
-	DPRINT(("entering for [%d]\n", current->pid));
-
-	ret = pfm_context_unload(ctx, NULL, 0, regs);
-	if (ret) {
-		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret);
-	}
-
-	/*
-	 * and wakeup controlling task, indicating we are now disconnected
-	 */
-	wake_up_interruptible(&ctx->ctx_zombieq);
-
-	/*
-	 * given that context is still locked, the controlling
-	 * task will only get access when we return from
-	 * pfm_handle_work().
-	 */
-}
-
-static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
- /*
-  * pfm_handle_work() can be called with interrupts enabled
-  * (TIF_NEED_RESCHED) or disabled. The wait_for_completion_interruptible()
-  * call may sleep, therefore we must re-enable interrupts
-  * to avoid deadlocks. It is safe to do so because this function
-  * is called ONLY when returning to user level (PUStk=1), in which case
-  * there is no risk of kernel stack overflow due to deep
-  * interrupt nesting.
-  */
-void
-pfm_handle_work(void)
-{
-	pfm_context_t *ctx;
-	struct pt_regs *regs;
-	unsigned long flags, dummy_flags;
-	unsigned long ovfl_regs;
-	unsigned int reason;
-	int ret;
-
-	ctx = PFM_GET_CTX(current);
-	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
-		return;
-	}
-
-	PROTECT_CTX(ctx, flags);
-
-	PFM_SET_WORK_PENDING(current, 0);
-
-	pfm_clear_task_notify();
-
-	regs = task_pt_regs(current);
-
-	/*
-	 * extract reason for being here and clear
-	 */
-	reason = ctx->ctx_fl_trap_reason;
-	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
-	ovfl_regs = ctx->ctx_ovfl_regs[0];
-
-	DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));
-
-	/*
-	 * must be done before we check for simple-reset mode
-	 */
-	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
-
-
-	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
-	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
-
-	/*
-	 * restore interrupt mask to what it was on entry.
-	 * Could be enabled/disabled.
-	 */
-	UNPROTECT_CTX(ctx, flags);
-
-	/*
-	 * force interrupt enable because wait_for_completion_interruptible() may sleep
-	 */
-	local_irq_enable();
-
-	DPRINT(("before block sleeping\n"));
-
-	/*
-	 * may go through without blocking on SMP systems
-	 * if restart has been received already by the time we wait on ctx_restart_done
-	 */
-	ret = wait_for_completion_interruptible(&ctx->ctx_restart_done);
-
-	DPRINT(("after block sleeping ret=%d\n", ret));
-
-	/*
-	 * lock context and mask interrupts again
-	 * We save flags into a dummy because we may have
-	 * altered interrupts mask compared to entry in this
-	 * function.
-	 */
-	PROTECT_CTX(ctx, dummy_flags);
-
-	/*
-	 * we need to read the ovfl_regs only after wake-up
-	 * because we may have had pfm_write_pmds() in between
-	 * and that can change PMD values, in which case
-	 * ovfl_regs is reset for these new PMD values.
-	 */
-	ovfl_regs = ctx->ctx_ovfl_regs[0];
-
-	if (ctx->ctx_fl_going_zombie) {
-do_zombie:
-		DPRINT(("context is zombie, bailing out\n"));
-		pfm_context_force_terminate(ctx, regs);
-		goto nothing_to_do;
-	}
-	/*
-	 * if the wait was interrupted (ret < 0) we don't restart anything
-	 */
-	if (ret < 0) goto nothing_to_do;
-
-skip_blocking:
-	pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
-	ctx->ctx_ovfl_regs[0] = 0UL;
-
-nothing_to_do:
-	/*
-	 * restore flags as they were upon entry
-	 */
-	UNPROTECT_CTX(ctx, flags);
-}
-/* Wake waiters on ctx_msgq_wait (when msg != NULL) and send SIGIO to ctx_async_queue; always returns 0. */
-static int
-pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
-{
-	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
-		DPRINT(("ignoring overflow notification, owner is zombie\n"));
-		return 0;
-	}
-
-	DPRINT(("waking up somebody\n"));
-
-	if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait);
-
-	/*
-	 * safe, we are not in intr handler, nor in ctxsw when
-	 * we come here
-	 */
-	kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);
-
-	return 0;
-}
-/* Fill a PFM_MSG_OVFL message (unless ctx_fl_no_msg is set) and notify; -1 if pfm_get_new_msg() fails. */
-static int
-pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
-{
-	pfm_msg_t *msg = NULL;
-
-	if (ctx->ctx_fl_no_msg == 0) {
-		msg = pfm_get_new_msg(ctx);
-		if (msg == NULL) {
-			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
-			return -1;
-		}
-
-		msg->pfm_ovfl_msg.msg_type         = PFM_MSG_OVFL;
-		msg->pfm_ovfl_msg.msg_ctx_fd       = ctx->ctx_fd;
-		msg->pfm_ovfl_msg.msg_active_set   = 0;
-		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
-		msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
-		msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
-		msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
-		msg->pfm_ovfl_msg.msg_tstamp       = 0UL;
-	}
-
-	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
-		msg,
-		ctx->ctx_fl_no_msg,
-		ctx->ctx_fd,
-		ovfl_pmds));
-
-	return pfm_notify_user(ctx, msg);
-}
-/* Fill a PFM_MSG_END message and notify the user; returns -1 if pfm_get_new_msg() fails. */
-static int
-pfm_end_notify_user(pfm_context_t *ctx)
-{
-	pfm_msg_t *msg;
-
-	msg = pfm_get_new_msg(ctx);
-	if (msg == NULL) {
-		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
-		return -1;
-	}
-	/* no leak: clear the whole message before filling it in */
-	memset(msg, 0, sizeof(*msg));
-
-	msg->pfm_end_msg.msg_type    = PFM_MSG_END;
-	msg->pfm_end_msg.msg_ctx_fd  = ctx->ctx_fd;
-	msg->pfm_ovfl_msg.msg_tstamp = 0UL;	/* NOTE(review): written via pfm_ovfl_msg, not pfm_end_msg - confirm intended */
-
-	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
-		msg,
-		ctx->ctx_fl_no_msg,
-		ctx->ctx_fd));
-
-	return pfm_notify_user(ctx, msg);
-}
-
-/*
- * main overflow processing routine.
- * it can be called from the interrupt path or explicitly during the context switch code
- */
-static void
-pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
-{
-	pfm_ovfl_arg_t *ovfl_arg;
-	unsigned long mask;
-	unsigned long old_val, ovfl_val, new_val;
-	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
-	unsigned long tstamp;
-	pfm_ovfl_ctrl_t	ovfl_ctrl;
-	unsigned int i, has_smpl;
-	int must_notify = 0;
-
-	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;
-
-	/*
-	 * sanity test. Should never happen
-	 */
-	if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
-
-	tstamp   = ia64_get_itc();
-	mask     = pmc0 >> PMU_FIRST_COUNTER;
-	ovfl_val = pmu_conf->ovfl_val;
-	has_smpl = CTX_HAS_SMPL(ctx);
-
-	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
-		     "used_pmds=0x%lx\n",
-			pmc0,
-			task ? task->pid: -1,
-			(regs ? regs->cr_iip : 0),
-			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
-			ctx->ctx_used_pmds[0]));
-
-
-	/*
-	 * first we update the virtual counters
-	 * assume there was a prior ia64_srlz_d() issued
-	 */
-	for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
-
-		/* skip pmd which did not overflow */
-		if ((mask & 0x1) == 0) continue;
-
-		/*
-		 * Note that the pmd is not necessarily 0 at this point as qualified events
-		 * may have happened before the PMU was frozen. The residual count is not
-		 * taken into consideration here but will be with any read of the pmd via
-		 * pfm_read_pmds().
-		 */
-		old_val              = new_val = ctx->ctx_pmds[i].val;
-		new_val             += 1 + ovfl_val;
-		ctx->ctx_pmds[i].val = new_val;
-
-		/*
-		 * check for overflow condition (the 64-bit software value wrapped around)
-		 */
-		if (likely(old_val > new_val)) {
-			ovfl_pmds |= 1UL << i;
-			if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
-		}
-
-		DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
-			i,
-			new_val,
-			old_val,
-			ia64_get_pmd(i) & ovfl_val,
-			ovfl_pmds,
-			ovfl_notify));
-	}
-
-	/*
-	 * there was no 64-bit overflow, nothing else to do
-	 */
-	if (ovfl_pmds == 0UL) return;
-
-	/*
-	 * reset all control bits
-	 */
-	ovfl_ctrl.val = 0;
-	reset_pmds    = 0UL;
-
-	/*
-	 * if a sampling format module exists, then we "cache" the overflow by
-	 * calling the module's handler() routine.
-	 */
-	if (has_smpl) {
-		unsigned long start_cycles, end_cycles;
-		unsigned long pmd_mask;
-		int j, k, ret = 0;
-		int this_cpu = smp_processor_id();
-
-		pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
-		ovfl_arg = &ctx->ctx_ovfl_arg;
-
-		prefetch(ctx->ctx_smpl_hdr);
-
-		for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {
-
-			mask = 1UL << i;
-
-			if ((pmd_mask & 0x1) == 0) continue;
-
-			ovfl_arg->ovfl_pmd      = (unsigned char )i;
-			ovfl_arg->ovfl_notify   = ovfl_notify & mask ? 1 : 0;
-			ovfl_arg->active_set    = 0;
-			ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
-			ovfl_arg->smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
-
-			ovfl_arg->pmd_value      = ctx->ctx_pmds[i].val;
-			ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
-			ovfl_arg->pmd_eventid    = ctx->ctx_pmds[i].eventid;
-
-			/*
-			 * copy values of pmds of interest. Sampling format may copy them
-			 * into sampling buffer.
-			 */
-			if (smpl_pmds) {
-				for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
-					if ((smpl_pmds & 0x1) == 0) continue;
-					ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ?  pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
-					DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
-				}
-			}
-
-			pfm_stats[this_cpu].pfm_smpl_handler_calls++;
-
-			start_cycles = ia64_get_itc();
-
-			/*
-			 * call custom buffer format record (handler) routine
-			 */
-			ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
-
-			end_cycles = ia64_get_itc();
-
-			/*
-			 * For those controls, we take the union because they have
-			 * an all or nothing behavior.
-			 */
-			ovfl_ctrl.bits.notify_user     |= ovfl_arg->ovfl_ctrl.bits.notify_user;
-			ovfl_ctrl.bits.block_task      |= ovfl_arg->ovfl_ctrl.bits.block_task;
-			ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
-			/*
-			 * build the bitmask of pmds to reset now
-			 */
-			if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
-
-			pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
-		}
-		/*
-		 * when the module cannot handle the rest of the overflows, we abort right here
-		 */
-		if (ret && pmd_mask) {
-			DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
-				pmd_mask<<PMU_FIRST_COUNTER));
-		}
-		/*
-		 * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
-		 */
-		ovfl_pmds &= ~reset_pmds;
-	} else {
-		/*
-		 * when no sampling module is used, then the default
-		 * is to notify on overflow if requested by user
-		 */
-		ovfl_ctrl.bits.notify_user     = ovfl_notify ? 1 : 0;
-		ovfl_ctrl.bits.block_task      = ovfl_notify ? 1 : 0;
-		ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
-		ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
-		/*
-		 * if needed, we reset all overflowed pmds
-		 */
-		if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
-	}
-
-	DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds));
-
-	/*
-	 * reset the requested PMD registers using the short reset values
-	 */
-	if (reset_pmds) {
-		unsigned long bm = reset_pmds;
-		pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
-	}
-
-	if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
-		/*
-		 * keep track of what to reset when unblocking
-		 */
-		ctx->ctx_ovfl_regs[0] = ovfl_pmds;
-
-		/*
-		 * check for blocking context
-		 */
-		if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {
-
-			ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;
-
-			/*
-			 * set the perfmon specific checking pending work for the task
-			 */
-			PFM_SET_WORK_PENDING(task, 1);
-
-			/*
-			 * when coming from ctxsw, current still points to the
-			 * previous task, therefore we must work with task and not current.
-			 */
-			pfm_set_task_notify(task);
-		}
-		/*
-		 * defer until state is changed (shorten spin window). the context is locked
-		 * anyway, so the signal receiver would come spin for nothing.
-		 */
-		must_notify = 1;
-	}
-
-	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
-			GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
-			PFM_GET_WORK_PENDING(task),
-			ctx->ctx_fl_trap_reason,
-			ovfl_pmds,
-			ovfl_notify,
-			ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
-	/*
-	 * in case monitoring must be stopped, we toggle the psr bits
-	 */
-	if (ovfl_ctrl.bits.mask_monitoring) {
-		pfm_mask_monitoring(task);
-		ctx->ctx_state = PFM_CTX_MASKED;
-		ctx->ctx_fl_can_restart = 1;
-	}
-
-	/*
-	 * send notification now
-	 */
-	if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
-
-	return;
-
-sanity_check:
-	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
-			smp_processor_id(),
-			task ? task->pid : -1,
-			pmc0);
-	return;
-
-stop_monitoring:
-	/*
-	 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
-	 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
-	 * come here as zombie only if the task is the current task. In which case, we
-	 * can access the PMU  hardware directly.
-	 *
-	 * Note that zombies do have PM_VALID set. So here we do the minimal.
-	 *
-	 * In case the context was zombified it could not be reclaimed at the time
-	 * the monitoring program exited. At this point, the PMU reservation has been
-	 * returned, the sampling buffer has been freed. We must convert this call
-	 * into a spurious interrupt. However, we must also avoid infinite overflows
-	 * by stopping monitoring for this task. We can only come here for a per-task
-	 * context. All we need to do is to stop monitoring using the psr bits which
-	 * are always task private. By re-enabling secure monitoring, we ensure that
-	 * the monitored task will not be able to re-activate monitoring.
-	 * The task will eventually be context switched out, at which point the context
-	 * will be reclaimed (that includes releasing ownership of the PMU).
-	 *
-	 * So there might be a window of time where the number of per-task session is zero
-	 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
-	 * context. This is safe because if a per-task session comes in, it will push this one
-	 * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide
-	 * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will
-	 * also push our zombie context out.
-	 *
-	 * Overall pretty hairy stuff....
-	 */
-	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
-	pfm_clear_psr_up();
-	ia64_psr(regs)->up = 0;
-	ia64_psr(regs)->sp = 1;
-	return;
-}
-/* Process one PMU overflow interrupt on this CPU; returns 0 if handled, -1 if treated as spurious. */
-static int
-pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
-{
-	struct task_struct *task;
-	pfm_context_t *ctx;
-	unsigned long flags;
-	u64 pmc0;
-	int this_cpu = smp_processor_id();
-	int retval = 0;
-
-	pfm_stats[this_cpu].pfm_ovfl_intr_count++;
-
-	/*
-	 * srlz.d done before arriving here
-	 */
-	pmc0 = ia64_get_pmc(0);
-
-	task = GET_PMU_OWNER();
-	ctx  = GET_PMU_CTX();
-
-	/*
-	 * if we have some pending bits set
-	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
-	 */
-	if (PMC0_HAS_OVFL(pmc0) && task) {
-		/*
-		 * we assume that pmc0.fr is always set here
-		 */
-
-		/* sanity check */
-		if (!ctx) goto report_spurious1;
-
-		if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 
-			goto report_spurious2;
-
-		PROTECT_CTX_NOPRINT(ctx, flags);
-
-		pfm_overflow_handler(task, ctx, pmc0, regs);
-
-		UNPROTECT_CTX_NOPRINT(ctx, flags);
-
-	} else {
-		pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
-		retval = -1;
-	}
-	/*
-	 * keep it unfrozen at all times
-	 */
-	pfm_unfreeze_pmu();
-
-	return retval;
-
-report_spurious1:
-	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
-		this_cpu, task->pid);
-	pfm_unfreeze_pmu();
-	return -1;
-report_spurious2:
-	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
-		this_cpu, 
-		task->pid);
-	pfm_unfreeze_pmu();
-	return -1;
-}
-/* PMU interrupt entry: run pfm_alt_intr_handler if one is set, else the default handler with cycle accounting. */
-static irqreturn_t
-pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
-{
-	unsigned long start_cycles, total_cycles;
-	unsigned long min, max;
-	int this_cpu;
-	int ret;
-
-	this_cpu = get_cpu();
-	if (likely(!pfm_alt_intr_handler)) {
-		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
-		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
-
-		start_cycles = ia64_get_itc();
-
-		ret = pfm_do_interrupt_handler(irq, arg, regs);
-
-		total_cycles = ia64_get_itc();
-
-		/*
-		 * don't measure spurious interrupts
-		 */
-		if (likely(ret == 0)) {
-			total_cycles -= start_cycles;
-
-			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
-			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
-
-			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
-		}
-	}
-	else {
-		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
-	}
-
-	put_cpu_no_resched();
-	return IRQ_HANDLED;
-}
-
-/*
- * /proc/perfmon interface, for debug only
- */
-
-#define PFM_PROC_SHOW_HEADER	((void *)NR_CPUS+1)
-/* seq_file start: pos 0 yields the header token; pos N in 1..NR_CPUS yields the first online CPU at or after N-1. */
-static void *
-pfm_proc_start(struct seq_file *m, loff_t *pos)
-{
-	if (*pos == 0) {
-		return PFM_PROC_SHOW_HEADER;
-	}
-
-	while (*pos <= NR_CPUS) {
-		if (cpu_online(*pos - 1)) {
-			return (void *)*pos;
-		}
-		++*pos;
-	}
-	return NULL;
-}
-/* seq_file next: advance *pos and reuse pfm_proc_start() to find the next entry. */
-static void *
-pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return pfm_proc_start(m, pos);
-}
-/* seq_file stop: intentionally empty. */
-static void
-pfm_proc_stop(struct seq_file *m, void *v)
-{
-}
-/* Print the global section: version, PMU model, sysctl settings, session counts and the pfm_buffer_fmt_list entries. */
-static void
-pfm_proc_show_header(struct seq_file *m)
-{
-	struct list_head * pos;
-	pfm_buffer_fmt_t * entry;
-	unsigned long flags;
-
- 	seq_printf(m,
-		"perfmon version           : %u.%u\n"
-		"model                     : %s\n"
-		"fastctxsw                 : %s\n"
-		"expert mode               : %s\n"
-		"ovfl_mask                 : 0x%lx\n"
-		"PMU flags                 : 0x%x\n",
-		PFM_VERSION_MAJ, PFM_VERSION_MIN,
-		pmu_conf->pmu_name,
-		pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
-		pfm_sysctl.expert_mode > 0 ? "Yes": "No",
-		pmu_conf->ovfl_val,
-		pmu_conf->flags);
-
-  	LOCK_PFS(flags);
-
- 	seq_printf(m,
- 		"proc_sessions             : %u\n"
- 		"sys_sessions              : %u\n"
- 		"sys_use_dbregs            : %u\n"
- 		"ptrace_use_dbregs         : %u\n",
- 		pfm_sessions.pfs_task_sessions,
- 		pfm_sessions.pfs_sys_sessions,
- 		pfm_sessions.pfs_sys_use_dbregs,
- 		pfm_sessions.pfs_ptrace_use_dbregs);
-
-  	UNLOCK_PFS(flags);
-
-	spin_lock(&pfm_buffer_fmt_lock);
-
-	list_for_each(pos, &pfm_buffer_fmt_list) {
-		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
-		seq_printf(m, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
-			entry->fmt_uuid[0],
-			entry->fmt_uuid[1],
-			entry->fmt_uuid[2],
-			entry->fmt_uuid[3],
-			entry->fmt_uuid[4],
-			entry->fmt_uuid[5],
-			entry->fmt_uuid[6],
-			entry->fmt_uuid[7],
-			entry->fmt_uuid[8],
-			entry->fmt_uuid[9],
-			entry->fmt_uuid[10],
-			entry->fmt_uuid[11],
-			entry->fmt_uuid[12],
-			entry->fmt_uuid[13],
-			entry->fmt_uuid[14],
-			entry->fmt_uuid[15],
-			entry->fmt_name);
-	}
-	spin_unlock(&pfm_buffer_fmt_lock);
-
-}
-/* seq_file show: print the header for the header token, otherwise the statistics of CPU (v - 1); returns 0. */
-static int
-pfm_proc_show(struct seq_file *m, void *v)
-{
-	unsigned long psr;
-	unsigned int i;
-	int cpu;
-
-	if (v == PFM_PROC_SHOW_HEADER) {
-		pfm_proc_show_header(m);
-		return 0;
-	}
-
-	/* show info for CPU (v - 1) */
-
-	cpu = (long)v - 1;
-	seq_printf(m,
-		"CPU%-2d overflow intrs      : %lu\n"
-		"CPU%-2d overflow cycles     : %lu\n"
-		"CPU%-2d overflow min        : %lu\n"
-		"CPU%-2d overflow max        : %lu\n"
-		"CPU%-2d smpl handler calls  : %lu\n"
-		"CPU%-2d smpl handler cycles : %lu\n"
-		"CPU%-2d spurious intrs      : %lu\n"
-		"CPU%-2d replay   intrs      : %lu\n"
-		"CPU%-2d syst_wide           : %d\n"
-		"CPU%-2d dcr_pp              : %d\n"
-		"CPU%-2d exclude idle        : %d\n"
-		"CPU%-2d owner               : %d\n"
-		"CPU%-2d context             : %p\n"
-		"CPU%-2d activations         : %lu\n",
-		cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
-		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
-		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
-		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
-		cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
-		cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
-		cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
-		cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
-		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
-		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
-		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
-		cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
-		cpu, pfm_get_cpu_data(pmu_ctx, cpu),
-		cpu, pfm_get_cpu_data(pmu_activation_number, cpu));
-
-	if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) {
-
-		psr = pfm_get_psr();
-
-		ia64_srlz_d();
-
-		seq_printf(m, 
-			"CPU%-2d psr                 : 0x%lx\n"
-			"CPU%-2d pmc0                : 0x%lx\n", 
-			cpu, psr,
-			cpu, ia64_get_pmc(0));
-
-		for (i=0; PMC_IS_LAST(i) == 0;  i++) {
-			if (PMC_IS_COUNTING(i) == 0) continue;
-   			seq_printf(m, 
-				"CPU%-2d pmc%u                : 0x%lx\n"
-   				"CPU%-2d pmd%u                : 0x%lx\n", 
-				cpu, i, ia64_get_pmc(i),
-				cpu, i, ia64_get_pmd(i));
-  		}
-	}
-	return 0;
-}
-/* seq_file iterator callbacks; handed to seq_open() by pfm_proc_open(). */
-struct seq_operations pfm_seq_ops = {
-	.start =	pfm_proc_start,
- 	.next =		pfm_proc_next,
- 	.stop =		pfm_proc_stop,
- 	.show =		pfm_proc_show
-};
-/* open handler: attach pfm_seq_ops to file via seq_open() and return its result. */
-static int
-pfm_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &pfm_seq_ops);
-}
-
-
-/*
- * we come here as soon as local_cpu_data->pfm_syst_wide is set. This happens
- * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
- * is active or inactive based on mode. We must rely on the value in
- * local_cpu_data->pfm_syst_info
- */
-void
-pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
-{
-	struct pt_regs *regs;
-	unsigned long dcr;
-	unsigned long dcr_pp;
-
-	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
-
-	/*
-	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
-	 * on every CPU, so we can rely on the pid to identify the idle task.
-	 */
-	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {	/* idle not excluded, or task is not idle */
-		regs = task_pt_regs(task);
-		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
-		return;
-	}
-	/*
-	 * if monitoring has started
-	 */
-	if (dcr_pp) {
-		dcr = ia64_getreg(_IA64_REG_CR_DCR);
-		/*
-		 * context switching in?
-		 */
-		if (is_ctxswin) {
-			/* mask monitoring for the idle task */
-			ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
-			pfm_clear_psr_pp();
-			ia64_srlz_i();
-			return;
-		}
-		/*
-		 * context switching out
-		 * restore monitoring for next task
-		 *
-		 * Due to inlining this odd if-then-else construction generates
-		 * better code.
-		 */
-		ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
-		pfm_set_psr_pp();
-		ia64_srlz_i();
-	}
-}
-
-#ifdef CONFIG_SMP
-/* Detach ctx from its task: clear psr.up/set psr.sp in regs, drop PMU ownership if held, clear work and PM_VALID. */
-static void
-pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
-{
-	struct task_struct *task = ctx->ctx_task;
-
-	ia64_psr(regs)->up = 0;
-	ia64_psr(regs)->sp = 1;
-
-	if (GET_PMU_OWNER() == task) {
-		DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
-		SET_PMU_OWNER(NULL, NULL);
-	}
-
-	/*
-	 * disconnect the task from the context and vice-versa
-	 */
-	PFM_SET_WORK_PENDING(task, 0);
-
-	task->thread.pfm_context  = NULL;
-	task->thread.flags       &= ~IA64_THREAD_PM_VALID;
-
-	DPRINT(("force cleanup for [%d]\n",  task->pid));
-}
-
-
-/*
- * in 2.6, interrupts are masked when we come here and the runqueue lock is held
- */
-void
-pfm_save_regs(struct task_struct *task)
-{
-	pfm_context_t *ctx;
-	struct thread_struct *t;
-	unsigned long flags;
-	u64 psr;
-
-
-	ctx = PFM_GET_CTX(task);
-	if (ctx == NULL) return;
-	t = &task->thread;
-
-	/*
-	 * we always come here with interrupts ALREADY disabled by
-	 * the scheduler. So we simply need to protect against concurrent
-	 * access, not CPU concurrency.
-	 */
-	flags = pfm_protect_ctx_ctxsw(ctx);
-
-	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
-		struct pt_regs *regs = task_pt_regs(task);
-
-		pfm_clear_psr_up();
-
-		pfm_force_cleanup(ctx, regs);
-
-		BUG_ON(ctx->ctx_smpl_hdr);
-
-		pfm_unprotect_ctx_ctxsw(ctx, flags);
-
-		pfm_context_free(ctx);
-		return;
-	}
-
-	/*
-	 * save current PSR: needed because we modify it
-	 */
-	ia64_srlz_d();
-	psr = pfm_get_psr();
-
-	BUG_ON(psr & (IA64_PSR_I));
-
-	/*
-	 * stop monitoring:
-	 * This is the last instruction which may generate an overflow
-	 *
-	 * We do not need to set psr.sp because, it is irrelevant in kernel.
-	 * It will be restored from ipsr when going back to user level
-	 */
-	pfm_clear_psr_up();
-
-	/*
-	 * keep a copy of psr.up (for reload)
-	 */
-	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
-
-	/*
-	 * release ownership of this PMU.
-	 * PM interrupts are masked, so nothing
-	 * can happen.
-	 */
-	SET_PMU_OWNER(NULL, NULL);
-
-	/*
-	 * we systematically save the PMD as we have no
-	 * guarantee we will be scheduled on that same
-	 * CPU again.
-	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
-
-	/*
-	 * save pmc0 (ia64_srlz_d() done in pfm_save_pmds());
-	 * we will need it on the restore path to check
-	 * for pending overflow.
-	 */
-	t->pmcs[0] = ia64_get_pmc(0);
-
-	/*
-	 * unfreeze PMU if had pending overflows
-	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
-
-	/*
-	 * finally, allow context access.
-	 * interrupts will still be masked after this call.
-	 */
-	pfm_unprotect_ctx_ctxsw(ctx, flags);
-}
-
-#else /* !CONFIG_SMP */
-void
-pfm_save_regs(struct task_struct *task)
-{
-	pfm_context_t *ctx;
-	u64 psr;
-	/* UP variant: only stops monitoring and records psr.up; no PMD/PMC is saved in this function */
-	ctx = PFM_GET_CTX(task);
-	if (ctx == NULL) return;
-
-	/*
-	 * save current PSR: needed because we modify it
-	 */
-	psr = pfm_get_psr();
-
-	BUG_ON(psr & (IA64_PSR_I));	/* psr.i set would mean interrupts are not masked */
-
-	/*
-	 * stop monitoring:
-	 * This is the last instruction which may generate an overflow
-	 *
-	 * We do not need to set psr.sp because it is irrelevant in the kernel.
-	 * It will be restored from ipsr when going back to user level
-	 */
-	pfm_clear_psr_up();
-
-	/*
-	 * keep a copy of psr.up (for reload)
-	 */
-	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
-}
-
-static void
-pfm_lazy_save_regs (struct task_struct *task)
-{
-	pfm_context_t *ctx;
-	struct thread_struct *t;
-	unsigned long flags;
-	/* UP only: push the PMU state still held for `task' into its thread struct; psr.up must already be clear */
-	{ u64 psr  = pfm_get_psr();
-	  BUG_ON(psr & IA64_PSR_UP);
-	}
-
-	ctx = PFM_GET_CTX(task);
-	t   = &task->thread;
-
-	/*
-	 * we need to mask PMU overflow here to
-	 * make sure that we maintain pmc0 until
-	 * we save it. overflow interrupts are
-	 * treated as spurious if there is no
-	 * owner.
-	 *
-	 * XXX: I don't think this is necessary
-	 */
-	PROTECT_CTX(ctx,flags);
-
-	/*
-	 * release ownership of this PMU.
-	 * must be done before we save the registers.
-	 *
-	 * after this call any PMU interrupt is treated
-	 * as spurious.
-	 */
-	SET_PMU_OWNER(NULL, NULL);
-
-	/*
-	 * save all the pmds we use
-	 */
-	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
-
-	/*
-	 * save pmc0 (the ia64_srlz_d() was done in pfm_save_pmds());
-	 * it is needed to check for pending overflow
-	 * on the restore path
-	 */
-	t->pmcs[0] = ia64_get_pmc(0);
-
-	/*
-	 * unfreeze the PMU if it had pending overflows (any pmc0 bit other than bit 0 set)
-	 */
-	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
-
-	/*
-	 * now we can unmask PMU interrupts, they will
-	 * be treated as purely spurious and we will not
-	 * lose any information
-	 */
-	UNPROTECT_CTX(ctx,flags);
-}
-#endif /* CONFIG_SMP */
-
-#ifdef CONFIG_SMP
-/* SMP switch-in hook: reload the PMU state saved for `task' and take PMU ownership on this CPU.
- * in 2.6, interrupts are masked when we come here and the runqueue lock is held
- */
-void
-pfm_load_regs (struct task_struct *task)
-{
-	pfm_context_t *ctx;
-	struct thread_struct *t;
-	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
-	unsigned long flags;
-	u64 psr, psr_up;
-	int need_irq_resend;
-
-	ctx = PFM_GET_CTX(task);
-	if (unlikely(ctx == NULL)) return;
-
-	BUG_ON(GET_PMU_OWNER());	/* nobody may own the PMU on this CPU at this point */
-
-	t     = &task->thread;
-	/*
-	 * possible on unload
-	 */
-	if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
-
-	/*
-	 * we always come here with interrupts ALREADY disabled by
-	 * the scheduler. So we simply need to protect against concurrent
-	 * access, not CPU concurrency.
-	 */
-	flags = pfm_protect_ctx_ctxsw(ctx);
-	psr   = pfm_get_psr();
-
-	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
-
-	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));	/* monitoring must be off while we reload */
-	BUG_ON(psr & IA64_PSR_I);	/* psr.i set would mean interrupts are not masked */
-
-	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {	/* zombie context: tear it down instead of loading */
-		struct pt_regs *regs = task_pt_regs(task);
-
-		BUG_ON(ctx->ctx_smpl_hdr);
-
-		pfm_force_cleanup(ctx, regs);
-
-		pfm_unprotect_ctx_ctxsw(ctx, flags);
-
-		/*
-		 * this one (kmalloc'ed) is fine with interrupts disabled
-		 */
-		pfm_context_free(ctx);
-
-		return;
-	}
-
-	/*
-	 * we restore ALL the debug registers to avoid picking up
-	 * stale state.
-	 */
-	if (ctx->ctx_fl_using_dbreg) {
-		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
-		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
-	}
-	/*
-	 * retrieve saved psr.up
-	 */
-	psr_up = ctx->ctx_saved_psr_up;
-
-	/*
-	 * if we were the last user of the PMU on that CPU,
-	 * then only the registers flagged for partial reload need restoring
-	 */
-	if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
-
-		/*
-		 * retrieve partial reload masks (due to user modifications)
-		 */
-		pmc_mask = ctx->ctx_reload_pmcs[0];
-		pmd_mask = ctx->ctx_reload_pmds[0];
-
-	} else {
-		/*
-		 * To avoid leaking information to the user level when psr.sp=0,
-		 * we must reload ALL implemented pmds (even the ones we don't use).
-		 * In the kernel we only allow PFM_READ_PMDS on registers which
-		 * we initialized or requested (sampling) so there is no risk there.
-		 */
-		pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
-
-		/*
-		 * ALL accessible PMCs are systematically reloaded, unused registers
-		 * get their default (from pfm_reset_pmu_state()) values to avoid picking
-		 * up stale configuration.
-		 *
-		 * PMC0 is never in the mask. It is always restored separately.
-		 */
-		pmc_mask = ctx->ctx_all_pmcs[0];
-	}
-	/*
-	 * when context is MASKED, we will restore PMC with plm=0
-	 * and PMD with stale information, but that's ok, nothing
-	 * will be captured.
-	 *
-	 * XXX: optimize here
-	 */
-	if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
-	if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
-
-	/*
-	 * check for pending overflow at the time the state
-	 * was saved.
-	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
-		/*
-		 * reload pmc0 with the overflow information
-		 * On McKinley PMU, this will trigger a PMU interrupt
-		 */
-		ia64_set_pmc(0, t->pmcs[0]);
-		ia64_srlz_d();
-		t->pmcs[0] = 0UL;	/* the saved overflow status has been consumed */
-
-		/*
-		 * will replay the PMU interrupt
-		 */
-		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
-
-		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
-	}
-
-	/*
-	 * we just did a reload, so we reset the partial reload fields
-	 */
-	ctx->ctx_reload_pmcs[0] = 0UL;
-	ctx->ctx_reload_pmds[0] = 0UL;
-
-	SET_LAST_CPU(ctx, smp_processor_id());
-
-	/*
-	 * bump activation value for this PMU
-	 */
-	INC_ACTIVATION();
-	/*
-	 * record current activation for this context
-	 */
-	SET_ACTIVATION(ctx);
-
-	/*
-	 * establish new ownership.
-	 */
-	SET_PMU_OWNER(task, ctx);
-
-	/*
-	 * restore the psr.up bit. measurement
-	 * is active again.
-	 * no PMU interrupt can happen at this point
-	 * because we still have interrupts disabled.
-	 */
-	if (likely(psr_up)) pfm_set_psr_up();
-
-	/*
-	 * allow concurrent access to context
-	 */
-	pfm_unprotect_ctx_ctxsw(ctx, flags);
-}
-#else /*  !CONFIG_SMP */
-/*
- * reload PMU state for UP kernels: fast path if `task' still owns the PMU, otherwise evict the owner and reload
- * in 2.5 we come here with interrupts disabled
- */
-void
-pfm_load_regs (struct task_struct *task)
-{
-	struct thread_struct *t;
-	pfm_context_t *ctx;
-	struct task_struct *owner;
-	unsigned long pmd_mask, pmc_mask;
-	u64 psr, psr_up;
-	int need_irq_resend;
-
-	owner = GET_PMU_OWNER();
-	ctx   = PFM_GET_CTX(task);	/* NOTE(review): unlike the SMP variant, ctx is not checked for NULL here */
-	t     = &task->thread;
-	psr   = pfm_get_psr();
-
-	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));	/* monitoring must be off while we reload */
-	BUG_ON(psr & IA64_PSR_I);	/* psr.i set would mean interrupts are not masked */
-
-	/*
-	 * we restore ALL the debug registers to avoid picking up
-	 * stale state.
-	 *
-	 * This must be done even when the task is still the owner
-	 * as the registers may have been modified via ptrace()
-	 * (not perfmon) by the previous task.
-	 */
-	if (ctx->ctx_fl_using_dbreg) {
-		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
-		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
-	}
-
-	/*
-	 * retrieve saved psr.up
-	 */
-	psr_up = ctx->ctx_saved_psr_up;
-	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
-
-	/*
-	 * short path, our state is still there, just
-	 * need to restore psr and we go
-	 *
-	 * we touch neither PMC nor PMD. the psr is not touched
-	 * by the overflow_handler. So we are safe w.r.t. interrupt
-	 * concurrency even without interrupt masking.
-	 */
-	if (likely(owner == task)) {
-		if (likely(psr_up)) pfm_set_psr_up();
-		return;
-	}
-
-	/*
-	 * someone else is still using the PMU, first push it out and
-	 * then we'll be able to install our stuff !
-	 *
-	 * Upon return, there will be no owner for the current PMU
-	 */
-	if (owner) pfm_lazy_save_regs(owner);
-
-	/*
-	 * To avoid leaking information to the user level when psr.sp=0,
-	 * we must reload ALL implemented pmds (even the ones we don't use).
-	 * In the kernel we only allow PFM_READ_PMDS on registers which
-	 * we initialized or requested (sampling) so there is no risk there.
-	 */
-	pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
-
-	/*
-	 * ALL accessible PMCs are systematically reloaded, unused registers
-	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
-	 * up stale configuration.
-	 *
-	 * PMC0 is never in the mask. It is always restored separately
-	 */
-	pmc_mask = ctx->ctx_all_pmcs[0];
-
-	pfm_restore_pmds(t->pmds, pmd_mask);
-	pfm_restore_pmcs(t->pmcs, pmc_mask);
-
-	/*
-	 * check for pending overflow at the time the state
-	 * was saved.
-	 */
-	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
-		/*
-		 * reload pmc0 with the overflow information
-		 * On McKinley PMU, this will trigger a PMU interrupt
-		 */
-		ia64_set_pmc(0, t->pmcs[0]);
-		ia64_srlz_d();
-
-		t->pmcs[0] = 0UL;	/* the saved overflow status has been consumed */
-
-		/*
-		 * will replay the PMU interrupt
-		 */
-		if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR);
-
-		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
-	}
-
-	/*
-	 * establish new ownership.
-	 */
-	SET_PMU_OWNER(task, ctx);
-
-	/*
-	 * restore the psr.up bit. measurement
-	 * is active again.
-	 * no PMU interrupt can happen at this point
-	 * because we still have interrupts disabled.
-	 */
-	if (likely(psr_up)) pfm_set_psr_up();
-}
-#endif /* CONFIG_SMP */
-
-/*
- * fold the final value of every used PMD into ctx->ctx_pmds[]; this function assumes monitoring is stopped
- */
-static void
-pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
-{
-	u64 pmc0;
-	unsigned long mask2, val, pmd_val, ovfl_val;
-	int i, can_access_pmu = 0;
-	int is_self;
-
-	/*
-	 * is the caller the task being monitored (or which initiated the
-	 * session for system wide measurements)
-	 */
-	is_self = ctx->ctx_task == task ? 1 : 0;
-
-	/*
-	 * can access PMU if task is the owner of the PMU state on the current CPU
-	 * or if we are running on the CPU bound to the context in system-wide mode
-	 * (that is not necessarily the task the context is attached to in this mode).
-	 * In system-wide we always have can_access_pmu true because a task running on an
-	 * invalid processor is flagged earlier in the call stack (see pfm_stop).
-	 */
-	can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
-	if (can_access_pmu) {
-		/*
-		 * Mark the PMU as not owned
-		 * This will cause the interrupt handler to do nothing in case an overflow
-		 * interrupt was in-flight
-		 * This also guarantees that pmc0 will contain the final state
-		 * It virtually gives us full control on overflow processing from that point
-		 * on.
-		 */
-		SET_PMU_OWNER(NULL, NULL);
-		DPRINT(("releasing ownership\n"));
-
-		/*
-		 * read current overflow status:
-		 *
-		 * we are guaranteed to read the final stable state
-		 */
-		ia64_srlz_d();
-		pmc0 = ia64_get_pmc(0); /* slow */
-
-		/*
-		 * reset freeze bit, overflow status information destroyed
-		 */
-		pfm_unfreeze_pmu();
-	} else {
-		pmc0 = task->thread.pmcs[0];	/* use the overflow status saved in the thread struct */
-		/*
-		 * clear whatever overflow status bits there were
-		 */
-		task->thread.pmcs[0] = 0;
-	}
-	ovfl_val = pmu_conf->ovfl_val;
-	/*
-	 * we save all the used pmds
-	 * we take care of overflows for counting PMDs
-	 *
-	 * XXX: sampling situation is not taken into account here
-	 */
-	mask2 = ctx->ctx_used_pmds[0];
-
-	DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2));
-
-	for (i = 0; mask2; i++, mask2>>=1) {	/* bit i of mask2 <=> pmd[i] is used */
-
-		/* skip non used pmds */
-		if ((mask2 & 0x1) == 0) continue;
-
-		/*
-		 * can access PMU always true in system wide mode
-		 */
-		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
-
-		if (PMD_IS_COUNTING(i)) {
-			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
-				task->pid,
-				i,
-				ctx->ctx_pmds[i].val,
-				val & ovfl_val));
-
-			/*
-			 * we rebuild the full 64 bit value of the counter
-			 */
-			val = ctx->ctx_pmds[i].val + (val & ovfl_val);
-
-			/*
-			 * now everything is in ctx_pmds[] and we need
-			 * to clear the saved context from save_regs() such that
-			 * pfm_read_pmds() gets the correct value
-			 */
-			pmd_val = 0UL;
-
-			/*
-			 * take care of overflow inline: bit i of pmc0 set means pmd[i] wrapped
-			 */
-			if (pmc0 & (1UL << i)) {
-				val += 1 + ovfl_val;
-				DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
-			}
-		}
-
-		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
-
-		if (is_self) task->thread.pmds[i] = pmd_val;
-
-		ctx->ctx_pmds[i].val = val;
-	}
-}
-
-static struct irqaction perfmon_irqaction = {
-	.handler = pfm_interrupt_handler,
-	.flags   = IRQF_DISABLED,
-	.name    = "perfmon"
-};
-
-static void
-pfm_alt_save_pmu_state(void *data)
-{
-	struct pt_regs *regs;
-	/* run on each CPU by pfm_install_alt_pmu_interrupt(): stop monitoring and freeze the PMU; `data' is unused */
-	regs = task_pt_regs(current);
-
-	DPRINT(("called\n"));
-
-	/*
-	 * should not be necessary but
-	 * let's take no risk
-	 */
-	pfm_clear_psr_up();
-	pfm_clear_psr_pp();
-	ia64_psr(regs)->pp = 0;
-
-	/*
-	 * This call is required
-	 * May cause a spurious interrupt on some processors
-	 */
-	pfm_freeze_pmu();
-
-	ia64_srlz_d();
-}
-
-void
-pfm_alt_restore_pmu_state(void *data)
-{
-	struct pt_regs *regs;
-	/* run on each CPU by pfm_remove_alt_pmu_interrupt(): monitoring off, PMU unfrozen; `data' is unused */
-	regs = task_pt_regs(current);
-
-	DPRINT(("called\n"));
-
-	/*
-	 * put PMU back in state expected
-	 * by perfmon
-	 */
-	pfm_clear_psr_up();
-	pfm_clear_psr_pp();
-	ia64_psr(regs)->pp = 0;
-
-	/*
-	 * perfmon runs with PMU unfrozen at all times
-	 */
-	pfm_unfreeze_pmu();
-
-	ia64_srlz_d();
-}
-
-int
-pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
-{
-	int ret, i;
-	int reserve_cpu;
-	/* install hdl as the alternate PMU interrupt handler; returns 0, -EINVAL, -EBUSY, or the error of a failed step */
-	/* some sanity checks */
-	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
-
-	/* do the easy test first */
-	if (pfm_alt_intr_handler) return -EBUSY;
-
-	/* one at a time in the install or remove, just fail the others */
-	if (!spin_trylock(&pfm_alt_install_check)) {
-		return -EBUSY;
-	}
-
-	/* reserve our session on every online CPU */
-	for_each_online_cpu(reserve_cpu) {
-		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
-		if (ret) goto cleanup_reserve;
-	}
-
-	/* save the current system wide pmu states */
-	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
-	if (ret) {
-		DPRINT(("on_each_cpu() failed: %d\n", ret));
-		goto cleanup_reserve;
-	}
-
-	/* officially change to the alternate interrupt handler */
-	pfm_alt_intr_handler = hdl;
-
-	spin_unlock(&pfm_alt_install_check);
-
-	return 0;
-
-cleanup_reserve:
-	for_each_online_cpu(i) {
-		/* don't unreserve more than we reserved: reserve_cpu is the CPU where reservation stopped */
-		if (i >= reserve_cpu) break;
-
-		pfm_unreserve_session(NULL, 1, i);
-	}
-
-	spin_unlock(&pfm_alt_install_check);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
-
-int
-pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
-{
-	int cpu;
-	int err;
-
-	/*
-	 * Uninstall the alternate PMU interrupt handler hdl.
-	 * A NULL hdl, or one that is not the installed handler (we cannot
-	 * remove someone else's handler!), is rejected with -EINVAL.
-	 */
-	if (hdl == NULL || pfm_alt_intr_handler != hdl)
-		return -EINVAL;
-
-	/* installs and removals are serialized; a concurrent one just fails */
-	if (!spin_trylock(&pfm_alt_install_check))
-		return -EBUSY;
-
-	pfm_alt_intr_handler = NULL;
-
-	/* a failure here is only reported, the removal still completes */
-	err = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
-	if (err)
-		DPRINT(("on_each_cpu() failed: %d\n", err));
-
-	/* give back the session held on every online CPU */
-	for_each_online_cpu(cpu)
-		pfm_unreserve_session(NULL, 1, cpu);
-	spin_unlock(&pfm_alt_install_check);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
-
-/*
- * perfmon initialization routine, called from the initcall() table
- */
-static int init_pfm_fs(void);
-
-static int __init
-pfm_probe_pmu(void)
-{
-	const int cpu_family = local_cpu_data->family;
-	pmu_config_t **entry;
-
-	/* select the first pmu_confs[] entry that accepts this CPU; -1 if none does */
-	for (entry = pmu_confs; *entry != NULL; entry++) {
-		pmu_config_t *cfg = *entry;
-		int hit;
-		/* a probe() callback, when present, decides alone */
-		if (cfg->probe != NULL)
-			hit = (cfg->probe() == 0);
-		else	/* otherwise match on the family; 0xff accepts any family */
-			hit = (cfg->pmu_family == cpu_family || cfg->pmu_family == 0xff);
-		if (hit) {
-			pmu_conf = cfg;
-			return 0;
-		}
-	}
-	return -1;
-}
-
-static struct file_operations pfm_proc_fops = {
-	.open		= pfm_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-int __init
-pfm_init(void)
-{
-	unsigned int n, n_counters, i;
-	/* perfmon initcall: 0 on success, -ENODEV if no PMU description matches, -1 on any later failure */
-	printk("perfmon: version %u.%u IRQ %u\n",
-		PFM_VERSION_MAJ,
-		PFM_VERSION_MIN,
-		IA64_PERFMON_VECTOR);
-
-	if (pfm_probe_pmu()) {
-		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n",
-				local_cpu_data->family);
-		return -ENODEV;
-	}
-
-	/*
-	 * compute the number of implemented PMD/PMC from the
-	 * description tables, and record each one in the impl_* bitmaps
-	 */
-	n = 0;
-	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
-		if (PMC_IS_IMPL(i) == 0) continue;
-		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
-		n++;
-	}
-	pmu_conf->num_pmcs = n;
-
-	n = 0; n_counters = 0;
-	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
-		if (PMD_IS_IMPL(i) == 0) continue;
-		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
-		n++;
-		if (PMD_IS_COUNTING(i)) n_counters++;
-	}
-	pmu_conf->num_pmds      = n;
-	pmu_conf->num_counters  = n_counters;
-
-	/*
-	 * sanity checks on the number of debug registers
-	 */
-	if (pmu_conf->use_rr_dbregs) {
-		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
-			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
-			pmu_conf = NULL;
-			return -1;
-		}
-		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
-			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs);	/* report the data count, not num_ibrs */
-			pmu_conf = NULL;
-			return -1;
-		}
-	}
-
-	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
-	       pmu_conf->pmu_name,
-	       pmu_conf->num_pmcs,
-	       pmu_conf->num_pmds,
-	       pmu_conf->num_counters,
-	       ffz(pmu_conf->ovfl_val));
-
-	/* sanity check: the PMU must fit in the per-thread register arrays */
-	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
-		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
-		pmu_conf = NULL;
-		return -1;
-	}
-
-	/*
-	 * create /proc/perfmon (mostly for debugging purposes)
-	 */
-	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
-	if (perfmon_dir == NULL) {
-		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
-		pmu_conf = NULL;
-		return -1;
-	}
-	/*
-	 * install customized file operations for /proc/perfmon entry
-	 */
-	perfmon_dir->proc_fops = &pfm_proc_fops;
-
-	/*
-	 * create /proc/sys/kernel/perfmon (for debugging purposes)
-	 */
-	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
-
-	/*
-	 * initialize all our spinlocks
-	 */
-	spin_lock_init(&pfm_sessions.pfs_lock);
-	spin_lock_init(&pfm_buffer_fmt_lock);
-
-	init_pfm_fs();	/* NOTE(review): the return value is ignored */
-
-	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;
-
-	return 0;
-}
-
-__initcall(pfm_init);
-
-/* per-CPU PMU setup: monitoring off, PMU unfrozen, PMV programmed with the perfmon vector.
- * this function is called before pfm_init()
- */
-void
-pfm_init_percpu (void)
-{
-	static int first_time=1;	/* so the per-CPU irq action is registered only once */
-	/*
-	 * make sure no measurement is active
-	 * (may inherit programmed PMCs from EFI).
-	 */
-	pfm_clear_psr_pp();
-	pfm_clear_psr_up();
-
-	/*
-	 * we run with the PMU not frozen at all times
-	 */
-	pfm_unfreeze_pmu();
-
-	if (first_time) {
-		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
-		first_time=0;
-	}
-
-	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
-	ia64_srlz_d();
-}
-
-/*
- * used for debug purposes only: print this CPU's PMU state. NOTE: it also clears psr.up/psr.pp in the saved frame.
- */
-void
-dump_pmu_state(const char *from)
-{
-	struct task_struct *task;
-	struct thread_struct *t;
-	struct pt_regs *regs;
-	pfm_context_t *ctx;
-	unsigned long psr, dcr, info, flags;
-	int i, this_cpu;
-
-	local_irq_save(flags);
-
-	this_cpu = smp_processor_id();
-	regs     = task_pt_regs(current);
-	info     = PFM_CPUINFO_GET();
-	dcr      = ia64_getreg(_IA64_REG_CR_DCR);
-
-	/* nothing to report when no monitoring is flagged anywhere */
-	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
-		local_irq_restore(flags);
-		return;
-	}
-	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
-		this_cpu,
-		from,
-		current->pid,
-		regs->cr_iip,
-		current->comm);
-
-	task = GET_PMU_OWNER();
-	ctx  = GET_PMU_CTX();
-
-	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
-
-	psr = pfm_get_psr();
-
-	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n",
-		this_cpu,
-		ia64_get_pmc(0),
-		psr & IA64_PSR_PP ? 1 : 0,
-		psr & IA64_PSR_UP ? 1 : 0,
-		dcr & IA64_DCR_PP ? 1 : 0,
-		info,
-		ia64_psr(regs)->up,
-		ia64_psr(regs)->pp);
-
-	ia64_psr(regs)->up = 0;
-	ia64_psr(regs)->pp = 0;
-
-	t = &current->thread;
-
-	for (i=1; PMC_IS_LAST(i) == 0; i++) {	/* pmc0 was already printed above */
-		if (PMC_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
-	}
-
-	for (i=1; PMD_IS_LAST(i) == 0; i++) {
-		if (PMD_IS_IMPL(i) == 0) continue;
-		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
-	}
-
-	if (ctx) {	/* labels name the fields actually printed (formerly mislabelled fd= / ctx_task=) */
-		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n",
-				this_cpu,
-				ctx->ctx_state,
-				ctx->ctx_smpl_vaddr,
-				ctx->ctx_smpl_hdr,
-				ctx->ctx_msgq_head,
-				ctx->ctx_msgq_tail,
-				ctx->ctx_saved_psr_up);
-	}
-	local_irq_restore(flags);
-}
-
-/*
- * called from process.c:copy_thread(). task is new child; it starts with no perfmon context.
- */
-void
-pfm_inherit(struct task_struct *task, struct pt_regs *regs)
-{
-	struct thread_struct *thread;
-
-	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
-
-	thread = &task->thread;
-
-	/*
-	 * cut links inherited from parent (current)
-	 */
-	thread->pfm_context = NULL;
-
-	PFM_SET_WORK_PENDING(task, 0);
-
-	/*
-	 * the psr bits are already set properly in copy_threads(), so regs is not used here
-	 */
-}
-#else  /* !CONFIG_PERFMON */
-asmlinkage long	/* stub compiled when CONFIG_PERFMON is not set */
-sys_perfmonctl (int fd, int cmd, void *arg, int count)
-{
-	return -ENOSYS;	/* every argument is ignored */
-}
-#endif /* CONFIG_PERFMON */
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
deleted file mode 100644
index c7af364b42..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
+++ /dev/null
@@ -1,1030 +0,0 @@
-/*
- * Architecture-specific setup.
- *
- * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *	Stephane Eranian <eranian@hpl.hp.com>
- * Copyright (C) 2000, 2004 Intel Corp
- * 	Rohit Seth <rohit.seth@intel.com>
- * 	Suresh Siddha <suresh.b.siddha@intel.com>
- * 	Gordon Jin <gordon.jin@intel.com>
- * Copyright (C) 1999 VA Linux Systems
- * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
- *
- * 12/26/04 S.Siddha, G.Jin, R.Seth
- *			Add multi-threading and multi-core detection
- * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
- * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
- * 03/31/00 R.Seth	cpu_initialized and current->processor fixes
- * 02/04/00 D.Mosberger	some more get_cpuinfo fixes...
- * 02/01/00 R.Seth	fixed get_cpuinfo for SMP
- * 01/07/99 S.Eranian	added the support for command line argument
- * 06/24/99 W.Drummond	added boot_cpu_data.
- * 05/28/05 Z. Menyhart	Dynamic stride size for "flush_icache_range()"
- */
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/acpi.h>
-#include <linux/bootmem.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/reboot.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/string.h>
-#include <linux/threads.h>
-#include <linux/screen_info.h>
-#include <linux/dmi.h>
-#include <linux/serial.h>
-#include <linux/serial_core.h>
-#include <linux/efi.h>
-#include <linux/initrd.h>
-#include <linux/pm.h>
-#include <linux/cpufreq.h>
-
-#include <asm/ia32.h>
-#include <asm/machvec.h>
-#include <asm/mca.h>
-#include <asm/meminit.h>
-#include <asm/page.h>
-#include <asm/patch.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/sal.h>
-#include <asm/sections.h>
-#include <asm/serial.h>
-#include <asm/setup.h>
-#include <asm/smp.h>
-#include <asm/system.h>
-#include <asm/unistd.h>
-#include <asm/system.h>
-#ifdef CONFIG_XEN
-#include <asm/hypervisor.h>
-#include <asm/xen/xencomm.h>
-#include <xen/xencons.h>
-#endif
-#include <linux/dma-mapping.h>
-
-#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
-# error "struct cpuinfo_ia64 too big!"
-#endif
-
-#ifdef CONFIG_SMP
-unsigned long __per_cpu_offset[NR_CPUS];
-EXPORT_SYMBOL(__per_cpu_offset);
-#endif
-
-#ifdef CONFIG_XEN
-static void	/* unwind callback passed to unw_init_running() by xen_panic_event(); arg is unused */
-xen_panic_hypercall(struct unw_frame_info *info, void *arg)
-{
-	current->thread.ksp = (__u64)info->sw - 16;	/* record a kernel stack pointer derived from the unwind frame */
-	HYPERVISOR_shutdown(SHUTDOWN_crash);	/* request a SHUTDOWN_crash shutdown from the hypervisor */
-	/* we're never actually going to get here... */
-}
-
-static int	/* notifier callback installed via xen_panic_block; all three arguments are ignored */
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	unw_init_running(xen_panic_hypercall, NULL);	/* runs xen_panic_hypercall() with unwind info for the current context */
-	/* we're never actually going to get here... */
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block xen_panic_block = {
-	xen_panic_event, NULL, 0 /* try to go last */
-};
-
-void xen_pm_power_off(void)	/* power-off path under Xen */
-{
-	local_irq_disable();	/* mask interrupts before handing control to the hypervisor */
-	HYPERVISOR_shutdown(SHUTDOWN_poweroff);	/* request a SHUTDOWN_poweroff shutdown */
-}
-#endif
-
-extern void ia64_setup_printk_clock(void);
-
-DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
-DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
-unsigned long ia64_cycles_per_usec;
-struct ia64_boot_param *ia64_boot_param;
-struct screen_info screen_info;
-unsigned long vga_console_iobase;
-unsigned long vga_console_membase;
-
-static struct resource data_resource = {
-	.name	= "Kernel data",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource code_resource = {
-	.name	= "Kernel code",
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-extern void efi_initialize_iomem_resources(struct resource *,
-		struct resource *);
-extern char _text[], _end[], _etext[];
-
-unsigned long ia64_max_cacheline_size;
-
-int dma_get_cache_alignment(void)	/* exported accessor for ia64_max_cacheline_size */
-{
-        return ia64_max_cacheline_size;	/* unsigned long value returned as int */
-}
-EXPORT_SYMBOL(dma_get_cache_alignment);
-
-unsigned long ia64_iobase;	/* virtual address for I/O accesses */
-EXPORT_SYMBOL(ia64_iobase);
-struct io_space io_space[MAX_IO_SPACES];
-EXPORT_SYMBOL(io_space);
-unsigned int num_io_spaces;
-
-/*
- * "flush_icache_range()" needs to know what processor dependent stride size to use
- * when it makes i-cache(s) coherent with d-caches.
- */
-#define	I_CACHE_STRIDE_SHIFT	5	/* Safest way to go: 32 bytes by 32 bytes */
-unsigned long ia64_i_cache_stride_shift = ~0;
-
-/*
- * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
- * mask specifies a mask of address bits that must be 0 in order for two buffers to be
- * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
- * address of the second buffer must be aligned to (merge_mask+1) in order to be
- * mergeable).  By default, we assume there is no I/O MMU which can merge physically
- * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu
- * page-size of 2^64.
- */
-unsigned long ia64_max_iommu_merge_mask = ~0UL;
-EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
-
-/*
- * We use a special marker for the end of memory and it uses the extra (+1) slot
- */
-struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
-int num_rsvd_regions __initdata;
-
-
-/*
- * Filter incoming memory segments based on the primitive map created from the boot
- * parameters. Segments contained in the map are removed from the memory ranges. A
- * caller-specified function is called with the memory ranges that remain after filtering.
- * This routine does not assume the incoming segments are sorted.
- */
-int __init
-filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
-{
-	void (*func)(unsigned long, unsigned long, int) = arg;
-	unsigned long lower = PAGE_OFFSET;	/* lowest possible address (the walker uses virtual addresses) */
-	unsigned long gap_start, gap_end;
-	struct rsvd_region *rsvd;
-
-#if IGNORE_PFN0
-	if (start == PAGE_OFFSET) {
-		printk(KERN_WARNING "warning: skipping physical page 0\n");
-		start += PAGE_SIZE;
-		if (start >= end)
-			return 0;
-	}
-#endif
-	/*
-	 * Walk the reserved regions in array order and hand every non-empty
-	 * piece of [start, end) lying between two consecutive regions to func.
-	 */
-	for (rsvd = rsvd_region; rsvd < rsvd_region + num_rsvd_regions; rsvd++) {
-		gap_start = max(start, lower);
-		gap_end   = min(end, rsvd->start);
-		if (gap_end > gap_start)
-			call_pernode_memory(__pa(gap_start), gap_end - gap_start, func);
-
-		/* the segment is exhausted once a gap reaches its end */
-		if (gap_end == end)
-			break;
-
-		lower = rsvd->end;
-	}
-	/* the end-of-memory marker lets the loop body handle the final gap */
-	return 0;
-}
-
-static void __init
-sort_regions (struct rsvd_region *rsvd_region, int max)
-{
-	int pass, k;
-
-	/* bubble sort, ascending by .start; each pass settles the largest remaining entry */
-	for (pass = max - 1; pass >= 0; pass--) {
-		for (k = 0; k < pass; k++) {
-			struct rsvd_region *lo = &rsvd_region[k], *hi = lo + 1;
-			if (lo->start > hi->start) {
-				struct rsvd_region held = *lo;
-				*lo = *hi;
-				*hi = held;
-			}
-		}
-	}
-}
-
-/*
- * Request address space for all standard resources: fills in the kernel code/data ranges, then lets EFI register them
- */
-static int __init register_memory(void)
-{
-	code_resource.start = ia64_tpa(_text);	/* code: [_text, _etext) */
-	code_resource.end   = ia64_tpa(_etext) - 1;
-	data_resource.start = ia64_tpa(_etext);	/* data: [_etext, _end) */
-	data_resource.end   = ia64_tpa(_end) - 1;
-	efi_initialize_iomem_resources(&code_resource, &data_resource);
-
-	return 0;
-}
-
-__initcall(register_memory);
-
-/**
- * reserve_memory - setup reserved memory areas
- *
- * Fill rsvd_region[] with the areas set aside for the boot parameters,
- * initrd, etc., append an end marker, then sort it by start address. There are
- * currently %IA64_MAX_RSVD_REGIONS defined, see include/asm-ia64/meminit.h if you need to define more.
- */
-void __init
-reserve_memory (void)
-{
-	int n = 0;
-
-	/*
-	 * none of the entries in this table overlap
-	 */
-	rsvd_region[n].start = (unsigned long) ia64_boot_param;	/* the boot parameter block itself */
-	rsvd_region[n].end   = rsvd_region[n].start + sizeof(*ia64_boot_param);
-	n++;
-
-	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap);	/* EFI memory map */
-	rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->efi_memmap_size;
-	n++;
-
-	rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line);	/* command line incl. its NUL */
-	rsvd_region[n].end   = (rsvd_region[n].start
-				+ strlen(__va(ia64_boot_param->command_line)) + 1);
-	n++;
-
-	rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);	/* kernel image up to _end */
-	rsvd_region[n].end   = (unsigned long) ia64_imva(_end);
-	n++;
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {	/* one page at the start_info pfn published in the shared info */
-		rsvd_region[n].start = (unsigned long)__va((HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT));
-		rsvd_region[n].end   = rsvd_region[n].start + PAGE_SIZE;
-		n++;
- 	}
-#endif
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (ia64_boot_param->initrd_start) {	/* initrd, only when the boot loader passed one */
-		rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
-		rsvd_region[n].end   = rsvd_region[n].start + ia64_boot_param->initrd_size;
-		n++;
-	}
-#endif
-
-	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);	/* callee fills in this entry's bounds */
-	n++;
-
-	/* end of memory marker */
-	rsvd_region[n].start = ~0UL;
-	rsvd_region[n].end   = ~0UL;
-	n++;
-
-	num_rsvd_regions = n;
-	BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n);	/* NOTE(review): this bound check runs after the entries were written */
-
-	sort_regions(rsvd_region, num_rsvd_regions);
-}
-
-/**
- * find_initrd - get initrd parameters from the boot parameter structure
- *
- * Grab the initrd start and size from the boot parameter struct given us by
- * the boot loader and set initrd_start/initrd_end; a no-op without CONFIG_BLK_DEV_INITRD.
- */
-void __init
-find_initrd (void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (ia64_boot_param->initrd_start) {	/* zero means no initrd was passed */
-		initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
-		initrd_end   = initrd_start+ia64_boot_param->initrd_size;
-
-		printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
-		       initrd_start, ia64_boot_param->initrd_size);
-	}
-#endif
-}
-
-static void __init
-io_port_init (void)
-{
-	unsigned long phys_iobase;
-
-	/*
-	 * Set `iobase' based on the EFI memory map or, failing that, the
-	 * value firmware left in ar.k0.
-	 *
-	 * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute
-	 * the port's virtual address, so ia32_load_state() loads it with a
-	 * user virtual address.  But in ia64 mode, glibc uses the
-	 * *physical* address in ar.k0 to mmap the appropriate area from
-	 * /dev/mem, and the inX()/outX() interfaces use MMIO.  In both
-	 * cases, user-mode can only use the legacy 0-64K I/O port space.
-	 *
-	 * ar.k0 is not involved in kernel I/O port accesses, which can use
-	 * any of the I/O port spaces and are done via MMIO using the
-	 * virtual mmio_base from the appropriate io_space[].
-	 */
-	phys_iobase = efi_get_iobase();
-	if (!phys_iobase) {
-		phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
-		printk(KERN_INFO "No I/O port range found in EFI memory map, "
-			"falling back to AR.KR0 (0x%lx)\n", phys_iobase);
-	}
-	ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
-	ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
-
-	/* setup legacy IO port space */
-	io_space[0].mmio_base = ia64_iobase;
-	io_space[0].sparse = 1;
-	num_io_spaces = 1;
-}
-
-/**
- * early_console_setup - setup debugging console
- *
- * Consoles started here require little enough setup that we can start using
- * them very early in the boot process, either right after the machine
- * vector initialization, or even before if the drivers can detect their hw.
- *
- * Returns non-zero if a console couldn't be setup.
- */
-static inline int __init
-early_console_setup (char *cmdline)
-{
-	int earlycons = 0;
-
-#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
-	{
-		extern int sn_serial_console_early_setup(void);
-		if (!sn_serial_console_early_setup())
-			earlycons++;
-	}
-#endif
-#ifdef CONFIG_EFI_PCDP
-	if (!efi_setup_pcdp_console(cmdline))
-		earlycons++;
-#endif
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-	if (!early_serial_console_init(cmdline))
-		earlycons++;
-#endif
-
-	return (earlycons) ? 0 : -1;
-}
-
-static inline void
-mark_bsp_online (void)
-{
-#ifdef CONFIG_SMP
-	/* If we register an early console, allow CPU 0 to printk */
-	cpu_set(smp_processor_id(), cpu_online_map);
-#endif
-}
-
-#ifdef CONFIG_SMP
-static void __init
-check_for_logical_procs (void)
-{
-	pal_logical_to_physical_t info;
-	s64 status;
-
-	status = ia64_pal_logical_to_phys(0, &info);
-	if (status == -1) {
-		printk(KERN_INFO "No logical to physical processor mapping "
-		       "available\n");
-		return;
-	}
-	if (status) {
-		printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
-		       status);
-		return;
-	}
-	/*
-	 * Total number of siblings that BSP has.  Though not all of them 
-	 * may have booted successfully. The correct number of siblings 
-	 * booted is in info.overview_num_log.
-	 */
-	smp_num_siblings = info.overview_tpc;
-	smp_num_cpucores = info.overview_cpp;
-}
-#endif
-
-static __initdata int nomca;
-static __init int setup_nomca(char *s)
-{
-	nomca = 1;
-	return 0;
-}
-early_param("nomca", setup_nomca);
-
-void __init
-setup_arch (char **cmdline_p)
-{
-	unw_init();
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		/* Must be done before any hypercall.  */
-		xencomm_init();
-
-		setup_xen_features();
-		/* Register a call for panic conditions. */
-		atomic_notifier_chain_register(&panic_notifier_list,
-		                               &xen_panic_block);
-		pm_power_off = xen_pm_power_off;
-	}
-#endif
-
-	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
-
-	*cmdline_p = __va(ia64_boot_param->command_line);
-	strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
-
-	efi_init();
-	io_port_init();
-
-	parse_early_param();
-
-#ifdef CONFIG_IA64_GENERIC
-	machvec_init(NULL);
-#endif
-
-	if (early_console_setup(*cmdline_p) == 0)
-		mark_bsp_online();
-
-#ifdef CONFIG_ACPI
-	/* Initialize the ACPI boot-time table parser */
-	acpi_table_init();
-# ifdef CONFIG_ACPI_NUMA
-	acpi_numa_init();
-# endif
-#else
-# ifdef CONFIG_SMP
-	smp_build_cpu_map();	/* happens, e.g., with the Ski simulator */
-# endif
-#endif /* CONFIG_APCI_BOOT */
-
-	find_memory();
-
-	/* process SAL system table: */
-	ia64_sal_init(__va(efi.sal_systab));
-
-	ia64_setup_printk_clock();
-
-#ifdef CONFIG_SMP
-	cpu_physical_id(0) = hard_smp_processor_id();
-
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
-
-	check_for_logical_procs();
-	if (smp_num_cpucores > 1)
-		printk(KERN_INFO
-		       "cpu package is Multi-Core capable: number of cores=%d\n",
-		       smp_num_cpucores);
-	if (smp_num_siblings > 1)
-		printk(KERN_INFO
-		       "cpu package is Multi-Threading capable: number of siblings=%d\n",
-		       smp_num_siblings);
-#endif
-
-	cpu_init();	/* initialize the bootstrap CPU */
-	mmu_context_init();	/* initialize context_id bitmap */
-
-#ifdef CONFIG_ACPI
-	acpi_boot_init();
-#endif
-
-#ifdef CONFIG_VT
-	if (!conswitchp) {
-# if defined(CONFIG_DUMMY_CONSOLE)
-		conswitchp = &dummy_con;
-# endif
-# if defined(CONFIG_VGA_CONSOLE)
-		/*
-		 * Non-legacy systems may route legacy VGA MMIO range to system
-		 * memory.  vga_con probes the MMIO hole, so memory looks like
-		 * a VGA device to it.  The EFI memory map can tell us if it's
-		 * memory so we can avoid this problem.
-		 */
-		if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
-			conswitchp = &vga_con;
-# endif
-	}
-#ifdef CONFIG_XEN
-	if (is_running_on_xen()) {
-		shared_info_t *s = HYPERVISOR_shared_info;
-
-		xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT);
-
-		printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%ld "
-		       "flags=0x%x\n", s->arch.start_info_pfn,
-		       xen_start_info->nr_pages, xen_start_info->flags);
-
-		if (!is_initial_xendomain()) {
-#if !defined(CONFIG_VT) || !defined(CONFIG_DUMMY_CONSOLE)
-			conswitchp = NULL;
-#endif
-		}
-		
-		/*
-		 * If a console= is NOT specified, we assume using the
-		 * xencons console is desired.  By default, this is ttyS0
-		 * for dom0 and tty0 for domU.
-		 */
-		if (!strstr(*cmdline_p, "console=")) {
-			char *p, *q, name[5];
-			int offset = 0;
-
-			if (is_initial_xendomain())
-				strncpy(name, "ttyS", 4);
-			else
-				strncpy(name, "tty", 3);
-
-			p = strstr(*cmdline_p, "xencons=");
-
-			if (p) {
-				p += 8;
-				if (!strncmp(p, "ttyS", 4)) {
-					strncpy(name, p, 4);
-					p += 4;
-					offset = simple_strtol(p, &q, 10);
-					if (p == q)
-						offset = 0;
-				} else if (!strncmp(p, "tty", 3) ||
-				           !strncmp(p, "xvc", 3)) {
-					strncpy(name, p, 3);
-					p += 3;
-					offset = simple_strtol(p, &q, 10);
-					if (p == q)
-						offset = 0;
-				} else if (!strncmp(p, "off", 3))
-					offset = -1;
-			}
-
-			if (offset >= 0)
-				add_preferred_console(name, offset, NULL);
-		}
-	}
-	xencons_early_setup();
-#endif
-#endif
-
-
-	/* enable IA-64 Machine Check Abort Handling unless disabled */
-#ifdef CONFIG_XEN
-	if (is_running_on_xen() && !is_initial_xendomain())
-		nomca = 1;
-#endif
-	if (!nomca)
-		ia64_mca_init();
-
-	platform_setup(cmdline_p);
-#ifdef CONFIG_XEN
-	if (!is_running_on_xen() && !ia64_platform_is("xen")) {
-		extern ia64_mv_setup_t xen_setup;
-		xen_setup(cmdline_p);
-	}
-#endif
-	paging_init();
-#ifdef CONFIG_XEN
-	contiguous_bitmap_init(max_pfn);
-#endif
-}
-
-/*
- * Display cpu info for all cpu's.
- */
-static int
-show_cpuinfo (struct seq_file *m, void *v)
-{
-#ifdef CONFIG_SMP
-#	define lpj	c->loops_per_jiffy
-#	define cpunum	c->cpu
-#else
-#	define lpj	loops_per_jiffy
-#	define cpunum	0
-#endif
-	static struct {
-		unsigned long mask;
-		const char *feature_name;
-	} feature_bits[] = {
-		{ 1UL << 0, "branchlong" },
-		{ 1UL << 1, "spontaneous deferral"},
-		{ 1UL << 2, "16-byte atomic ops" }
-	};
-	char family[32], features[128], *cp, sep;
-	struct cpuinfo_ia64 *c = v;
-	unsigned long mask;
-	unsigned long proc_freq;
-	int i;
-
-	mask = c->features;
-
-	switch (c->family) {
-	      case 0x07:	memcpy(family, "Itanium", 8); break;
-	      case 0x1f:	memcpy(family, "Itanium 2", 10); break;
-	      default:		sprintf(family, "%u", c->family); break;
-	}
-
-	/* build the feature string: */
-	memcpy(features, " standard", 10);
-	cp = features;
-	sep = 0;
-	for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
-		if (mask & feature_bits[i].mask) {
-			if (sep)
-				*cp++ = sep;
-			sep = ',';
-			*cp++ = ' ';
-			strcpy(cp, feature_bits[i].feature_name);
-			cp += strlen(feature_bits[i].feature_name);
-			mask &= ~feature_bits[i].mask;
-		}
-	}
-	if (mask) {
-		/* print unknown features as a hex value: */
-		if (sep)
-			*cp++ = sep;
-		sprintf(cp, " 0x%lx", mask);
-	}
-
-	proc_freq = cpufreq_quick_get(cpunum);
-	if (!proc_freq)
-		proc_freq = c->proc_freq / 1000;
-
-	seq_printf(m,
-		   "processor  : %d\n"
-		   "vendor     : %s\n"
-		   "arch       : IA-64\n"
-		   "family     : %s\n"
-		   "model      : %u\n"
-		   "revision   : %u\n"
-		   "archrev    : %u\n"
-		   "features   :%s\n"	/* don't change this---it _is_ right! */
-		   "cpu number : %lu\n"
-		   "cpu regs   : %u\n"
-		   "cpu MHz    : %lu.%06lu\n"
-		   "itc MHz    : %lu.%06lu\n"
-		   "BogoMIPS   : %lu.%02lu\n",
-		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
-		   features, c->ppn, c->number,
-		   proc_freq / 1000, proc_freq % 1000,
-		   c->itc_freq / 1000000, c->itc_freq % 1000000,
-		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
-#ifdef CONFIG_SMP
-	seq_printf(m, "siblings   : %u\n", cpus_weight(cpu_core_map[cpunum]));
-	if (c->threads_per_core > 1 || c->cores_per_socket > 1)
-		seq_printf(m,
-		   	   "physical id: %u\n"
-		   	   "core id    : %u\n"
-		   	   "thread id  : %u\n",
-		   	   c->socket_id, c->core_id, c->thread_id);
-#endif
-	seq_printf(m,"\n");
-
-	return 0;
-}
-
-static void *
-c_start (struct seq_file *m, loff_t *pos)
-{
-#ifdef CONFIG_SMP
-	while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
-		++*pos;
-#endif
-	return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
-}
-
-static void *
-c_next (struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return c_start(m, pos);
-}
-
-static void
-c_stop (struct seq_file *m, void *v)
-{
-}
-
-struct seq_operations cpuinfo_op = {
-	.start =	c_start,
-	.next =		c_next,
-	.stop =		c_stop,
-	.show =		show_cpuinfo
-};
-
-static void __cpuinit
-identify_cpu (struct cpuinfo_ia64 *c)
-{
-	union {
-		unsigned long bits[5];
-		struct {
-			/* id 0 & 1: */
-			char vendor[16];
-
-			/* id 2 */
-			u64 ppn;		/* processor serial number */
-
-			/* id 3: */
-			unsigned number		:  8;
-			unsigned revision	:  8;
-			unsigned model		:  8;
-			unsigned family		:  8;
-			unsigned archrev	:  8;
-			unsigned reserved	: 24;
-
-			/* id 4: */
-			u64 features;
-		} field;
-	} cpuid;
-	pal_vm_info_1_u_t vm1;
-	pal_vm_info_2_u_t vm2;
-	pal_status_t status;
-	unsigned long impl_va_msb = 50, phys_addr_size = 44;	/* Itanium defaults */
-	int i;
-
-	for (i = 0; i < 5; ++i)
-		cpuid.bits[i] = ia64_get_cpuid(i);
-
-	memcpy(c->vendor, cpuid.field.vendor, 16);
-#ifdef CONFIG_SMP
-	c->cpu = smp_processor_id();
-
-	/* below default values will be overwritten  by identify_siblings() 
-	 * for Multi-Threading/Multi-Core capable cpu's
-	 */
-	c->threads_per_core = c->cores_per_socket = c->num_log = 1;
-	c->socket_id = -1;
-
-	identify_siblings(c);
-#endif
-	c->ppn = cpuid.field.ppn;
-	c->number = cpuid.field.number;
-	c->revision = cpuid.field.revision;
-	c->model = cpuid.field.model;
-	c->family = cpuid.field.family;
-	c->archrev = cpuid.field.archrev;
-	c->features = cpuid.field.features;
-
-	status = ia64_pal_vm_summary(&vm1, &vm2);
-	if (status == PAL_STATUS_SUCCESS) {
-		impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
-		phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
-	}
-	c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
-	c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
-}
-
-void
-setup_per_cpu_areas (void)
-{
-	/* start_kernel() requires this... */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
-	prefill_possible_map();
-#endif
-}
-
-/*
- * Calculate the max. cache line size.
- *
- * In addition, the minimum of the i-cache stride sizes is calculated for
- * "flush_icache_range()".
- */
-static void __cpuinit
-get_max_cacheline_size (void)
-{
-	unsigned long line_size, max = 1;
-	unsigned int cache_size = 0;
-	u64 l, levels, unique_caches;
-        pal_cache_config_info_t cci;
-        s64 status;
-
-        status = ia64_pal_cache_summary(&levels, &unique_caches);
-        if (status != 0) {
-                printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
-                       __FUNCTION__, status);
-                max = SMP_CACHE_BYTES;
-		/* Safest setup for "flush_icache_range()" */
-		ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
-		goto out;
-        }
-
-	for (l = 0; l < levels; ++l) {
-		status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
-						    &cci);
-		if (status != 0) {
-			printk(KERN_ERR
-			       "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
-			       __FUNCTION__, l, status);
-			max = SMP_CACHE_BYTES;
-			/* The safest setup for "flush_icache_range()" */
-			cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
-			cci.pcci_unified = 1;
-		}
-		line_size = 1 << cci.pcci_line_size;
-		if (line_size > max)
-			max = line_size;
-		if (cache_size < cci.pcci_cache_size)
-			cache_size = cci.pcci_cache_size;
-		if (!cci.pcci_unified) {
-			status = ia64_pal_cache_config_info(l,
-						    /* cache_type (instruction)= */ 1,
-						    &cci);
-			if (status != 0) {
-				printk(KERN_ERR
-				"%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
-					__FUNCTION__, l, status);
-				/* The safest setup for "flush_icache_range()" */
-				cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
-			}
-		}
-		if (cci.pcci_stride < ia64_i_cache_stride_shift)
-			ia64_i_cache_stride_shift = cci.pcci_stride;
-	}
-  out:
-#ifdef CONFIG_SMP
-	max_cache_size = max(max_cache_size, cache_size);
-#endif
-	if (max > ia64_max_cacheline_size)
-		ia64_max_cacheline_size = max;
-}
-
-/*
- * cpu_init() initializes state that is per-CPU.  This function acts
- * as a 'CPU state barrier', nothing should get across.
- */
-void __cpuinit
-cpu_init (void)
-{
-	extern void __cpuinit ia64_mmu_init (void *);
-	unsigned long num_phys_stacked;
-	pal_vm_info_2_u_t vmi;
-	unsigned int max_ctx;
-	struct cpuinfo_ia64 *cpu_info;
-	void *cpu_data;
-
-	cpu_data = per_cpu_init();
-
-	/*
-	 * We set ar.k3 so that assembly code in MCA handler can compute
-	 * physical addresses of per cpu variables with a simple:
-	 *   phys = ar.k3 + &per_cpu_var
-	 */
-	ia64_set_kr(IA64_KR_PER_CPU_DATA,
-		    ia64_tpa(cpu_data) - (long) __per_cpu_start);
-
-	get_max_cacheline_size();
-
-	/*
-	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
-	 * ia64_mmu_init() yet.  And we can't call ia64_mmu_init() first because it
-	 * depends on the data returned by identify_cpu().  We break the dependency by
-	 * accessing cpu_data() through the canonical per-CPU address.
-	 */
-	cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
-	identify_cpu(cpu_info);
-
-#ifdef CONFIG_MCKINLEY
-	{
-#		define FEATURE_SET 16
-		struct ia64_pal_retval iprv;
-
-		if (cpu_info->family == 0x1f) {
-			PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
-			if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
-				PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
-				              (iprv.v1 | 0x80), FEATURE_SET, 0);
-		}
-	}
-#endif
-
-	/* Clear the stack memory reserved for pt_regs: */
-	memset(task_pt_regs(current), 0, sizeof(struct pt_regs));
-
-	ia64_set_kr(IA64_KR_FPU_OWNER, 0);
-
-	/*
-	 * Initialize the page-table base register to a global
-	 * directory with all zeroes.  This ensure that we can handle
-	 * TLB-misses to user address-space even before we created the
-	 * first user address-space.  This may happen, e.g., due to
-	 * aggressive use of lfetch.fault.
-	 */
-	ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
-
-	/*
-	 * Initialize default control register to defer speculative faults except
-	 * for those arising from TLB misses, which are not deferred.  The
-	 * kernel MUST NOT depend on a particular setting of these bits (in other words,
-	 * the kernel must have recovery code for all speculative accesses).  Turn on
-	 * dcr.lc as per recommendation by the architecture team.  Most IA-32 apps
-	 * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
-	 * be fine).
-	 */
-	ia64_setreg(_IA64_REG_CR_DCR,  (  IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
-					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
-	atomic_inc(&init_mm.mm_count);
-	current->active_mm = &init_mm;
-	if (current->mm)
-		BUG();
-
-	ia64_mmu_init(ia64_imva(cpu_data));
-	ia64_mca_cpu_init(ia64_imva(cpu_data));
-
-#ifdef CONFIG_IA32_SUPPORT
-	ia32_cpu_init();
-#endif
-
-	/* Clear ITC to eliminiate sched_clock() overflows in human time.  */
-	ia64_set_itc(0);
-
-	/* disable all local interrupt sources: */
-	ia64_set_itv(1 << 16);
-	ia64_set_lrr0(1 << 16);
-	ia64_set_lrr1(1 << 16);
-	ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
-	ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
-
-	/* clear TPR & XTP to enable all interrupt classes: */
-	ia64_setreg(_IA64_REG_CR_TPR, 0);
-#ifdef CONFIG_SMP
-	normal_xtp();
-#endif
-
-	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
-	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
-		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
-	else {
-		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
-		max_ctx = (1U << 15) - 1;	/* use architected minimum */
-	}
-	while (max_ctx < ia64_ctx.max_ctx) {
-		unsigned int old = ia64_ctx.max_ctx;
-		if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
-			break;
-	}
-
-	if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
-		printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
-		       "stacked regs\n");
-		num_phys_stacked = 96;
-	}
-	/* size of physical stacked register partition plus 8 bytes: */
-	__get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
-	platform_cpu_init();
-#ifdef CONFIG_XEN
-	if (is_running_on_xen() && !ia64_platform_is("xen")) {
-		extern ia64_mv_cpu_init_t xen_cpu_init;
-		xen_cpu_init();
-	}
-#endif
-
-	pm_idle = default_idle;
-}
-
-/*
- * On SMP systems, when the scheduler does migration-cost autodetection,
- * it needs a way to flush as much of the CPU's caches as possible.
- */
-void sched_cacheflush(void)
-{
-	ia64_sal_cache_flush(3);
-}
-
-void __init
-check_bugs (void)
-{
-	ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
-			       (unsigned long) __end___mckinley_e9_bundles);
-}
-
-static int __init run_dmi_scan(void)
-{
-	dmi_scan_machine();
-	return 0;
-}
-core_initcall(run_dmi_scan);
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/time.c b/linux-2.6-xen-sparse/arch/ia64/kernel/time.c
deleted file mode 100644
index b73cffa94f..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/time.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * linux/arch/ia64/kernel/time.c
- *
- * Copyright (C) 1998-2003 Hewlett-Packard Co
- *	Stephane Eranian <eranian@hpl.hp.com>
- *	David Mosberger <davidm@hpl.hp.com>
- * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
- * Copyright (C) 1999-2000 VA Linux Systems
- * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
- */
-
-#include <linux/cpu.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/profile.h>
-#include <linux/sched.h>
-#include <linux/time.h>
-#include <linux/interrupt.h>
-#include <linux/efi.h>
-#include <linux/profile.h>
-#include <linux/timex.h>
-
-#include <asm/machvec.h>
-#include <asm/delay.h>
-#include <asm/hw_irq.h>
-#include <asm/ptrace.h>
-#include <asm/sal.h>
-#include <asm/sections.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_XEN
-#include <linux/kernel_stat.h>
-#include <linux/posix-timers.h>
-#include <xen/interface/vcpu.h>
-#include <asm/percpu.h>
-#endif
-
-extern unsigned long wall_jiffies;
-
-volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
-
-#ifdef CONFIG_IA64_DEBUG_IRQ
-
-unsigned long last_cli_ip;
-EXPORT_SYMBOL(last_cli_ip);
-
-#endif
-
-#ifdef CONFIG_XEN
-DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
-DEFINE_PER_CPU(unsigned long, processed_stolen_time);
-DEFINE_PER_CPU(unsigned long, processed_blocked_time);
-#define NS_PER_TICK (1000000000LL/HZ)
-#endif
-
-static struct time_interpolator itc_interpolator = {
-	.shift = 16,
-	.mask = 0xffffffffffffffffLL,
-	.source = TIME_SOURCE_CPU
-};
-
-#ifdef CONFIG_XEN
-static unsigned long 
-consider_steal_time(unsigned long new_itm, struct pt_regs *regs)
-{
-	unsigned long stolen, blocked, sched_time;
-	unsigned long delta_itm = 0, stolentick = 0;
-	int i, cpu = smp_processor_id();
-	struct vcpu_runstate_info *runstate;
-	struct task_struct *p = current;
-
-	runstate = &per_cpu(runstate, smp_processor_id());
-
-	do {
-		sched_time = runstate->state_entry_time;
-		mb();
-		stolen = runstate->time[RUNSTATE_runnable] + 
-			 runstate->time[RUNSTATE_offline] -
-			 per_cpu(processed_stolen_time, cpu);
-		blocked = runstate->time[RUNSTATE_blocked] -
-			  per_cpu(processed_blocked_time, cpu);
-		mb();
-	} while (sched_time != runstate->state_entry_time);
-
-	/*
-	 * Check for vcpu migration effect
-	 * In this case, itc value is reversed.
-	 * This causes huge stolen value.  
-	 * This function just checks and reject this effect.
-	 */
-	if (!time_after_eq(runstate->time[RUNSTATE_blocked],
-			   per_cpu(processed_blocked_time, cpu)))
-		blocked = 0;
-
-	if (!time_after_eq(runstate->time[RUNSTATE_runnable] +
-			   runstate->time[RUNSTATE_offline],
-			   per_cpu(processed_stolen_time, cpu)))
-		stolen = 0;
-
-	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
-		stolentick = ia64_get_itc() - delta_itm - new_itm;
-
-	do_div(stolentick, NS_PER_TICK);
-	stolentick++;
-
-	do_div(stolen, NS_PER_TICK);
-
-	if (stolen > stolentick)
-		stolen = stolentick;
-
-	stolentick -= stolen;
-	do_div(blocked, NS_PER_TICK);
-
-	if (blocked > stolentick)
-		blocked = stolentick;
-
-	if (stolen > 0 || blocked > 0) {
-		account_steal_time(NULL, jiffies_to_cputime(stolen)); 
-		account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked)); 
-		run_local_timers();
-
-		if (rcu_pending(cpu))
-			rcu_check_callbacks(cpu, user_mode(regs));
-
-		scheduler_tick();
-		run_posix_cpu_timers(p);
-		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
-
-		if (cpu == time_keeper_id) {
-			write_seqlock(&xtime_lock);
-			for(i = 0; i < stolen + blocked; i++)
-				do_timer(regs);
-			local_cpu_data->itm_next = delta_itm + new_itm;
-			write_sequnlock(&xtime_lock);
-		} else {
-			local_cpu_data->itm_next = delta_itm + new_itm;
-		}
-		per_cpu(processed_stolen_time,cpu) += NS_PER_TICK * stolen;
-		per_cpu(processed_blocked_time,cpu) += NS_PER_TICK * blocked;
-	}
-	return delta_itm; 
-}
-#else
-#define consider_steal_time(new_itm, regs) (0)
-#endif
-
-static irqreturn_t
-timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
-{
-	unsigned long new_itm;
-	unsigned long delta_itm; /* XEN */
-
-	if (unlikely(cpu_is_offline(smp_processor_id()))) {
-		return IRQ_HANDLED;
-	}
-
-	platform_timer_interrupt(irq, dev_id, regs);
-
-	new_itm = local_cpu_data->itm_next;
-
-	if (!time_after(ia64_get_itc(), new_itm))
-		printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
-		       ia64_get_itc(), new_itm);
-
-	profile_tick(CPU_PROFILING, regs);
-
-	if (is_running_on_xen()) {
-		delta_itm = consider_steal_time(new_itm, regs);
-		new_itm += delta_itm;
-		if (time_after(new_itm, ia64_get_itc()) && delta_itm)
-			goto skip_process_time_accounting;
-	}
-
-	while (1) {
-		update_process_times(user_mode(regs));
-
-		new_itm += local_cpu_data->itm_delta;
-
-		if (smp_processor_id() == time_keeper_id) {
-			/*
-			 * Here we are in the timer irq handler. We have irqs locally
-			 * disabled, but we don't know if the timer_bh is running on
-			 * another CPU. We need to avoid to SMP race by acquiring the
-			 * xtime_lock.
-			 */
-			write_seqlock(&xtime_lock);
-			do_timer(regs);
-			local_cpu_data->itm_next = new_itm;
-			write_sequnlock(&xtime_lock);
-		} else
-			local_cpu_data->itm_next = new_itm;
-
-		if (time_after(new_itm, ia64_get_itc()))
-			break;
-	}
-
-skip_process_time_accounting:	/* XEN */
-
-	do {
-		/*
-		 * If we're too close to the next clock tick for
-		 * comfort, we increase the safety margin by
-		 * intentionally dropping the next tick(s).  We do NOT
-		 * update itm.next because that would force us to call
-		 * do_timer() which in turn would let our clock run
-		 * too fast (with the potentially devastating effect
-		 * of losing monotony of time).
-		 */
-		while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
-			new_itm += local_cpu_data->itm_delta;
-		ia64_set_itm(new_itm);
-		/* double check, in case we got hit by a (slow) PMI: */
-	} while (time_after_eq(ia64_get_itc(), new_itm));
-	return IRQ_HANDLED;
-}
-
-/*
- * Encapsulate access to the itm structure for SMP.
- */
-void
-ia64_cpu_local_tick (void)
-{
-	int cpu = smp_processor_id();
-	unsigned long shift = 0, delta;
-
-	/* arrange for the cycle counter to generate a timer interrupt: */
-	ia64_set_itv(IA64_TIMER_VECTOR);
-
-	delta = local_cpu_data->itm_delta;
-	/*
-	 * Stagger the timer tick for each CPU so they don't occur all at (almost) the
-	 * same time:
-	 */
-	if (cpu) {
-		unsigned long hi = 1UL << ia64_fls(cpu);
-		shift = (2*(cpu - hi) + 1) * delta/hi/2;
-	}
-	local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
-	ia64_set_itm(local_cpu_data->itm_next);
-}
-
-static int nojitter;
-
-static int __init nojitter_setup(char *str)
-{
-	nojitter = 1;
-	printk("Jitter checking for ITC timers disabled\n");
-	return 1;
-}
-
-__setup("nojitter", nojitter_setup);
-
-#ifdef CONFIG_XEN
-/* taken from i386/kernel/time-xen.c */
-static void init_missing_ticks_accounting(int cpu)
-{
-	struct vcpu_register_runstate_memory_area area;
-	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
-	int rc;
-
-	memset(runstate, 0, sizeof(*runstate));
-
-	area.addr.v = runstate;
-	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
-	WARN_ON(rc && rc != -ENOSYS);
-
-	per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
-	per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
-					    + runstate->time[RUNSTATE_offline];
-}
-
-static int xen_ia64_settimefoday_after_resume;
-
-static int __init __xen_ia64_settimeofday_after_resume(char *str)
-{
-	xen_ia64_settimefoday_after_resume = 1;
-	return 1;
-}
-
-__setup("xen_ia64_settimefoday_after_resume",
-	 __xen_ia64_settimeofday_after_resume);
-
-/* Called after suspend, to resume time.  */
-void
-time_resume(void)
-{
-	unsigned int cpu;
-	
-	/* Just trigger a tick.  */
-	ia64_cpu_local_tick();
-
-	if (xen_ia64_settimefoday_after_resume) {
-		/* do_settimeofday() resets timer interplator */
-		struct timespec xen_time;
-		int ret;
-		efi_gettimeofday(&xen_time);
-
-		ret = do_settimeofday(&xen_time);
-		WARN_ON(ret);
-	} else {
-#if 0
-		/* adjust EFI time */
-		struct timespec my_time = CURRENT_TIME;
-		struct timespec xen_time;
-		static timespec diff;
-		struct xen_domctl domctl;
-		int ret;
-
-		efi_gettimeofday(&xen_time);
-		diff = timespec_sub(&xen_time, &my_time);
-		domctl.cmd = XEN_DOMCTL_settimeoffset;
-		domctl.domain = DOMID_SELF;
-		domctl.u.settimeoffset.timeoffset_seconds = diff.tv_sec;
-		ret = HYPERVISOR_domctl_op(&domctl);
-		WARN_ON(ret);
-#endif
-		/* Time interpolator remembers the last timer status.
-		   Forget it */
-		write_seqlock_irq(&xtime_lock);
-		time_interpolator_reset();
-		write_sequnlock_irq(&xtime_lock);
-	}
-
-	for_each_online_cpu(cpu)
-		init_missing_ticks_accounting(cpu);
-
-	touch_softlockup_watchdog();
-}
-#else
-#define init_missing_ticks_accounting(cpu) do {} while (0)
-#endif
-
-void __devinit
-ia64_init_itm (void)
-{
-	unsigned long platform_base_freq, itc_freq;
-	struct pal_freq_ratio itc_ratio, proc_ratio;
-	long status, platform_base_drift, itc_drift;
-
-	/*
-	 * According to SAL v2.6, we need to use a SAL call to determine the platform base
-	 * frequency and then a PAL call to determine the frequency ratio between the ITC
-	 * and the base frequency.
-	 */
-	status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-				    &platform_base_freq, &platform_base_drift);
-	if (status != 0) {
-		printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
-	} else {
-		status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
-		if (status != 0)
-			printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
-	}
-	if (status != 0) {
-		/* invent "random" values */
-		printk(KERN_ERR
-		       "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
-		platform_base_freq = 100000000;
-		platform_base_drift = -1;	/* no drift info */
-		itc_ratio.num = 3;
-		itc_ratio.den = 1;
-	}
-	if (platform_base_freq < 40000000) {
-		printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
-		       platform_base_freq);
-		platform_base_freq = 75000000;
-		platform_base_drift = -1;
-	}
-	if (!proc_ratio.den)
-		proc_ratio.den = 1;	/* avoid division by zero */
-	if (!itc_ratio.den)
-		itc_ratio.den = 1;	/* avoid division by zero */
-
-	itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
-
-	local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
-	printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, "
-	       "ITC freq=%lu.%03luMHz", smp_processor_id(),
-	       platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
-	       itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
-
-	if (platform_base_drift != -1) {
-		itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
-		printk("+/-%ldppm\n", itc_drift);
-	} else {
-		itc_drift = -1;
-		printk("\n");
-	}
-
-	local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
-	local_cpu_data->itc_freq = itc_freq;
-	local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
-	local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
-					+ itc_freq/2)/itc_freq;
-
-	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
-		itc_interpolator.frequency = local_cpu_data->itc_freq;
-		itc_interpolator.drift = itc_drift;
-#ifdef CONFIG_SMP
-		/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
-		 * Jitter compensation requires a cmpxchg which may limit
-		 * the scalability of the syscalls for retrieving time.
-		 * The ITC synchronization is usually successful to within a few
-		 * ITC ticks but this is not a sure thing. If you need to improve
-		 * timer performance in SMP situations then boot the kernel with the
-		 * "nojitter" option. However, doing so may result in time fluctuating (maybe
-		 * even going backward) if the ITC offsets between the individual CPUs
-		 * are too large.
-		 */
-		if (!nojitter) itc_interpolator.jitter = 1;
-#endif
-		register_time_interpolator(&itc_interpolator);
-	}
-
-	if (is_running_on_xen())
-		init_missing_ticks_accounting(smp_processor_id());
-
-	/* avoid softlock up message when cpu is unplug and plugged again. */
-	touch_softlockup_watchdog();
-
-	/* Setup the CPU local timer tick */
-	ia64_cpu_local_tick();
-}
-
-static struct irqaction timer_irqaction = {
-	.handler =	timer_interrupt,
-	.flags =	IRQF_DISABLED,
-	.name =		"timer"
-};
-
-void __devinit ia64_disable_timer(void)
-{
-	ia64_set_itv(1 << 16);
-}
-
-void __init
-time_init (void)
-{
-	register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
-	efi_gettimeofday(&xtime);
-	ia64_init_itm();
-
-	/*
-	 * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
-	 * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
-	 */
-	set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
-}
-
-/*
- * Generic udelay assumes that if preemption is allowed and the thread
- * migrates to another CPU, that the ITC values are synchronized across
- * all CPUs.
- */
-static void
-ia64_itc_udelay (unsigned long usecs)
-{
-	unsigned long start = ia64_get_itc();
-	unsigned long end = start + usecs*local_cpu_data->cyc_per_usec;
-
-	while (time_before(ia64_get_itc(), end))
-		cpu_relax();
-}
-
-void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay;
-
-void
-udelay (unsigned long usecs)
-{
-	(*ia64_udelay)(usecs);
-}
-EXPORT_SYMBOL(udelay);
-
-static unsigned long long ia64_itc_printk_clock(void)
-{
-	if (ia64_get_kr(IA64_KR_PER_CPU_DATA))
-		return sched_clock();
-	return 0;
-}
-
-static unsigned long long ia64_default_printk_clock(void)
-{
-	return (unsigned long long)(jiffies_64 - INITIAL_JIFFIES) *
-		(1000000000/HZ);
-}
-
-unsigned long long (*ia64_printk_clock)(void) = &ia64_default_printk_clock;
-
-unsigned long long printk_clock(void)
-{
-	return ia64_printk_clock();
-}
-
-void __init
-ia64_setup_printk_clock(void)
-{
-	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT))
-		ia64_printk_clock = ia64_itc_printk_clock;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c b/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c
deleted file mode 100644
index c14ac662a3..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/mm/ioremap.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
- *	Bjorn Helgaas <bjorn.helgaas@hp.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <linux/efi.h>
-#include <asm/io.h>
-#include <asm/meminit.h>
-
-static inline void __iomem *
-__ioremap (unsigned long offset, unsigned long size)
-{
-	offset = HYPERVISOR_ioremap(offset, size);
-	if (IS_ERR_VALUE(offset))
-		return (void __iomem*)offset;
-	return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset);
-}
-
-void __iomem *
-ioremap (unsigned long offset, unsigned long size)
-{
-	u64 attr;
-	unsigned long gran_base, gran_size;
-
-	/*
-	 * For things in kern_memmap, we must use the same attribute
-	 * as the rest of the kernel.  For more details, see
-	 * Documentation/ia64/aliasing.txt.
-	 */
-	attr = kern_mem_attribute(offset, size);
-	if (attr & EFI_MEMORY_WB)
-		return (void __iomem *) phys_to_virt(offset);
-	else if (attr & EFI_MEMORY_UC)
-		return __ioremap(offset, size);
-
-	/*
-	 * Some chipsets don't support UC access to memory.  If
-	 * WB is supported for the whole granule, we prefer that.
-	 */
-	gran_base = GRANULEROUNDDOWN(offset);
-	gran_size = GRANULEROUNDUP(offset + size) - gran_base;
-	if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
-		return (void __iomem *) phys_to_virt(offset);
-
-	return __ioremap(offset, size);
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *
-ioremap_nocache (unsigned long offset, unsigned long size)
-{
-	if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB)
-		return NULL;
-
-	return __ioremap(offset, size);
-}
-EXPORT_SYMBOL(ioremap_nocache);
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile b/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile
deleted file mode 100644
index 555d4a9d7a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-obj-$(CONFIG_OPROFILE) += oprofile.o
-
-DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
-		oprof.o cpu_buffer.o buffer_sync.o \
-		event_buffer.o oprofile_files.o \
-		oprofilefs.o oprofile_stats.o \
-		timer_int.o )
-
-oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
-oprofile-$(CONFIG_PERFMON) += perfmon.o
-ifeq ($(CONFIG_XEN), y)
-oprofile-$(CONFIG_PERFMON) += xenoprof.o \
-	../../../drivers/xen/xenoprof/xenoprofile.o
-endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c
deleted file mode 100644
index f218b7eb45..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/init.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- * @file init.c
- *
- * @remark Copyright 2002 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/oprofile.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include "oprofile_perfmon.h"
- 
-extern int perfmon_init(struct oprofile_operations * ops);
-extern void perfmon_exit(void);
-extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
-
-int __init oprofile_arch_init(struct oprofile_operations * ops)
-{
-	int ret = -ENODEV;
-
-	if (is_running_on_xen()) {
-		ret = xen_perfmon_init();
-		if (ret)
-			return ret;
-		return xenoprofile_init(ops);
-	}
-
-#ifdef CONFIG_PERFMON
-	/* perfmon_init() can fail, but we have no way to report it */
-	ret = perfmon_init(ops);
-#endif
-	ops->backtrace = ia64_backtrace;
-
-	return ret;
-}
-
-
-void oprofile_arch_exit(void)
-{
-	if (is_running_on_xen()) {
-		xenoprofile_exit();
-		xen_perfmon_exit();
-		return;
-	}
-
-#ifdef CONFIG_PERFMON
-	perfmon_exit();
-#endif
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h b/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h
deleted file mode 100644
index 6ba1170bd6..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/oprofile_perfmon.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef OPROFILE_PERFMON_H
-#define OPROFILE_PERFMON_H
-
-#ifdef CONFIG_PERFMON
-int __perfmon_init(void);
-void __perfmon_exit(void);
-int perfmon_start(void);
-void perfmon_stop(void);
-#else
-#define __perfmon_init()	(-ENOSYS)
-#define __perfmon_exit()	do {} while (0)
-#endif /* CONFIG_PERFMON */
-
-#ifdef CONFIG_XEN
-#define STATIC_IF_NO_XEN	/* nothing */
-#define xen_perfmon_init()	__perfmon_init()
-#define xen_perfmon_exit()	__perfmon_exit()
-extern int xenoprofile_init(struct oprofile_operations * ops);
-extern void xenoprofile_exit(void);
-#else
-#define STATIC_IF_NO_XEN	static
-#define xen_perfmon_init()	(-ENOSYS)
-#define xen_perfmon_exit()	do {} while (0)
-#define xenoprofile_init()	(-ENOSYS)
-#define xenoprofile_exit()	do {} while (0)
-#endif /* CONFIG_XEN */
-
-#endif /* OPROFILE_PERFMON_H */
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c
deleted file mode 100644
index 89dc71f1c4..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/perfmon.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/**
- * @file perfmon.c
- *
- * @remark Copyright 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <asm/perfmon.h>
-#include <asm/ptrace.h>
-#include <asm/errno.h>
-#include "oprofile_perfmon.h"
-
-static int allow_ints;
-
-static int
-perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
-                struct pt_regs *regs, unsigned long stamp)
-{
-	int event = arg->pmd_eventid;
- 
-	arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
-
-	/* the owner of the oprofile event buffer may have exited
-	 * without perfmon being shutdown (e.g. SIGSEGV)
-	 */
-	if (allow_ints)
-		oprofile_add_sample(regs, event);
-	return 0;
-}
-
-
-STATIC_IF_NO_XEN
-int perfmon_start(void)
-{
-	allow_ints = 1;
-	return 0;
-}
-
-
-STATIC_IF_NO_XEN
-void perfmon_stop(void)
-{
-	allow_ints = 0;
-}
-
-
-#define OPROFILE_FMT_UUID { \
-	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
-
-static pfm_buffer_fmt_t oprofile_fmt = {
- 	.fmt_name 	    = "oprofile_format",
- 	.fmt_uuid	    = OPROFILE_FMT_UUID,
- 	.fmt_handler	    = perfmon_handler,
-};
-
-
-static char * get_cpu_type(void)
-{
-	__u8 family = local_cpu_data->family;
-
-	switch (family) {
-		case 0x07:
-			return "ia64/itanium";
-		case 0x1f:
-			return "ia64/itanium2";
-		default:
-			return "ia64/ia64";
-	}
-}
-
-
-/* all the ops are handled via userspace for IA64 perfmon */
-
-static int using_perfmon;
-
-STATIC_IF_NO_XEN
-int __perfmon_init(void)
-{
-	int ret = pfm_register_buffer_fmt(&oprofile_fmt);
-	if (ret)
-		return -ENODEV;
-
-	using_perfmon = 1;
-	return 0;
-}
-
-STATIC_IF_NO_XEN
-void __perfmon_exit(void)
-{
-	if (!using_perfmon)
-		return;
-
-	pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
-}
-
-int perfmon_init(struct oprofile_operations * ops)
-{
-	int ret = __perfmon_init();
-	if (ret)
-		return -ENODEV;
-
-	ops->cpu_type = get_cpu_type();
-	ops->start = perfmon_start;
-	ops->stop = perfmon_stop;
-	printk(KERN_INFO "oprofile: using perfmon.\n");
-	return 0;
-}
-
-
-void perfmon_exit(void)
-{
-	__perfmon_exit();
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c b/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c
deleted file mode 100644
index 998be3e66b..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/oprofile/xenoprof.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/******************************************************************************
- * xenoprof ia64 specific part
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-#include <linux/init.h>
-#include <linux/oprofile.h>
-#include <linux/ioport.h>
-
-#include <xen/driver_util.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/xenoprof.h>
-#include <xen/xenoprof.h>
-
-#include "oprofile_perfmon.h"
-
-void __init xenoprof_arch_init_counter(struct xenoprof_init *init)
-{
-	init->num_events = 0; /* perfmon manages. */
-}
-
-void xenoprof_arch_counter(void)
-{
-	/* nothing. perfmon does. */
-}
-
-void xenoprof_arch_start(void) 
-{
-	perfmon_start();
-}
-
-void xenoprof_arch_stop(void)
-{
-	perfmon_stop();
-}
-
-/* XXX move them to an appropriate header file. */
-struct resource* xen_ia64_allocate_resource(unsigned long size); 
-void xen_ia64_release_resource(struct resource* res); 
-void xen_ia64_unmap_resource(struct resource* res); 
-
-struct resource*
-xenoprof_ia64_allocate_resource(int32_t max_samples)
-{
-	unsigned long bufsize;
-
-	/* XXX add hypercall to get bufsize? */
-	/*     this value is taken from alloc_xenoprof_struct(). */
-#if 0
-	bufsize = NR_CPUS * (sizeof(struct xenoprof_buf) +
-			     (max_samples - 1) * sizeof(struct event_log));
-	bufsize = PAGE_ALIGN(bufsize) + PAGE_SIZE;
-#else
-#define MAX_OPROF_SHARED_PAGES 32
-	bufsize = (MAX_OPROF_SHARED_PAGES + 1) * PAGE_SIZE;
-#endif
-	return xen_ia64_allocate_resource(bufsize);
-}
-
-void xenoprof_arch_unmap_shared_buffer(struct xenoprof_shared_buffer* sbuf)
-{
-	if (sbuf->buffer) {
-		xen_ia64_unmap_resource(sbuf->arch.res);
-		sbuf->buffer = NULL;
-		sbuf->arch.res = NULL;
-	}
-}
-
-int xenoprof_arch_map_shared_buffer(struct xenoprof_get_buffer* get_buffer,
-                                    struct xenoprof_shared_buffer* sbuf)
-{
-	int ret;
-	struct resource* res;
-
-	sbuf->buffer = NULL;
-	sbuf->arch.res = NULL;
-
-	res = xenoprof_ia64_allocate_resource(get_buffer->max_samples);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
-
-	get_buffer->buf_gmaddr = res->start;
-
-	ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, get_buffer);
-	if (ret) {
-		xen_ia64_release_resource(res);
-		return ret;
-	}
-
-	BUG_ON((res->end - res->start + 1) <
-	       get_buffer->bufsize * get_buffer->nbuf);
-
-	sbuf->buffer = __va(res->start);
-	sbuf->arch.res = res;
-
-	return ret;
-}
-
-int xenoprof_arch_set_passive(struct xenoprof_passive* pdomain,
-                              struct xenoprof_shared_buffer* sbuf)
-{
-	int ret;
-	struct resource* res;
-
-	sbuf->buffer = NULL;
-	sbuf->arch.res = NULL;
-
-	res = xenoprof_ia64_allocate_resource(pdomain->max_samples);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
-
-	pdomain->buf_gmaddr = res->start;
-
-	ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, pdomain);
-	if (ret) {
-		xen_ia64_release_resource(res);
-		return ret;
-	}
-
-	BUG_ON((res->end - res->start + 1) < pdomain->bufsize * pdomain->nbuf);
-
-	sbuf->buffer = __va(res->start);
-	sbuf->arch.res = res;
-
-	return ret;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/pci/pci.c b/linux-2.6-xen-sparse/arch/ia64/pci/pci.c
deleted file mode 100644
index 4d3684156a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/pci/pci.c
+++ /dev/null
@@ -1,836 +0,0 @@
-/*
- * pci.c - Low-Level PCI Access in IA-64
- *
- * Derived from bios32.c of i386 tree.
- *
- * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P.
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *	Bjorn Helgaas <bjorn.helgaas@hp.com>
- * Copyright (C) 2004 Silicon Graphics, Inc.
- *
- * Note: Above list of copyright holders is incomplete...
- */
-
-#include <linux/acpi.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-
-#include <asm/machvec.h>
-#include <asm/page.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/sal.h>
-#include <asm/smp.h>
-#include <asm/irq.h>
-#include <asm/hw_irq.h>
-
-/*
- * Low-level SAL-based PCI configuration access functions. Note that SAL
- * calls are already serialized (via sal_lock), so we don't need another
- * synchronization mechanism here.
- */
-
-#define PCI_SAL_ADDRESS(seg, bus, devfn, reg)		\
-	(((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg))
-
-/* SAL 3.2 adds support for extended config space. */
-
-#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg)	\
-	(((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg))
-
-static int
-pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
-	      int reg, int len, u32 *value)
-{
-	u64 addr, data = 0;
-	int mode, result;
-
-	if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
-		return -EINVAL;
-
-	if ((seg | reg) <= 255) {
-		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
-		mode = 0;
-	} else {
-		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
-		mode = 1;
-	}
-	result = ia64_sal_pci_config_read(addr, mode, len, &data);
-	if (result != 0)
-		return -EINVAL;
-
-	*value = (u32) data;
-	return 0;
-}
-
-static int
-pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
-	       int reg, int len, u32 value)
-{
-	u64 addr;
-	int mode, result;
-
-	if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
-		return -EINVAL;
-
-	if ((seg | reg) <= 255) {
-		addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
-		mode = 0;
-	} else {
-		addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
-		mode = 1;
-	}
-	result = ia64_sal_pci_config_write(addr, mode, len, value);
-	if (result != 0)
-		return -EINVAL;
-	return 0;
-}
-
-static struct pci_raw_ops pci_sal_ops = {
-	.read =		pci_sal_read,
-	.write =	pci_sal_write
-};
-
-struct pci_raw_ops *raw_pci_ops = &pci_sal_ops;
-
-static int
-pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
-{
-	return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
-				 devfn, where, size, value);
-}
-
-static int
-pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
-{
-	return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
-				  devfn, where, size, value);
-}
-
-struct pci_ops pci_root_ops = {
-	.read = pci_read,
-	.write = pci_write,
-};
-
-/* Called by ACPI when it finds a new root bus.  */
-
-static struct pci_controller * __devinit
-alloc_pci_controller (int seg)
-{
-	struct pci_controller *controller;
-
-	controller = kmalloc(sizeof(*controller), GFP_KERNEL);
-	if (!controller)
-		return NULL;
-
-	memset(controller, 0, sizeof(*controller));
-	controller->segment = seg;
-	controller->node = -1;
-	return controller;
-}
-
-struct pci_root_info {
-	struct pci_controller *controller;
-	char *name;
-};
-
-static unsigned int
-new_space (u64 phys_base, int sparse)
-{
-	u64 mmio_base;
-	int i;
-
-	if (phys_base == 0)
-		return 0;	/* legacy I/O port space */
-
-	mmio_base = (u64) ioremap(phys_base, 0);
-	for (i = 0; i < num_io_spaces; i++)
-		if (io_space[i].mmio_base == mmio_base &&
-		    io_space[i].sparse == sparse)
-			return i;
-
-	if (num_io_spaces == MAX_IO_SPACES) {
-		printk(KERN_ERR "PCI: Too many IO port spaces "
-			"(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES);
-		return ~0;
-	}
-
-	i = num_io_spaces++;
-	io_space[i].mmio_base = mmio_base;
-	io_space[i].sparse = sparse;
-
-	return i;
-}
-
-static u64 __devinit
-add_io_space (struct pci_root_info *info, struct acpi_resource_address64 *addr)
-{
-	struct resource *resource;
-	char *name;
-	u64 base, min, max, base_port;
-	unsigned int sparse = 0, space_nr, len;
-
-	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
-	if (!resource) {
-		printk(KERN_ERR "PCI: No memory for %s I/O port space\n",
-			info->name);
-		goto out;
-	}
-
-	len = strlen(info->name) + 32;
-	name = kzalloc(len, GFP_KERNEL);
-	if (!name) {
-		printk(KERN_ERR "PCI: No memory for %s I/O port space name\n",
-			info->name);
-		goto free_resource;
-	}
-
-	min = addr->minimum;
-	max = min + addr->address_length - 1;
-	if (addr->info.io.translation_type == ACPI_SPARSE_TRANSLATION)
-		sparse = 1;
-
-	space_nr = new_space(addr->translation_offset, sparse);
-	if (space_nr == ~0)
-		goto free_name;
-
-	base = __pa(io_space[space_nr].mmio_base);
-	base_port = IO_SPACE_BASE(space_nr);
-	snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->name,
-		base_port + min, base_port + max);
-
-	/*
-	 * The SDM guarantees the legacy 0-64K space is sparse, but if the
-	 * mapping is done by the processor (not the bridge), ACPI may not
-	 * mark it as sparse.
-	 */
-	if (space_nr == 0)
-		sparse = 1;
-
-	resource->name  = name;
-	resource->flags = IORESOURCE_MEM;
-	resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min);
-	resource->end   = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max);
-	insert_resource(&iomem_resource, resource);
-
-	return base_port;
-
-free_name:
-	kfree(name);
-free_resource:
-	kfree(resource);
-out:
-	return ~0;
-}
-
-static acpi_status __devinit resource_to_window(struct acpi_resource *resource,
-	struct acpi_resource_address64 *addr)
-{
-	acpi_status status;
-
-	/*
-	 * We're only interested in _CRS descriptors that are
-	 *	- address space descriptors for memory or I/O space
-	 *	- non-zero size
-	 *	- producers, i.e., the address space is routed downstream,
-	 *	  not consumed by the bridge itself
-	 */
-	status = acpi_resource_to_address64(resource, addr);
-	if (ACPI_SUCCESS(status) &&
-	    (addr->resource_type == ACPI_MEMORY_RANGE ||
-	     addr->resource_type == ACPI_IO_RANGE) &&
-	    addr->address_length &&
-	    addr->producer_consumer == ACPI_PRODUCER)
-		return AE_OK;
-
-	return AE_ERROR;
-}
-
-static acpi_status __devinit
-count_window (struct acpi_resource *resource, void *data)
-{
-	unsigned int *windows = (unsigned int *) data;
-	struct acpi_resource_address64 addr;
-	acpi_status status;
-
-	status = resource_to_window(resource, &addr);
-	if (ACPI_SUCCESS(status))
-		(*windows)++;
-
-	return AE_OK;
-}
-
-static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
-{
-	struct pci_root_info *info = data;
-	struct pci_window *window;
-	struct acpi_resource_address64 addr;
-	acpi_status status;
-	unsigned long flags, offset = 0;
-	struct resource *root;
-
-	/* Return AE_OK for non-window resources to keep scanning for more */
-	status = resource_to_window(res, &addr);
-	if (!ACPI_SUCCESS(status))
-		return AE_OK;
-
-	if (addr.resource_type == ACPI_MEMORY_RANGE) {
-		flags = IORESOURCE_MEM;
-		root = &iomem_resource;
-		offset = addr.translation_offset;
-	} else if (addr.resource_type == ACPI_IO_RANGE) {
-		flags = IORESOURCE_IO;
-		root = &ioport_resource;
-		offset = add_io_space(info, &addr);
-		if (offset == ~0)
-			return AE_OK;
-	} else
-		return AE_OK;
-
-	window = &info->controller->window[info->controller->windows++];
-	window->resource.name = info->name;
-	window->resource.flags = flags;
-	window->resource.start = addr.minimum + offset;
-	window->resource.end = window->resource.start + addr.address_length - 1;
-	window->resource.child = NULL;
-	window->offset = offset;
-
-	if (insert_resource(root, &window->resource)) {
-		printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
-			window->resource.start, window->resource.end,
-			root->name, info->name);
-	}
-
-	return AE_OK;
-}
-
-static void __devinit
-pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
-{
-	int i, j;
-
-	j = 0;
-	for (i = 0; i < ctrl->windows; i++) {
-		struct resource *res = &ctrl->window[i].resource;
-		/* HP's firmware has a hack to work around a Windows bug.
-		 * Ignore these tiny memory ranges */
-		if ((res->flags & IORESOURCE_MEM) &&
-		    (res->end - res->start < 16))
-			continue;
-		if (j >= PCI_BUS_NUM_RESOURCES) {
-			printk("Ignoring range [%lx-%lx] (%lx)\n", res->start,
-					res->end, res->flags);
-			continue;
-		}
-		bus->resource[j++] = res;
-	}
-}
-
-struct pci_bus * __devinit
-pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
-{
-	struct pci_root_info info;
-	struct pci_controller *controller;
-	unsigned int windows = 0;
-	struct pci_bus *pbus;
-	char *name;
-	int pxm;
-
-	controller = alloc_pci_controller(domain);
-	if (!controller)
-		goto out1;
-
-	controller->acpi_handle = device->handle;
-
-	pxm = acpi_get_pxm(controller->acpi_handle);
-#ifdef CONFIG_NUMA
-	if (pxm >= 0)
-		controller->node = pxm_to_node(pxm);
-#endif
-
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
-			&windows);
-	controller->window = kmalloc_node(sizeof(*controller->window) * windows,
-			GFP_KERNEL, controller->node);
-	if (!controller->window)
-		goto out2;
-
-	name = kmalloc(16, GFP_KERNEL);
-	if (!name)
-		goto out3;
-
-	sprintf(name, "PCI Bus %04x:%02x", domain, bus);
-	info.controller = controller;
-	info.name = name;
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
-			&info);
-
-	pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
-	if (pbus)
-		pcibios_setup_root_windows(pbus, controller);
-
-	return pbus;
-
-out3:
-	kfree(controller->window);
-out2:
-	kfree(controller);
-out1:
-	return NULL;
-}
-
-void pcibios_resource_to_bus(struct pci_dev *dev,
-		struct pci_bus_region *region, struct resource *res)
-{
-	struct pci_controller *controller = PCI_CONTROLLER(dev);
-	unsigned long offset = 0;
-	int i;
-
-	for (i = 0; i < controller->windows; i++) {
-		struct pci_window *window = &controller->window[i];
-		if (!(window->resource.flags & res->flags))
-			continue;
-		if (window->resource.start > res->start)
-			continue;
-		if (window->resource.end < res->end)
-			continue;
-		offset = window->offset;
-		break;
-	}
-
-	region->start = res->start - offset;
-	region->end = res->end - offset;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void pcibios_bus_to_resource(struct pci_dev *dev,
-		struct resource *res, struct pci_bus_region *region)
-{
-	struct pci_controller *controller = PCI_CONTROLLER(dev);
-	unsigned long offset = 0;
-	int i;
-
-	for (i = 0; i < controller->windows; i++) {
-		struct pci_window *window = &controller->window[i];
-		if (!(window->resource.flags & res->flags))
-			continue;
-		if (window->resource.start - window->offset > region->start)
-			continue;
-		if (window->resource.end - window->offset < region->end)
-			continue;
-		offset = window->offset;
-		break;
-	}
-
-	res->start = region->start + offset;
-	res->end = region->end + offset;
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
-static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
-{
-	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx];
-
-	if (!dev->bus)
-		return 0;
-	for (i=0; i<PCI_BUS_NUM_RESOURCES; i++) {
-		struct resource *busr = dev->bus->resource[i];
-
-		if (!busr || ((busr->flags ^ devr->flags) & type_mask))
-			continue;
-		if ((devr->start) && (devr->start >= busr->start) &&
-				(devr->end <= busr->end))
-			return 1;
-	}
-	return 0;
-}
-
-static void __devinit
-pcibios_fixup_resources(struct pci_dev *dev, int start, int limit)
-{
-	struct pci_bus_region region;
-	int i;
-
-	for (i = start; i < limit; i++) {
-		if (!dev->resource[i].flags)
-			continue;
-		region.start = dev->resource[i].start;
-		region.end = dev->resource[i].end;
-		pcibios_bus_to_resource(dev, &dev->resource[i], &region);
-		if ((is_valid_resource(dev, i)))
-			pci_claim_resource(dev, i);
-	}
-}
-
-static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
-{
-	pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES);
-}
-
-static void __devinit pcibios_fixup_bridge_resources(struct pci_dev *dev)
-{
-	pcibios_fixup_resources(dev, PCI_BRIDGE_RESOURCES, PCI_NUM_RESOURCES);
-}
-
-/*
- *  Called after each bus is probed, but before its children are examined.
- */
-void __devinit
-pcibios_fixup_bus (struct pci_bus *b)
-{
-	struct pci_dev *dev;
-
-	if (b->self) {
-		pci_read_bridge_bases(b);
-		pcibios_fixup_bridge_resources(b->self);
-	}
-	list_for_each_entry(dev, &b->devices, bus_list)
-		pcibios_fixup_device_resources(dev);
-
-	return;
-}
-
-void __devinit
-pcibios_update_irq (struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-
-	/* ??? FIXME -- record old value for shutdown.  */
-}
-
-static inline int
-pcibios_enable_resources (struct pci_dev *dev, int mask)
-{
-	u16 cmd, old_cmd;
-	int idx;
-	struct resource *r;
-	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-
-	if (!dev)
-		return -EINVAL;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	old_cmd = cmd;
-	for (idx=0; idx<PCI_NUM_RESOURCES; idx++) {
-		/* Only set up the desired resources.  */
-		if (!(mask & (1 << idx)))
-			continue;
-
-		r = &dev->resource[idx];
-		if (!(r->flags & type_mask))
-			continue;
-		if ((idx == PCI_ROM_RESOURCE) &&
-				(!(r->flags & IORESOURCE_ROM_ENABLE)))
-			continue;
-		if (!r->start && r->end) {
-			printk(KERN_ERR
-			       "PCI: Device %s not available because of resource collisions\n",
-			       pci_name(dev));
-			return -EINVAL;
-		}
-		if (r->flags & IORESOURCE_IO)
-			cmd |= PCI_COMMAND_IO;
-		if (r->flags & IORESOURCE_MEM)
-			cmd |= PCI_COMMAND_MEMORY;
-	}
-	if (cmd != old_cmd) {
-		printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	return 0;
-}
-
-int
-pcibios_enable_device (struct pci_dev *dev, int mask)
-{
-	int ret;
-
-	ret = pcibios_enable_resources(dev, mask);
-	if (ret < 0)
-		return ret;
-
-	return acpi_pci_irq_enable(dev);
-}
-
-void
-pcibios_disable_device (struct pci_dev *dev)
-{
-	acpi_pci_irq_disable(dev);
-}
-
-void
-pcibios_align_resource (void *data, struct resource *res,
-		        resource_size_t size, resource_size_t align)
-{
-}
-
-/*
- * PCI BIOS setup, always defaults to SAL interface
- */
-char * __init
-pcibios_setup (char *str)
-{
-	return str;
-}
-
-int
-pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
-		     enum pci_mmap_state mmap_state, int write_combine)
-{
-	/*
-	 * I/O space cannot be accessed via normal processor loads and
-	 * stores on this platform.
-	 */
-	if (mmap_state == pci_mmap_io)
-		/*
-		 * XXX we could relax this for I/O spaces for which ACPI
-		 * indicates that the space is 1-to-1 mapped.  But at the
-		 * moment, we don't support multiple PCI address spaces and
-		 * the legacy I/O space is not 1-to-1 mapped, so this is moot.
-		 */
-		return -EINVAL;
-
-	/*
-	 * Leave vm_pgoff as-is, the PCI space address is the physical
-	 * address on this platform.
-	 */
-	if (write_combine && efi_range_is_wc(vma->vm_start,
-					     vma->vm_end - vma->vm_start))
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-	else
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	if (is_initial_xendomain()) {
-		unsigned long addr = vma->vm_pgoff << PAGE_SHIFT;
-		size_t size = vma->vm_end - vma->vm_start;
-		unsigned long offset = HYPERVISOR_ioremap(addr, size);
-		if (IS_ERR_VALUE(offset))
-			return offset;
-	}
-
-	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			     vma->vm_end - vma->vm_start, vma->vm_page_prot))
-		return -EAGAIN;
-
-	return 0;
-}
-
-/**
- * ia64_pci_get_legacy_mem - generic legacy mem routine
- * @bus: bus to get legacy memory base address for
- *
- * Find the base of legacy memory for @bus.  This is typically the first
- * megabyte of bus address space for @bus or is simply 0 on platforms whose
- * chipsets support legacy I/O and memory routing.  Returns the base address
- * or an error pointer if an error occurred.
- *
- * This is the ia64 generic version of this routine.  Other platforms
- * are free to override it with a machine vector.
- */
-char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
-{
-	return (char *)__IA64_UNCACHED_OFFSET;
-}
-
-/**
- * pci_mmap_legacy_page_range - map legacy memory space to userland
- * @bus: bus whose legacy space we're mapping
- * @vma: vma passed in by mmap
- *
- * Map legacy memory space for this device back to userspace using a machine
- * vector to get the base address.
- */
-int
-pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
-{
-	unsigned long size = vma->vm_end - vma->vm_start;
-	pgprot_t prot;
-	char *addr;
-
-	/*
-	 * Avoid attribute aliasing.  See Documentation/ia64/aliasing.txt
-	 * for more details.
-	 */
-	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
-		return -EINVAL;
-	prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
-				    vma->vm_page_prot);
-	if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot)))
-		return -EINVAL;
-
-	addr = pci_get_legacy_mem(bus);
-	if (IS_ERR(addr))
-		return PTR_ERR(addr);
-
-	vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT;
-	vma->vm_page_prot = prot;
-
-	if (is_initial_xendomain()) {
-		unsigned long addr = vma->vm_pgoff << PAGE_SHIFT;
-		size_t size = vma->vm_end - vma->vm_start;
-		unsigned long offset = HYPERVISOR_ioremap(addr, size);
-		if (IS_ERR_VALUE(offset))
-			return offset;
-	}
-
-	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			    size, vma->vm_page_prot))
-		return -EAGAIN;
-
-	return 0;
-}
-
-/**
- * ia64_pci_legacy_read - read from legacy I/O space
- * @bus: bus to read
- * @port: legacy port value
- * @val: caller allocated storage for returned value
- * @size: number of bytes to read
- *
- * Simply reads @size bytes from @port and puts the result in @val.
- *
- * Again, this (and the write routine) are generic versions that can be
- * overridden by the platform.  This is necessary on platforms that don't
- * support legacy I/O routing or that hard fail on legacy I/O timeouts.
- */
-int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
-{
-	int ret = size;
-
-	switch (size) {
-	case 1:
-		*val = inb(port);
-		break;
-	case 2:
-		*val = inw(port);
-		break;
-	case 4:
-		*val = inl(port);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	return ret;
-}
-
-/**
- * ia64_pci_legacy_write - perform a legacy I/O write
- * @bus: bus pointer
- * @port: port to write
- * @val: value to write
- * @size: number of bytes to write from @val
- *
- * Simply writes @size bytes of @val to @port.
- */
-int ia64_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
-{
-	int ret = size;
-
-	switch (size) {
-	case 1:
-		outb(val, port);
-		break;
-	case 2:
-		outw(val, port);
-		break;
-	case 4:
-		outl(val, port);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	return ret;
-}
-
-/**
- * pci_cacheline_size - determine cacheline size for PCI devices
- * @dev: void
- *
- * We want to use the line-size of the outer-most cache.  We assume
- * that this line-size is the same for all CPUs.
- *
- * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
- *
- * RETURNS: An appropriate -ERRNO error value on eror, or zero for success.
- */
-static unsigned long
-pci_cacheline_size (void)
-{
-	u64 levels, unique_caches;
-	s64 status;
-	pal_cache_config_info_t cci;
-	static u8 cacheline_size;
-
-	if (cacheline_size)
-		return cacheline_size;
-
-	status = ia64_pal_cache_summary(&levels, &unique_caches);
-	if (status != 0) {
-		printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
-		       __FUNCTION__, status);
-		return SMP_CACHE_BYTES;
-	}
-
-	status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2,
-					    &cci);
-	if (status != 0) {
-		printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n",
-		       __FUNCTION__, status);
-		return SMP_CACHE_BYTES;
-	}
-	cacheline_size = 1 << cci.pcci_line_size;
-	return cacheline_size;
-}
-
-/**
- * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi()
- * @dev: the PCI device for which MWI is enabled
- *
- * For ia64, we can get the cacheline sizes from PAL.
- *
- * RETURNS: An appropriate -ERRNO error value on eror, or zero for success.
- */
-int
-pcibios_prep_mwi (struct pci_dev *dev)
-{
-	unsigned long desired_linesize, current_linesize;
-	int rc = 0;
-	u8 pci_linesize;
-
-	desired_linesize = pci_cacheline_size();
-
-	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize);
-	current_linesize = 4 * pci_linesize;
-	if (desired_linesize != current_linesize) {
-		printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,",
-		       pci_name(dev), current_linesize);
-		if (current_linesize > desired_linesize) {
-			printk(" expected %lu bytes instead\n", desired_linesize);
-			rc = -EINVAL;
-		} else {
-			printk(" correcting to %lu\n", desired_linesize);
-			pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4);
-		}
-	}
-	return rc;
-}
-
-int pci_vector_resources(int last, int nr_released)
-{
-	int count = nr_released;
-
-	count += (IA64_LAST_DEVICE_VECTOR - last);
-
-	return count;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
deleted file mode 100644
index 6d19da28df..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for Xen components
-#
-
-obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \
-	 hypervisor.o util.o xencomm.o xcom_hcall.o xcom_mini.o \
-	 xcom_privcmd.o mem.o xen_dma.o
-
-obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S b/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
deleted file mode 100644
index dc5977886e..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Support routines for Xen hypercalls
- *
- * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/processor.h>
-#include <asm/asmmacro.h>
-
-GLOBAL_ENTRY(xen_get_psr)
-	XEN_HYPER_GET_PSR
-	br.ret.sptk.many rp
-    ;;
-END(xen_get_psr)
-
-GLOBAL_ENTRY(xen_get_ivr)
-	XEN_HYPER_GET_IVR
-	br.ret.sptk.many rp
-	;;
-END(xen_get_ivr)
-
-GLOBAL_ENTRY(xen_get_tpr)
-	XEN_HYPER_GET_TPR
-	br.ret.sptk.many rp
-	;;
-END(xen_get_tpr)
-
-GLOBAL_ENTRY(xen_set_tpr)
-	mov r8=r32
-	XEN_HYPER_SET_TPR
-	br.ret.sptk.many rp
-	;;
-END(xen_set_tpr)
-
-GLOBAL_ENTRY(xen_eoi)
-	mov r8=r32
-	XEN_HYPER_EOI
-	br.ret.sptk.many rp
-	;;
-END(xen_eoi)
-
-GLOBAL_ENTRY(xen_thash)
-	mov r8=r32
-	XEN_HYPER_THASH
-	br.ret.sptk.many rp
-	;;
-END(xen_thash)
-
-GLOBAL_ENTRY(xen_set_itm)
-	mov r8=r32
-	XEN_HYPER_SET_ITM
-	br.ret.sptk.many rp
-	;;
-END(xen_set_itm)
-
-GLOBAL_ENTRY(xen_ptcga)
-	mov r8=r32
-	mov r9=r33
-	XEN_HYPER_PTC_GA
-	br.ret.sptk.many rp
-	;;
-END(xen_ptcga)
-
-GLOBAL_ENTRY(xen_get_rr)
-	mov r8=r32
-	XEN_HYPER_GET_RR
-	br.ret.sptk.many rp
-	;;
-END(xen_get_rr)
-
-GLOBAL_ENTRY(xen_set_rr)
-	mov r8=r32
-	mov r9=r33
-	XEN_HYPER_SET_RR
-	br.ret.sptk.many rp
-	;;
-END(xen_set_rr)
-
-GLOBAL_ENTRY(xen_set_kr)
-	mov r8=r32
-	mov r9=r33
-	XEN_HYPER_SET_KR
-	br.ret.sptk.many rp
-END(xen_set_kr)
-
-GLOBAL_ENTRY(xen_fc)
-	mov r8=r32
-	XEN_HYPER_FC
-	br.ret.sptk.many rp
-END(xen_fc)
-
-GLOBAL_ENTRY(xen_get_cpuid)
-	mov r8=r32
-	XEN_HYPER_GET_CPUID
-	br.ret.sptk.many rp
-END(xen_get_cpuid)
-
-GLOBAL_ENTRY(xen_get_pmd)
-	mov r8=r32
-	XEN_HYPER_GET_PMD
-	br.ret.sptk.many rp
-END(xen_get_pmd)
-
-#ifdef CONFIG_IA32_SUPPORT
-GLOBAL_ENTRY(xen_get_eflag)
-	XEN_HYPER_GET_EFLAG
-	br.ret.sptk.many rp
-END(xen_get_eflag)
-	
-// some bits aren't set if pl!=0, see SDM vol1 3.1.8
-GLOBAL_ENTRY(xen_set_eflag)
-	mov r8=r32
-	XEN_HYPER_SET_EFLAG
-	br.ret.sptk.many rp
-END(xen_set_eflag)
-#endif
-
-GLOBAL_ENTRY(xen_send_ipi)
-        mov r14=r32
-        mov r15=r33
-        mov r2=0x400
-        break 0x1000
-        ;;
-        br.ret.sptk.many rp
-        ;;
-END(xen_send_ipi)
-
-#ifdef CONFIG_XEN_IA64_VDSO_PARAVIRT
-// Those are vdso specialized.
-// In fsys mode, call, ret can't be used.
-
-	// see xen_ssm_i() in privop.h
-	// r22 = &vcpu->vcpu_info->evtchn_upcall_mask
-	// r23 = &vpsr.ic
-	// r24 = &vcpu->vcpu_info->evtchn_upcall_pending
-	// r25 = tmp
-	// r31 = tmp
-	// p11 = tmp
-	// p14 = tmp
-#define XEN_SET_PSR_I			\
-	ld1 r31=[r22];			\
-	ld1 r25=[r24];			\
-	;;				\
-	st1 [r22]=r0;			\
-	cmp.ne.unc p14,p0=r0,r31;	\
-	;;				\
-(p14)	cmp.ne.unc p11,p0=r0,r25;	\
-	;;				\
-(p11)	st1 [r22]=r20;			\
-(p11)	XEN_HYPER_SSM_I;
-		
-GLOBAL_ENTRY(xen_ssm_i_0)
-	XEN_SET_PSR_I
-	brl.cond.sptk	.vdso_ssm_i_0_ret
-	;; 
-END(xen_ssm_i_0)
-
-GLOBAL_ENTRY(xen_ssm_i_1)
-	XEN_SET_PSR_I
-	brl.cond.sptk	.vdso_ssm_i_1_ret
-	;; 
-END(xen_ssm_i_1)
-
-GLOBAL_ENTRY(__hypercall)
-	mov r2=r37
-	break 0x1000
-	br.ret.sptk.many b0
-	;; 
-END(__hypercall)
-#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
deleted file mode 100644
index e895ef0d96..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
+++ /dev/null
@@ -1,1264 +0,0 @@
-/******************************************************************************
- * include/asm-ia64/shadow.h
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-//#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/bootmem.h>
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <linux/efi.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/meminit.h>
-#include <asm/hypervisor.h>
-#include <asm/hypercall.h>
-#include <xen/interface/memory.h>
-#include <xen/xencons.h>
-#include <xen/balloon.h>
-
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
-
-start_info_t *xen_start_info;
-EXPORT_SYMBOL(xen_start_info);
-
-int running_on_xen;
-EXPORT_SYMBOL(running_on_xen);
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
-static int p2m_expose_init(void);
-#else
-#define p2m_expose_init() (-ENOSYS)
-#define p2m_expose_resume() ((void)0)
-#endif
-
-EXPORT_SYMBOL(__hypercall);
-
-void __init
-xen_setup(char **cmdline_p)
-{
-	extern void dig_setup(char **cmdline_p);
-	if (ia64_platform_is("xen"))
-		dig_setup(cmdline_p);
-	
-	if (!is_running_on_xen() || !is_initial_xendomain())
-		return;
-
-	if (xen_start_info->console.dom0.info_size >=
-	    sizeof(struct dom0_vga_console_info)) {
-		const struct dom0_vga_console_info *info =
-		        (struct dom0_vga_console_info *)(
-		                (char *)xen_start_info +
-		                xen_start_info->console.dom0.info_off);
-		dom0_init_screen_info(info);
-	}
-	xen_start_info->console.domU.mfn = 0;
-	xen_start_info->console.domU.evtchn = 0;
-}
-
-void __cpuinit
-xen_cpu_init(void)
-{
-	extern void xen_smp_intr_init(void);
-	xen_smp_intr_init();
-}
-
-//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
-// move those to lib/contiguous_bitmap?
-//XXX discontigmem/sparsemem
-
-/*
- * Bitmap is indexed by page number. If bit is set, the page is part of a
- * xen_create_contiguous_region() area of memory.
- */
-unsigned long *contiguous_bitmap;
-
-#ifdef CONFIG_VIRTUAL_MEM_MAP
-/* Following logic is stolen from create_mem_map_table() for virtual memmap */
-static int
-create_contiguous_bitmap(u64 start, u64 end, void *arg)
-{
-	unsigned long address, start_page, end_page;
-	unsigned long bitmap_start, bitmap_end;
-	unsigned char *bitmap;
-	int node;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	bitmap_start = (unsigned long)contiguous_bitmap +
-	               ((__pa(start) >> PAGE_SHIFT) >> 3);
-	bitmap_end = (unsigned long)contiguous_bitmap +
-	             (((__pa(end) >> PAGE_SHIFT) + 2 * BITS_PER_LONG) >> 3);
-
-	start_page = bitmap_start & PAGE_MASK;
-	end_page = PAGE_ALIGN(bitmap_end);
-	node = paddr_to_nid(__pa(start));
-
-	bitmap = alloc_bootmem_pages_node(NODE_DATA(node),
-	                                  end_page - start_page);
-	BUG_ON(!bitmap);
-	memset(bitmap, 0, end_page - start_page);
-
-	for (address = start_page; address < end_page; address += PAGE_SIZE) {
-		pgd = pgd_offset_k(address);
-		if (pgd_none(*pgd))
-			pgd_populate(&init_mm, pgd,
-			             alloc_bootmem_pages_node(NODE_DATA(node),
-			                                      PAGE_SIZE));
-		pud = pud_offset(pgd, address);
-
-		if (pud_none(*pud))
-			pud_populate(&init_mm, pud,
-			             alloc_bootmem_pages_node(NODE_DATA(node),
-			                                      PAGE_SIZE));
-		pmd = pmd_offset(pud, address);
-
-		if (pmd_none(*pmd))
-			pmd_populate_kernel(&init_mm, pmd,
-			                    alloc_bootmem_pages_node
-			                    (NODE_DATA(node), PAGE_SIZE));
-		pte = pte_offset_kernel(pmd, address);
-
-		if (pte_none(*pte))
-			set_pte(pte,
-			        pfn_pte(__pa(bitmap + (address - start_page))
-			                >> PAGE_SHIFT, PAGE_KERNEL));
-	}
-	return 0;
-}
-#endif
-
-static void
-__contiguous_bitmap_init(unsigned long size)
-{
-	contiguous_bitmap = alloc_bootmem_pages(size);
-	BUG_ON(!contiguous_bitmap);
-	memset(contiguous_bitmap, 0, size);
-}
-
-void
-contiguous_bitmap_init(unsigned long end_pfn)
-{
-	unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
-#ifndef CONFIG_VIRTUAL_MEM_MAP
-	__contiguous_bitmap_init(size);
-#else
-	unsigned long max_gap = 0;
-
-	efi_memmap_walk(find_largest_hole, (u64*)&max_gap);
-	if (max_gap < LARGE_GAP) {
-		__contiguous_bitmap_init(size);
-	} else {
-		unsigned long map_size = PAGE_ALIGN(size);
-		vmalloc_end -= map_size;
-		contiguous_bitmap = (unsigned long*)vmalloc_end;
-		efi_memmap_walk(create_contiguous_bitmap, NULL);
-	}
-#endif
-}
-
-#if 0
-int
-contiguous_bitmap_test(void* p)
-{
-	return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
-}
-#endif
-
-static void contiguous_bitmap_set(
-	unsigned long first_page, unsigned long nr_pages)
-{
-	unsigned long start_off, end_off, curr_idx, end_idx;
-
-	curr_idx  = first_page / BITS_PER_LONG;
-	start_off = first_page & (BITS_PER_LONG-1);
-	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
-	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
-	if (curr_idx == end_idx) {
-		contiguous_bitmap[curr_idx] |=
-			((1UL<<end_off)-1) & -(1UL<<start_off);
-	} else {
-		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
-		while ( ++curr_idx < end_idx )
-			contiguous_bitmap[curr_idx] = ~0UL;
-		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
-	}
-}
-
-static void contiguous_bitmap_clear(
-	unsigned long first_page, unsigned long nr_pages)
-{
-	unsigned long start_off, end_off, curr_idx, end_idx;
-
-	curr_idx  = first_page / BITS_PER_LONG;
-	start_off = first_page & (BITS_PER_LONG-1);
-	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
-	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
-
-	if (curr_idx == end_idx) {
-		contiguous_bitmap[curr_idx] &=
-			-(1UL<<end_off) | ((1UL<<start_off)-1);
-	} else {
-		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
-		while ( ++curr_idx != end_idx )
-			contiguous_bitmap[curr_idx] = 0;
-		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
-	}
-}
-
-// __xen_create_contiguous_region(), __xen_destroy_contiguous_region()
-// are based on i386 xen_create_contiguous_region(),
-// xen_destroy_contiguous_region()
-
-/* Protected by balloon_lock. */
-#define MAX_CONTIG_ORDER 7
-static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
-
-/* Ensure multi-page extents are contiguous in machine memory. */
-int
-__xen_create_contiguous_region(unsigned long vstart,
-			       unsigned int order, unsigned int address_bits)
-{
-	unsigned long error = 0;
-	unsigned long gphys = __pa(vstart);
-	unsigned long start_gpfn = gphys >> PAGE_SHIFT;
-	unsigned long num_gpfn = 1 << order;
-	unsigned long i;
-	unsigned long flags;
-
-	unsigned long *in_frames = discontig_frames, out_frame;
-	int success;
-	struct xen_memory_exchange exchange = {
-		.in = {
-			.nr_extents   = num_gpfn,
-			.extent_order = 0,
-			.domid        = DOMID_SELF
-		},
-		.out = {
-			 .nr_extents   = 1,
-			 .extent_order = order,
-			 .address_bits = address_bits,
-			 .domid        = DOMID_SELF
-		 },
-		.nr_exchanged = 0
-	};
-
-	if (unlikely(order > MAX_CONTIG_ORDER))
-		return -ENOMEM;
-	
-	set_xen_guest_handle(exchange.in.extent_start, in_frames);
-	set_xen_guest_handle(exchange.out.extent_start, &out_frame);
-
-	scrub_pages(vstart, num_gpfn);
-
-	balloon_lock(flags);
-
-	/* Get a new contiguous memory extent. */
-	for (i = 0; i < num_gpfn; i++) {
-		in_frames[i] = start_gpfn + i;
-	}
-	out_frame = start_gpfn;
-	error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
-	success = (exchange.nr_exchanged == num_gpfn);
-	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
-	BUG_ON(success && (error != 0));
-	if (unlikely(error == -ENOSYS)) {
-		/* Compatibility when XENMEM_exchange is unsupported. */
-		error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-					     &exchange.in);
-		BUG_ON(error != num_gpfn);
-		error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
-					     &exchange.out);
-		if (error != 1) {
-			/* Couldn't get special memory: fall back to normal. */
-			for (i = 0; i < num_gpfn; i++) {
-				in_frames[i] = start_gpfn + i;
-			}
-			error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
-						     &exchange.in);
-			BUG_ON(error != num_gpfn);
-			success = 0;
-		} else
-			success = 1;
-	}
-	if (success)
-		contiguous_bitmap_set(start_gpfn, num_gpfn);
-#if 0
-	if (success) {
-		unsigned long mfn;
-		unsigned long mfn_prev = ~0UL;
-		for (i = 0; i < num_gpfn; i++) {
-			mfn = pfn_to_mfn_for_dma(start_gpfn + i);
-			if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
-				xprintk("\n");
-				xprintk("%s:%d order %d "
-					"start 0x%lx bus 0x%lx "
-					"machine 0x%lx\n",
-					__func__, __LINE__, order,
-					vstart, virt_to_bus((void*)vstart),
-					phys_to_machine_for_dma(gphys));
-				xprintk("mfn: ");
-				for (i = 0; i < num_gpfn; i++) {
-					mfn = pfn_to_mfn_for_dma(
-						start_gpfn + i);
-					xprintk("0x%lx ", mfn);
-				}
-				xprintk("\n");
-				break;
-			}
-			mfn_prev = mfn;
-		}
-	}
-#endif
-	balloon_unlock(flags);
-	return success? 0: -ENOMEM;
-}
-
-void
-__xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
-{
-	unsigned long flags;
-	unsigned long error = 0;
-	unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
-	unsigned long num_gpfn = 1UL << order;
-	unsigned long i;
-
-	unsigned long *out_frames = discontig_frames, in_frame;
-	int            success;
-	struct xen_memory_exchange exchange = {
-		.in = {
-			.nr_extents   = 1,
-			.extent_order = order,
-			.domid        = DOMID_SELF
-		},
-		.out = {
-			 .nr_extents   = num_gpfn,
-			 .extent_order = 0,
-			 .address_bits = 0,
-			 .domid        = DOMID_SELF
-		 },
-		.nr_exchanged = 0
-        };
-	
-
-	if (!test_bit(start_gpfn, contiguous_bitmap))
-		return;
-
-	if (unlikely(order > MAX_CONTIG_ORDER))
-		return;
-
-	set_xen_guest_handle(exchange.in.extent_start, &in_frame);
-	set_xen_guest_handle(exchange.out.extent_start, out_frames);
-
-	scrub_pages(vstart, num_gpfn);
-
-	balloon_lock(flags);
-
-	contiguous_bitmap_clear(start_gpfn, num_gpfn);
-
-        /* Do the exchange for non-contiguous MFNs. */
-	in_frame = start_gpfn;
-	for (i = 0; i < num_gpfn; i++) {
-		out_frames[i] = start_gpfn + i;
-	}
-	error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
-	success = (exchange.nr_exchanged == 1);
-	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
-	BUG_ON(success && (error != 0));
-	if (unlikely(error == -ENOSYS)) {
-                /* Compatibility when XENMEM_exchange is unsupported. */
-		error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-					     &exchange.in);
-		BUG_ON(error != 1);
-
-		error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
-					     &exchange.out);
-		BUG_ON(error != num_gpfn);
-	}
-	balloon_unlock(flags);
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// grant table hack
-// cmd: GNTTABOP_xxx
-
-#include <linux/mm.h>
-#include <xen/interface/xen.h>
-#include <xen/gnttab.h>
-
-static void
-gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
-{
-	uint32_t flags;
-
-	flags = uop->flags;
-
-	if (flags & GNTMAP_host_map) {
-		if (flags & GNTMAP_application_map) {
-			xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags);
-			BUG();
-		}
-		if (flags & GNTMAP_contains_pte) {
-			xprintd("GNTMAP_contains_pte is not supported yet flags 0x%x\n", flags);
-			BUG();
-		}
-	} else if (flags & GNTMAP_device_map) {
-		xprintd("GNTMAP_device_map is not supported yet 0x%x\n", flags);
-		BUG();//XXX not yet. actually this flag is not used.
-	} else {
-		BUG();
-	}
-}
-
-int
-HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
-{
-	if (cmd == GNTTABOP_map_grant_ref) {
-		unsigned int i;
-		for (i = 0; i < count; i++) {
-			gnttab_map_grant_ref_pre(
-				(struct gnttab_map_grant_ref*)uop + i);
-		}
-	}
-	return xencomm_mini_hypercall_grant_table_op(cmd, uop, count);
-}
-EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
-
-///////////////////////////////////////////////////////////////////////////
-// foreign mapping
-#include <linux/efi.h>
-#include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()
-
-static unsigned long privcmd_resource_min = 0;
-// Xen/ia64 currently can handle pseudo physical address bits up to
-// (PAGE_SHIFT * 3)
-static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
-static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;
-
-static unsigned long
-md_end_addr(const efi_memory_desc_t *md)
-{
-	return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-}
-
-#define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE	(1024 * 1024 * 1024UL)
-static int
-xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
-{
-	return (start < end &&
-		(end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
-}
-
-static int __init
-xen_ia64_privcmd_init(void)
-{
-	void *efi_map_start, *efi_map_end, *p;
-	u64 efi_desc_size;
-	efi_memory_desc_t *md;
-	unsigned long tmp_min;
-	unsigned long tmp_max;
-	unsigned long gap_size;
-	unsigned long prev_end;
-
-	if (!is_running_on_xen())
-		return -1;
-
-	efi_map_start = __va(ia64_boot_param->efi_memmap);
-	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
-	efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-	// at first check the used highest address
-	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-		// nothing
-	}
-	md = p - efi_desc_size;
-	privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
-	if (xen_ia64_privcmd_check_size(privcmd_resource_min,
-					privcmd_resource_max)) {
-		goto out;
-	}
-
-	// the used highest address is too large. try to find the largest gap.
-	tmp_min = privcmd_resource_max;
-	tmp_max = 0;
-	gap_size = 0;
-	prev_end = 0;
-	for (p = efi_map_start;
-	     p < efi_map_end - efi_desc_size;
-	     p += efi_desc_size) {
-		unsigned long end;
-		efi_memory_desc_t* next;
-		unsigned long next_start;
-
-		md = p;
-		end = md_end_addr(md);
-		if (end > privcmd_resource_max) {
-			break;
-		}
-		if (end < prev_end) {
-			// work around. 
-			// Xen may pass incompletely sorted memory
-			// descriptors like
-			// [x, x + length]
-			// [x, x]
-			// this order should be reversed.
-			continue;
-		}
-		next = p + efi_desc_size;
-		next_start = next->phys_addr;
-		if (next_start > privcmd_resource_max) {
-			next_start = privcmd_resource_max;
-		}
-		if (end < next_start && gap_size < (next_start - end)) {
-			tmp_min = end;
-			tmp_max = next_start;
-			gap_size = tmp_max - tmp_min;
-		}
-		prev_end = end;
-	}
-
-	privcmd_resource_min = GRANULEROUNDUP(tmp_min);
-	if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
-		privcmd_resource_max = tmp_max;
-		goto out;
-	}
-
-	privcmd_resource_min = tmp_min;
-	privcmd_resource_max = tmp_max;
-	if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
-					 privcmd_resource_max)) {
-		// Any large enough gap isn't found.
-		// go ahead anyway with the warning hoping that large region
-		// won't be requested.
-		printk(KERN_WARNING "xen privcmd: large enough region for privcmd mmap is not found.\n");
-	}
-
-out:
-	printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
-	       privcmd_resource_min, privcmd_resource_max, 
-	       (privcmd_resource_max - privcmd_resource_min) >> 20);
-	BUG_ON(privcmd_resource_min >= privcmd_resource_max);
-
-	// XXX this should be somewhere appropriate
-	(void)p2m_expose_init();
-
-	return 0;
-}
-late_initcall(xen_ia64_privcmd_init);
-
-struct xen_ia64_privcmd_entry {
-	atomic_t	map_count;
-#define INVALID_GPFN	(~0UL)
-	unsigned long	gpfn;
-};
-
-struct xen_ia64_privcmd_range {
-	atomic_t			ref_count;
-	unsigned long			pgoff; // in PAGE_SIZE
-	struct resource*		res;
-
-	unsigned long			num_entries;
-	struct xen_ia64_privcmd_entry	entries[0];
-};
-
-struct xen_ia64_privcmd_vma {
-	int				is_privcmd_mmapped;
-	struct xen_ia64_privcmd_range*	range;
-
-	unsigned long			num_entries;
-	struct xen_ia64_privcmd_entry*	entries;
-};
-
-static void
-xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
-{
-	atomic_set(&entry->map_count, 0);
-	entry->gpfn = INVALID_GPFN;
-}
-
-static int
-xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
-			    unsigned long addr,
-			    struct xen_ia64_privcmd_range* privcmd_range,
-			    int i,
-			    unsigned long gmfn,
-			    pgprot_t prot,
-			    domid_t domid)
-{
-	int error = 0;
-	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
-	unsigned long gpfn;
-	unsigned long flags;
-
-	if ((addr & ~PAGE_MASK) != 0 || gmfn == INVALID_MFN) {
-		error = -EINVAL;
-		goto out;
-	}
-
-	if (entry->gpfn != INVALID_GPFN) {
-		error = -EBUSY;
-		goto out;
-	}
-	gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;
-
-	flags = ASSIGN_writable;
-	if (pgprot_val(prot) == PROT_READ) {
-		flags = ASSIGN_readonly;
-	}
-	error = HYPERVISOR_add_physmap_with_gmfn(gpfn, gmfn, flags, domid);
-	if (error != 0) {
-		goto out;
-	}
-
-	prot = vma->vm_page_prot;
-	error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
-	if (error != 0) {
-		error = HYPERVISOR_zap_physmap(gpfn, 0);
-		if (error) {
-			BUG();//XXX
-		}
-	} else {
-		atomic_inc(&entry->map_count);
-		entry->gpfn = gpfn;
-	}
-
-out:
-	return error;
-}
-
-static void
-xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
-			      int i)
-{
-	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
-	unsigned long gpfn = entry->gpfn;
-	//gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
-	//	(vma->vm_pgoff - privcmd_range->pgoff);
-	int error;
-
-	error = HYPERVISOR_zap_physmap(gpfn, 0);
-	if (error) {
-		BUG();//XXX
-	}
-	entry->gpfn = INVALID_GPFN;
-}
-
-static void
-xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
-			    int i)
-{
-	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
-	if (entry->gpfn != INVALID_GPFN) {
-		atomic_inc(&entry->map_count);
-	} else {
-		BUG_ON(atomic_read(&entry->map_count) != 0);
-	}
-}
-
-static void
-xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
-			     int i)
-{
-	struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
-	if (entry->gpfn != INVALID_GPFN &&
-	    atomic_dec_and_test(&entry->map_count)) {
-		xen_ia64_privcmd_entry_munmap(privcmd_range, i);
-	}
-}
-
-static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
-static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);
-
-struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
-	.open = &xen_ia64_privcmd_vma_open,
-	.close = &xen_ia64_privcmd_vma_close,
-};
-
-static void
-__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
-			    struct xen_ia64_privcmd_vma* privcmd_vma,
-			    struct xen_ia64_privcmd_range* privcmd_range)
-{
-	unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
-	unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-	unsigned long i;
-
-	BUG_ON(entry_offset < 0);
-	BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);
-
-	privcmd_vma->range = privcmd_range;
-	privcmd_vma->num_entries = num_entries;
-	privcmd_vma->entries = &privcmd_range->entries[entry_offset];
-	vma->vm_private_data = privcmd_vma;
-	for (i = 0; i < privcmd_vma->num_entries; i++) {
-		xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
-	}
-
-	vma->vm_private_data = privcmd_vma;
-	vma->vm_ops = &xen_ia64_privcmd_vm_ops;
-}
-
-static void
-xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
-{
-	struct xen_ia64_privcmd_vma* old_privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
-	struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
-	struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
-
-	atomic_inc(&privcmd_range->ref_count);
-	// vm_op->open() can't fail.
-	privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
-	// copy original value if necessary
-	privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped;
-
-	__xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
-}
-
-static void
-xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
-{
-	struct xen_ia64_privcmd_vma* privcmd_vma =
-		(struct xen_ia64_privcmd_vma*)vma->vm_private_data;
-	struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
-	unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
-	unsigned long i;
-
-	for (i = 0; i < privcmd_vma->num_entries; i++) {
-		xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
-	}
-	vma->vm_private_data = NULL;
-	kfree(privcmd_vma);
-
-	if (atomic_dec_and_test(&privcmd_range->ref_count)) {
-#if 1
-		for (i = 0; i < privcmd_range->num_entries; i++) {
-			struct xen_ia64_privcmd_entry* entry =
-				&privcmd_range->entries[i];
-			BUG_ON(atomic_read(&entry->map_count) != 0);
-			BUG_ON(entry->gpfn != INVALID_GPFN);
-		}
-#endif
-		release_resource(privcmd_range->res);
-		kfree(privcmd_range->res);
-		vfree(privcmd_range);
-	}
-}
-
-int
-privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
-{
-	struct xen_ia64_privcmd_vma* privcmd_vma =
-		(struct xen_ia64_privcmd_vma *)vma->vm_private_data;
-	return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0);
-}
-
-int
-privcmd_mmap(struct file * file, struct vm_area_struct * vma)
-{
-	int error;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	unsigned long num_entries = size >> PAGE_SHIFT;
-	struct xen_ia64_privcmd_range* privcmd_range = NULL;
-	struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
-	struct resource* res = NULL;
-	unsigned long i;
-	BUG_ON(!is_running_on_xen());
-
-	BUG_ON(file->private_data != NULL);
-
-	error = -ENOMEM;
-	privcmd_range =
-		vmalloc(sizeof(*privcmd_range) +
-			sizeof(privcmd_range->entries[0]) * num_entries);
-	if (privcmd_range == NULL) {
-		goto out_enomem0;
-	}
-	privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
-	if (privcmd_vma == NULL) {
-		goto out_enomem1;
-	}
-	privcmd_vma->is_privcmd_mmapped = 0;
-
-	res = kzalloc(sizeof(*res), GFP_KERNEL);
-	if (res == NULL) {
-		goto out_enomem1;
-	}
-	res->name = "Xen privcmd mmap";
-	error = allocate_resource(&iomem_resource, res, size,
-				  privcmd_resource_min, privcmd_resource_max,
-				  privcmd_resource_align, NULL, NULL);
-	if (error) {
-		goto out_enomem1;
-	}
-	privcmd_range->res = res;
-
-	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
-
-	atomic_set(&privcmd_range->ref_count, 1);
-	privcmd_range->pgoff = vma->vm_pgoff;
-	privcmd_range->num_entries = num_entries;
-	for (i = 0; i < privcmd_range->num_entries; i++) {
-		xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
-	}
-
-	__xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
-	return 0;
-
-out_enomem1:
-	kfree(res);
-	kfree(privcmd_vma);
-out_enomem0:
-	vfree(privcmd_range);
-	return error;
-}
-
-int
-direct_remap_pfn_range(struct vm_area_struct *vma,
-		       unsigned long address,	// process virtual address
-		       unsigned long gmfn,	// gmfn, gmfn + 1, ... gmfn + size/PAGE_SIZE
-		       unsigned long size,
-		       pgprot_t prot,
-		       domid_t  domid)		// target domain
-{
-	struct xen_ia64_privcmd_vma* privcmd_vma =
-		(struct xen_ia64_privcmd_vma*)vma->vm_private_data;
-	struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
-	unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
-
-	unsigned long i;
-	unsigned long offset;
-	int error = 0;
-	BUG_ON(!is_running_on_xen());
-
-#if 0
-	if (prot != vm->vm_page_prot) {
-		return -EINVAL;
-	}
-#endif
-
-	i = (address - vma->vm_start) >> PAGE_SHIFT;
-	for (offset = 0; offset < size; offset += PAGE_SIZE) {
-		error = xen_ia64_privcmd_entry_mmap(vma, (address + offset) & PAGE_MASK, privcmd_range, entry_offset + i, gmfn, prot, domid);
-		if (error != 0) {
-			break;
-		}
-
-		i++;
-		gmfn++;
-        }
-
-	return error;
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-// expose p2m table
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
-#include <linux/cpu.h>
-#include <asm/uaccess.h>
-
-int p2m_initialized __read_mostly = 0;
-
-unsigned long p2m_min_low_pfn __read_mostly;
-unsigned long p2m_max_low_pfn __read_mostly;
-unsigned long p2m_convert_min_pfn __read_mostly;
-unsigned long p2m_convert_max_pfn __read_mostly;
-
-static struct resource p2m_resource = {
-	.name    = "Xen p2m table",
-	.flags   = IORESOURCE_MEM,
-};
-static unsigned long p2m_assign_start_pfn __read_mostly;
-static unsigned long p2m_assign_end_pfn __read_mostly;
-static unsigned long p2m_expose_size;	// this is referenced only when resume.
-					// so __read_mostly doesn't make sense.
-volatile const pte_t* p2m_pte __read_mostly;
-
-#define GRNULE_PFN	PTRS_PER_PTE
-static unsigned long p2m_granule_pfn __read_mostly = GRNULE_PFN;
-
-#define ROUNDDOWN(x, y)  ((x) & ~((y) - 1))
-#define ROUNDUP(x, y)    (((x) + (y) - 1) & ~((y) - 1))
-
-#define P2M_PREFIX	"Xen p2m: "
-
-static int xen_ia64_p2m_expose __read_mostly = 1;
-module_param(xen_ia64_p2m_expose, int, 0);
-MODULE_PARM_DESC(xen_ia64_p2m_expose,
-                 "enable/disable xen/ia64 p2m exposure optimization\n");
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
-module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
-MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
-                 "use/unuse dtr to map exposed p2m table\n");
-
-static const int p2m_page_shifts[] = {
-	_PAGE_SIZE_4K,
-	_PAGE_SIZE_8K,
-	_PAGE_SIZE_16K,
-	_PAGE_SIZE_64K,
-	_PAGE_SIZE_256K,
-	_PAGE_SIZE_1M,
-	_PAGE_SIZE_4M,
-	_PAGE_SIZE_16M,
-	_PAGE_SIZE_64M,
-	_PAGE_SIZE_256M,
-};
-
-struct p2m_itr_arg {
-	unsigned long vaddr;
-	unsigned long pteval;
-	unsigned long log_page_size;
-};
-static struct p2m_itr_arg p2m_itr_arg __read_mostly;
-
-// This should be in asm-ia64/kregs.h
-#define IA64_TR_P2M_TABLE	3
-
-static void
-p2m_itr(void* info)
-{
-	struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info;
-	ia64_itr(0x2, IA64_TR_P2M_TABLE,
-	         arg->vaddr, arg->pteval, arg->log_page_size);
-	ia64_srlz_d();
-}
-
-static int
-p2m_expose_dtr_call(struct notifier_block *self,
-                    unsigned long event, void* ptr)
-{
-	unsigned int cpu = (unsigned int)(long)ptr;
-	if (event != CPU_ONLINE)
-		return 0;
-	if (p2m_initialized && xen_ia64_p2m_expose_use_dtr) {
-		unsigned int me = get_cpu();
-		if (cpu == me)
-			p2m_itr(&p2m_itr_arg);
-		else
-			smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg,
-						 1, 1);
-		put_cpu();
-	}
-	return 0;
-}
-
-static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
-	.notifier_call = p2m_expose_dtr_call,
-	.next          = NULL,
-	.priority      = 0
-};
-#endif
-
-static int
-p2m_expose_init(void)
-{
-	unsigned long num_pfn;
-	unsigned long p2m_size = 0;
-	unsigned long align = ~0UL;
-	int error = 0;
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-	int i;
-	unsigned long page_size;
-	unsigned long log_page_size = 0;
-#endif
-
-	if (!xen_ia64_p2m_expose)
-		return -ENOSYS;
-	if (p2m_initialized)
-		return 0;
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-	error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
-	if (error < 0)
-		return error;
-#endif
-
-	lock_cpu_hotplug();
-	if (p2m_initialized)
-		goto out;
-
-#ifdef CONFIG_DISCONTIGMEM
-	p2m_min_low_pfn = min_low_pfn;
-	p2m_max_low_pfn = max_low_pfn;
-#else
-	p2m_min_low_pfn = 0;
-	p2m_max_low_pfn = max_pfn;
-#endif
-
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-	if (xen_ia64_p2m_expose_use_dtr) {
-		unsigned long granule_pfn = 0;
-		p2m_size = p2m_max_low_pfn - p2m_min_low_pfn;
-		for (i = 0;
-		     i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
-		     i++) {
-			log_page_size = p2m_page_shifts[i];
-			page_size = 1UL << log_page_size;
-			if (page_size < p2m_size)
-				continue;
-
-			granule_pfn = max(page_size >> PAGE_SHIFT,
-			                  p2m_granule_pfn);
-			p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
-			                                granule_pfn);
-			p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
-			                              granule_pfn);
-			num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
-			p2m_expose_size = num_pfn << PAGE_SHIFT;
-			p2m_size = num_pfn / PTRS_PER_PTE;
-			p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT);
-			if (p2m_size == page_size)
-				break;
-		}
-		if (p2m_size != page_size) {
-			printk(KERN_ERR "p2m_size != page_size\n");
-			error = -EINVAL;
-			goto out;
-		}
-		align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
-	} else
-#endif
-	{
-		BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
-		p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
-		                                p2m_granule_pfn);
-		p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn);
-		num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
-		p2m_expose_size = num_pfn << PAGE_SHIFT;
-		p2m_size = num_pfn / PTRS_PER_PTE;
-		p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
-		align = max(privcmd_resource_align,
-		            p2m_granule_pfn << PAGE_SHIFT);
-	}
-	
-	// use privcmd region
-	error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
-	                          privcmd_resource_min, privcmd_resource_max,
-	                          align, NULL, NULL);
-	if (error) {
-		printk(KERN_ERR P2M_PREFIX
-		       "can't allocate region for p2m exposure "
-		       "[0x%016lx, 0x%016lx) 0x%016lx\n",
-		       p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
-		goto out;
-	}
-
-	p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
-	p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
-	
-	error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
-	                              p2m_assign_start_pfn,
-	                              p2m_expose_size, p2m_granule_pfn);
-	if (error) {
-		printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
-		       error);
-		printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
-		       "expose_size 0x%016lx granule 0x%016lx\n",
-		       p2m_convert_min_pfn, p2m_assign_start_pfn,
-		       p2m_expose_size, p2m_granule_pfn);;
-		release_resource(&p2m_resource);
-		goto out;
-	}
-	p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-	if (xen_ia64_p2m_expose_use_dtr) {
-		p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
-		                                        << PAGE_SHIFT);
-		p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
-		                                     PAGE_KERNEL));
-		p2m_itr_arg.log_page_size = log_page_size;
-		smp_mb();
-		smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
-		p2m_itr(&p2m_itr_arg);
-	}
-#endif	
-	smp_mb();
-	p2m_initialized = 1;
-	printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
-	       p2m_convert_min_pfn << PAGE_SHIFT,
-	       p2m_convert_max_pfn << PAGE_SHIFT);
-	printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
-	       p2m_assign_start_pfn << PAGE_SHIFT,
-	       p2m_assign_end_pfn << PAGE_SHIFT,
-	       p2m_size / 1024);
-out:
-	unlock_cpu_hotplug();
-	return error;
-}
-
-#ifdef notyet
-void
-p2m_expose_cleanup(void)
-{
-	BUG_ON(!p2m_initialized);
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-	unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
-#endif
-	release_resource(&p2m_resource);
-}
-#endif
-
-static void
-p2m_expose_resume(void)
-{
-	int error;
-
-	if (!xen_ia64_p2m_expose || !p2m_initialized)
-		return;
-
-	/*
-	 * We can't call {lock, unlock}_cpu_hotplug() because
-	 * they require process context.
-	 * We don't need them because we're the only one cpu and
-	 * interrupts are masked when resume.
-	 */
-	error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
-	                              p2m_assign_start_pfn,
-	                              p2m_expose_size, p2m_granule_pfn);
-	if (error) {
-		printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
-		       error);
-		printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
-		       "expose_size 0x%016lx granule 0x%016lx\n",
-		       p2m_convert_min_pfn, p2m_assign_start_pfn,
-		       p2m_expose_size, p2m_granule_pfn);;
-		p2m_initialized = 0;
-		smp_mb();
-		ia64_ptr(0x2, p2m_itr_arg.vaddr, p2m_itr_arg.log_page_size);
-		
-		/*
-		 * We can't call those clean up functions because they
-		 * require process context.
-		 */
-#if 0
-#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
-		if (xen_ia64_p2m_expose_use_dtr)
-			unregister_cpu_notifier(
-				&p2m_expose_dtr_hotplug_notifier);
-#endif
-		release_resource(&p2m_resource);
-#endif
-	}
-}
-
-//XXX inlinize?
-unsigned long
-p2m_phystomach(unsigned long gpfn)
-{
-	volatile const pte_t* pte;
-	unsigned long mfn;
-	unsigned long pteval;
-	
-	if (!p2m_initialized ||
-	    gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
-	    /* || !pfn_valid(gpfn) */)
-		return INVALID_MFN;
-	pte = p2m_pte + (gpfn - p2m_convert_min_pfn);
-
-	mfn = INVALID_MFN;
-	if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
-	           pte_present(__pte(pteval)) &&
-	           pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
-		mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;
-
-	return mfn;
-}
-
-EXPORT_SYMBOL_GPL(p2m_initialized);
-EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
-EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
-EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
-EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
-EXPORT_SYMBOL_GPL(p2m_pte);
-EXPORT_SYMBOL_GPL(p2m_phystomach);
-#endif
-
-///////////////////////////////////////////////////////////////////////////
-// for xenoprof
-
-struct resource*
-xen_ia64_allocate_resource(unsigned long size)
-{
-	struct resource* res;
-	int error;
-	
-	res = kmalloc(sizeof(*res), GFP_KERNEL);
-	if (res == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	res->name = "Xen";
-	res->flags = IORESOURCE_MEM;
-	error = allocate_resource(&iomem_resource, res, PAGE_ALIGN(size),
-	                          privcmd_resource_min, privcmd_resource_max,
-	                          IA64_GRANULE_SIZE, NULL, NULL);
-	if (error) {
-		kfree(res);
-		return ERR_PTR(error);
-	}
-	return res;
-}
-EXPORT_SYMBOL_GPL(xen_ia64_allocate_resource);
-
-void
-xen_ia64_release_resource(struct resource* res)
-{
-	release_resource(res);
-	kfree(res);
-}
-EXPORT_SYMBOL_GPL(xen_ia64_release_resource);
-
-void
-xen_ia64_unmap_resource(struct resource* res)
-{
-	unsigned long gpfn = res->start >> PAGE_SHIFT;
-	unsigned long nr_pages = (res->end - res->start) >> PAGE_SHIFT;
-	unsigned long i;
-	
-	for (i = 0; i < nr_pages; i++) {
-		int error = HYPERVISOR_zap_physmap(gpfn + i, 0);
-		if (error)
-			printk(KERN_ERR
-			       "%s:%d zap_phsymap failed %d gpfn %lx\n",
-			       __func__, __LINE__, error, gpfn + i);
-	}
-	xen_ia64_release_resource(res);
-}
-EXPORT_SYMBOL_GPL(xen_ia64_unmap_resource);
-
-///////////////////////////////////////////////////////////////////////////
-// suspend/resume
-void
-xen_post_suspend(int suspend_cancelled)
-{
-	if (suspend_cancelled)
-		return;
-	
-	p2m_expose_resume();
-	/* add more if necessary */
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c b/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c
deleted file mode 100644
index 4ad588a7c2..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/machvec.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define MACHVEC_PLATFORM_NAME           xen
-#define MACHVEC_PLATFORM_HEADER         <asm/machvec_xen.h>
-#include <asm/machvec_init.h>
-
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/mem.c b/linux-2.6-xen-sparse/arch/ia64/xen/mem.c
deleted file mode 100644
index dc93097c70..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/mem.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- *  Originally from linux/drivers/char/mem.c
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- *  Added devfs support. 
- *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
- *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
- */
-/*
- * taken from
- * linux/drivers/char/mem.c and linux-2.6-xen-sparse/drivers/xen/char/mem.c.
- * adjusted for IA64 and made transparent.
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- */
-
-#include <linux/mm.h>
-#include <linux/efi.h>
-
-/*
- * Architectures vary in how they handle caching for addresses
- * outside of main memory.
- *
- */
-static inline int uncached_access(struct file *file, unsigned long addr)
-{
-	/*
-	 * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
-	 */
-	return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-}
-
-int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
-{
-	unsigned long addr = vma->vm_pgoff << PAGE_SHIFT;
-	size_t size = vma->vm_end - vma->vm_start;
-
-
-#if 0
-	/*
-	 *XXX FIXME: linux-2.6.16.29, linux-2.6.17
-	 *    valid_mmap_phys_addr_range() in linux/arch/ia64/kernel/efi.c
-	 *    fails checks.
-	 *    linux-2.6.18.1's returns always 1. 
-	 *    Its comments says
-	 *
-         * MMIO regions are often missing from the EFI memory map.
-         * We must allow mmap of them for programs like X, so we
-         * currently can't do any useful validation.
-         */
-	if (!valid_mmap_phys_addr_range(addr, &size))
-		return -EINVAL;
-	if (size < vma->vm_end - vma->vm_start)
-		return -EINVAL;
-#endif
-
-	if (is_running_on_xen()) {
-		unsigned long offset = HYPERVISOR_ioremap(addr, size);
-		if (IS_ERR_VALUE(offset))
-			return offset;
-	}
-
-	if (uncached_access(file, vma->vm_pgoff << PAGE_SHIFT))
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-        /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
-        if (remap_pfn_range(vma,
-                            vma->vm_start,
-                            vma->vm_pgoff,
-                            size,
-                            vma->vm_page_prot))
-                return -EAGAIN;
-        return 0;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c b/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c
deleted file mode 100644
index fbc4664bbd..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/swiotlb.c
+++ /dev/null
@@ -1,882 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is for IA-64 and EM64T platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
- * Copyright (C) 2000, 2003 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
- * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
- *			unnecessary i-cache flushing.
- * 04/07/.. ak		Better overflow handling. Assorted fixes.
- * 05/09/10 linville	Add support for syncing ranges, support syncing for
- *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
- */
-
-#include <linux/cache.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ctype.h>
-
-#include <asm/io.h>
-#include <asm/dma.h>
-#include <asm/scatterlist.h>
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-
-#ifdef CONFIG_XEN
-/*
- * What DMA mask should Xen use to remap the bounce buffer pool?  Most
- * reports seem to indicate 30 bits is sufficient, except maybe for old
- * sound cards that we probably don't care about anyway.  If we need to,
- * we could put in some smarts to try to lower, but hopefully it's not
- * necessary.
- */
-#define DMA_BITS	(30)
-#endif
-
-#define OFFSET(val,align) ((unsigned long)	\
-	                   ( (val) & ( (align) - 1)))
-
-#define SG_ENT_VIRT_ADDRESS(sg)	(page_address((sg)->page) + (sg)->offset)
-#define SG_ENT_PHYS_ADDRESS(SG)	virt_to_bus(SG_ENT_VIRT_ADDRESS(SG))
-
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2.  What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define IO_TLB_SEGSIZE	128
-
-/*
- * log of the size of each IO TLB slab.  The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
-#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-
-/*
- * Minimum IO TLB size to bother booting with.  Systems with mainly
- * 64bit capable cards will only lightly use the swiotlb.  If we can't
- * allocate a contiguous 1MB, we're probably in trouble anyway.
- */
-#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
-	SYNC_FOR_CPU = 0,
-	SYNC_FOR_DEVICE = 1,
-};
-
-int swiotlb_force;
-
-/*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static char *io_tlb_start, *io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-void *io_tlb_overflow_buffer;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
-
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-static unsigned char **io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
-static int __init
-setup_io_tlb_npages(char *str)
-{
-	if (isdigit(*str)) {
-		io_tlb_nslabs = simple_strtoul(str, &str, 0);
-		/* avoid tail segment of size < IO_TLB_SEGSIZE */
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-	if (*str == ',')
-		++str;
-	if (!strcmp(str, "force"))
-		swiotlb_force = 1;
-	return 1;
-}
-__setup("swiotlb=", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
-
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void
-swiotlb_init_with_default_size (size_t default_size)
-{
-	unsigned long i;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen())
-		io_tlb_nslabs = roundup_pow_of_two(io_tlb_nslabs);
-#endif
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
-	if (!io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
-	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
-
-#ifdef CONFIG_XEN
-	for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) {
-		if (xen_create_contiguous_region(
-				(unsigned long)io_tlb_start +
-				(i << IO_TLB_SHIFT),
-				get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
-				DMA_BITS))
-			panic("Failed to setup Xen contiguous region");
-	}
-#endif
-
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
-	for (i = 0; i < io_tlb_nslabs; i++)
- 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
-#ifdef CONFIG_XEN
-	if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer,
-					 get_order(io_tlb_overflow), DMA_BITS))
-		panic("Failed to setup Xen contiguous region for overflow");
-#endif
-	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
-	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
-}
-
-void
-swiotlb_init (void)
-{
-	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
-}
-
-/*
- * Systems with larger DMA zones (those that don't support ISA) can
- * initialize the swiotlb later using the slab allocator if needed.
- * This should be just like above, but with some error catching.
- */
-int
-swiotlb_late_init_with_default_size (size_t default_size)
-{
-	unsigned long i, req_nslabs = io_tlb_nslabs;
-	unsigned int order;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-#ifdef CONFIG_XEN
-	if (is_running_on_xen())
-		io_tlb_nslabs = roundup_pow_of_two(io_tlb_nslabs);
-#endif
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
-	io_tlb_nslabs = SLABS_PER_PAGE << order;
-
-	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-		                                        order);
-		if (io_tlb_start)
-			break;
-		order--;
-	}
-
-	if (!io_tlb_start)
-		goto cleanup1;
-
-	if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
-		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
-		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs = SLABS_PER_PAGE << order;
-	}
-	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
-	memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
-
-#ifdef CONFIG_XEN
-	for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) {
-		if (xen_create_contiguous_region(
-				(unsigned long)io_tlb_start +
-				(i << IO_TLB_SHIFT),
-				get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
-				DMA_BITS))
-			panic("Failed to setup Xen contiguous region");
-	}
-#endif
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
-	if (!io_tlb_list)
-		goto cleanup2;
-
-	for (i = 0; i < io_tlb_nslabs; i++)
- 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-	io_tlb_index = 0;
-
-	io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
-	                           get_order(io_tlb_nslabs * sizeof(char *)));
-	if (!io_tlb_orig_addr)
-		goto cleanup3;
-
-	memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-	                                          get_order(io_tlb_overflow));
-	if (!io_tlb_overflow_buffer)
-		goto cleanup4;
-
-#ifdef CONFIG_XEN
-	if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer,
-					 get_order(io_tlb_overflow), DMA_BITS))
-		panic("Failed to setup Xen contiguous region for overflow");
-#endif
-	printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
-	       "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
-	       virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
-
-	return 0;
-
-cleanup4:
-	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
-	                                                      sizeof(char *)));
-	io_tlb_orig_addr = NULL;
-cleanup3:
-	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-	                                                 sizeof(int)));
-	io_tlb_list = NULL;
-	io_tlb_end = NULL;
-cleanup2:
-	free_pages((unsigned long)io_tlb_start, order);
-	io_tlb_start = NULL;
-cleanup1:
-	io_tlb_nslabs = req_nslabs;
-	return -ENOMEM;
-}
-
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
-{
-	dma_addr_t mask = 0xffffffff;
-	/* If the device has a mask, use it, otherwise default to 32 bits */
-	if (hwdev && hwdev->dma_mask)
-		mask = *hwdev->dma_mask;
-	return (addr & ~mask) != 0;
-}
-
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-map_single(struct device *hwdev, char *buffer, size_t size, int dir)
-{
-	unsigned long flags;
-	char *dma_addr;
-	unsigned int nslots, stride, index, wrap;
-	int i;
-
-	/*
-	 * For mappings greater than a page, we limit the stride (and
-	 * hence alignment) to a page size.
-	 */
-	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	if (size > PAGE_SIZE)
-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-	else
-		stride = 1;
-
-	BUG_ON(!nslots);
-
-	/*
-	 * Find suitable number of IO TLB entries size that will fit this
-	 * request and allocate a buffer from that IO TLB pool.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		wrap = index = ALIGN(io_tlb_index, stride);
-
-		if (index >= io_tlb_nslabs)
-			wrap = index = 0;
-
-		do {
-			/*
-			 * If we find a slot that indicates we have 'nslots'
-			 * number of contiguous buffers, we allocate the
-			 * buffers from that slot and mark the entries as '0'
-			 * indicating unavailable.
-			 */
-			if (io_tlb_list[index] >= nslots) {
-				int count = 0;
-
-				for (i = index; i < (int) (index + nslots); i++)
-					io_tlb_list[i] = 0;
-				for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-					io_tlb_list[i] = ++count;
-				dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-				/*
-				 * Update the indices to avoid searching in
-				 * the next round.
-				 */
-				io_tlb_index = ((index + nslots) < io_tlb_nslabs
-						? (index + nslots) : 0);
-
-				goto found;
-			}
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
-		} while (index != wrap);
-
-		spin_unlock_irqrestore(&io_tlb_lock, flags);
-		return NULL;
-	}
-  found:
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-
-	/*
-	 * Save away the mapping from the original address to the DMA address.
-	 * This is needed when we sync the memory.  Then we sync the buffer if
-	 * needed.
-	 */
-	io_tlb_orig_addr[index] = buffer;
-	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		memcpy(dma_addr, buffer, size);
-
-	return dma_addr;
-}
-
-/*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
- */
-static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
-{
-	unsigned long flags;
-	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	char *buffer = io_tlb_orig_addr[index];
-
-	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		/*
-		 * bounce... copy the data back into the original buffer * and
-		 * delete the bounce buffer.
-		 */
-		memcpy(buffer, dma_addr, size);
-
-	/*
-	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contigous entries available.
-	 * While returning the entries to the free list, we merge the entries
-	 * with slots below and above the pool being returned.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
-		/*
-		 * Step 1: return the slots to the free list, merging the
-		 * slots with superceeding slots
-		 */
-		for (i = index + nslots - 1; i >= index; i--)
-			io_tlb_list[i] = ++count;
-		/*
-		 * Step 2: merge the returned slots with the preceding slots,
-		 * if available (non zero)
-		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
-	}
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-}
-
-static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size,
-	    int dir, int target)
-{
-	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	char *buffer = io_tlb_orig_addr[index];
-
-	switch (target) {
-	case SYNC_FOR_CPU:
-		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			memcpy(buffer, dma_addr, size);
-		else
-			BUG_ON(dir != DMA_TO_DEVICE);
-		break;
-	case SYNC_FOR_DEVICE:
-		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			memcpy(dma_addr, buffer, size);
-		else
-			BUG_ON(dir != DMA_FROM_DEVICE);
-		break;
-	default:
-		BUG();
-	}
-}
-
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, gfp_t flags)
-{
-	unsigned long dev_addr;
-	void *ret;
-	int order = get_order(size);
-
-	/*
-	 * XXX fix me: the DMA API should pass us an explicit DMA mask
-	 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
-	 * bit range instead of a 16MB one).
-	 */
-	flags |= GFP_DMA;
-
-	ret = (void *)__get_free_pages(flags, order);
-#ifdef CONFIG_XEN
-	if (ret && is_running_on_xen()) {
-		if (xen_create_contiguous_region((unsigned long)ret, order,
-					fls64(hwdev->coherent_dma_mask))) {
-			free_pages((unsigned long)ret, order);
-			ret = NULL;
-		} else {
-			/*
-			 * Short circuit the rest, xen_create_contiguous_region
-			 * should fail if it didn't give us an address within
-			 * the mask requested.  
-			 */
-			memset(ret, 0, size);
-			*dma_handle = virt_to_bus(ret);
-			return ret;
-		}
-	}
-#endif
-	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
-		/*
-		 * The allocated memory isn't reachable by the device.
-		 * Fall back on swiotlb_map_single().
-		 */
-		free_pages((unsigned long) ret, order);
-		ret = NULL;
-	}
-	if (!ret) {
-		/*
-		 * We are either out of memory or the device can't DMA
-		 * to GFP_DMA memory; fall back on
-		 * swiotlb_map_single(), which will grab memory from
-		 * the lowest available address range.
-		 */
-		dma_addr_t handle;
-		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
-		if (swiotlb_dma_mapping_error(handle))
-			return NULL;
-
-		ret = bus_to_virt(handle);
-	}
-
-	memset(ret, 0, size);
-	dev_addr = virt_to_bus(ret);
-
-	/* Confirm address can be DMA'd by device */
-	if (address_needs_mapping(hwdev, dev_addr)) {
-		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
-		       (unsigned long long)*hwdev->dma_mask, dev_addr);
-		panic("swiotlb_alloc_coherent: allocated memory is out of "
-		      "range for device");
-	}
-	*dma_handle = dev_addr;
-	return ret;
-}
-
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-		      dma_addr_t dma_handle)
-{
-	if (!(vaddr >= (void *)io_tlb_start
-                    && vaddr < (void *)io_tlb_end)) {
-#ifdef CONFIG_XEN
-		xen_destroy_contiguous_region((unsigned long)vaddr,
-					      get_order(size));
-#endif
-		free_pages((unsigned long) vaddr, get_order(size));
-	} else
-		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
-{
-	/*
-	 * Ran out of IOMMU space for this operation. This is very bad.
-	 * Unfortunately the drivers cannot handle this operation properly.
-	 * unless they check for dma_mapping_error (most don't)
-	 * When the mapping is small enough return a static buffer to limit
-	 * the damage, or panic when the transfer is too big.
-	 */
-	printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at "
-	       "device %s\n", size, dev ? dev->bus_id : "?");
-
-	if (size > io_tlb_overflow && do_panic) {
-		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
-			panic("DMA: Memory would be corrupted\n");
-		if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-			panic("DMA: Random memory would be DMAed\n");
-	}
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode.  The
- * physical address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t
-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
-{
-	unsigned long dev_addr = virt_to_bus(ptr);
-	void *map;
-
-	BUG_ON(dir == DMA_NONE);
-	/*
-	 * If the pointer passed in happens to be in the device's DMA window,
-	 * we can safely return the device addr and not worry about bounce
-	 * buffering it.
-	 */
-	if (!range_straddles_page_boundary(ptr, size) &&
-	    !address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
-		return dev_addr;
-
-	/*
-	 * Oh well, have to allocate and map a bounce buffer.
-	 */
-	map = map_single(hwdev, ptr, size, dir);
-	if (!map) {
-		swiotlb_full(hwdev, size, dir, 1);
-		map = io_tlb_overflow_buffer;
-	}
-
-	dev_addr = virt_to_bus(map);
-
-	/*
-	 * Ensure that the address returned is DMA'ble
-	 */
-	if (address_needs_mapping(hwdev, dev_addr))
-		panic("map_single: bounce buffer is not DMA'ble");
-
-	return dev_addr;
-}
-
-/*
- * Since DMA is i-cache coherent, any (complete) pages that were written via
- * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
- * flush them when they get mapped into an executable vm-area.
- */
-static void
-mark_clean(void *addr, size_t size)
-{
-	unsigned long pg_addr, end;
-
-#ifdef CONFIG_XEN
-	/* XXX: Bad things happen when starting domUs if this is enabled. */
-	if (is_running_on_xen())
-		return;
-#endif
-
-	pg_addr = PAGE_ALIGN((unsigned long) addr);
-	end = (unsigned long) addr + size;
-	while (pg_addr + PAGE_SIZE <= end) {
-		struct page *page = virt_to_page(pg_addr);
-		set_bit(PG_arch_1, &page->flags);
-		pg_addr += PAGE_SIZE;
-	}
-}
-
-/*
- * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-void
-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
-		     int dir)
-{
-	char *dma_addr = bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		unmap_single(hwdev, dma_addr, size, dir);
-	else if (dir == DMA_FROM_DEVICE)
-		mark_clean(dma_addr, size);
-}
-
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the dma mapping, you must
- * call this function before doing so.  At the next point you give the dma
- * address back to the card, you must first perform a
- * swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
-static inline void
-swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-		    size_t size, int dir, int target)
-{
-	char *dma_addr = bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		sync_single(hwdev, dma_addr, size, dir, target);
-	else if (dir == DMA_FROM_DEVICE)
-		mark_clean(dma_addr, size);
-}
-
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-			    size_t size, int dir)
-{
-	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-			       size_t size, int dir)
-{
-	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
-}
-
-/*
- * Same as above, but for a sub-range of the mapping.
- */
-static inline void
-swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
-			  unsigned long offset, size_t size,
-			  int dir, int target)
-{
-	char *dma_addr = bus_to_virt(dev_addr) + offset;
-
-	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
-		sync_single(hwdev, dma_addr, size, dir, target);
-	else if (dir == DMA_FROM_DEVICE)
-		mark_clean(dma_addr, size);
-}
-
-void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-				  unsigned long offset, size_t size, int dir)
-{
-	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-				  SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
-				     unsigned long offset, size_t size, int dir)
-{
-	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-				  SYNC_FOR_DEVICE);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
- * interface.  Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length.  They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
- * same here.
- */
-int
-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
-	       int dir)
-{
-	void *addr;
-	unsigned long dev_addr;
-	int i;
-
-	BUG_ON(dir == DMA_NONE);
-
-	for (i = 0; i < nelems; i++, sg++) {
-		addr = SG_ENT_VIRT_ADDRESS(sg);
-		dev_addr = virt_to_bus(addr);
-		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
-			void *map = map_single(hwdev, addr, sg->length, dir);
-			sg->dma_address = virt_to_bus(map);
-			if (!map) {
-				/* Don't panic here, we expect map_sg users
-				   to do proper error handling. */
-				swiotlb_full(hwdev, sg->length, dir, 0);
-				swiotlb_unmap_sg(hwdev, sg - i, i, dir);
-				sg[0].dma_length = 0;
-				return 0;
-			}
-		} else
-			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
-	}
-	return nelems;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
- */
-void
-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
-		 int dir)
-{
-	int i;
-
-	BUG_ON(dir == DMA_NONE);
-
-	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			unmap_single(hwdev, (void *) bus_to_virt(sg->dma_address), sg->dma_length, dir);
-		else if (dir == DMA_FROM_DEVICE)
-			mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
-static inline void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg,
-		int nelems, int dir, int target)
-{
-	int i;
-
-	BUG_ON(dir == DMA_NONE);
-
-	for (i = 0; i < nelems; i++, sg++)
-		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			sync_single(hwdev, (void *) sg->dma_address,
-				    sg->dma_length, dir, target);
-}
-
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-			int nelems, int dir)
-{
-	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-			   int nelems, int dir)
-{
-	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
-}
-
-int
-swiotlb_dma_mapping_error(dma_addr_t dma_addr)
-{
-	return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
-}
-
-/*
- * Return whether the given device DMA address mask can be supported
- * properly.  For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported (struct device *hwdev, u64 mask)
-{
-	return (virt_to_bus(io_tlb_end) - 1) <= mask;
-}
-
-EXPORT_SYMBOL(swiotlb_init);
-EXPORT_SYMBOL(swiotlb_map_single);
-EXPORT_SYMBOL(swiotlb_unmap_single);
-EXPORT_SYMBOL(swiotlb_map_sg);
-EXPORT_SYMBOL(swiotlb_unmap_sg);
-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_single_for_device);
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
-EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
-EXPORT_SYMBOL(swiotlb_dma_mapping_error);
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-EXPORT_SYMBOL(swiotlb_free_coherent);
-EXPORT_SYMBOL(swiotlb_dma_supported);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/util.c b/linux-2.6-xen-sparse/arch/ia64/xen/util.c
deleted file mode 100644
index 387a1c3368..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/util.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/******************************************************************************
- * arch/ia64/xen/util.c
- * This file is the ia64 counterpart of drivers/xen/util.c
- *
- * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
- *                    VA Linux Systems Japan K.K.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <asm/uaccess.h>
-#include <xen/driver_util.h>
-#include <xen/interface/memory.h>
-#include <asm/hypercall.h>
-
-struct vm_struct *alloc_vm_area(unsigned long size)
-{
-	int order;
-	unsigned long virt;
-	unsigned long nr_pages;
-	struct vm_struct* area;
-	
-	order = get_order(size);
-	virt = __get_free_pages(GFP_KERNEL, order);
-	if (virt == 0) {
-		goto err0;
-	}
-	nr_pages = 1 << order;
-	scrub_pages(virt, nr_pages);
-	
-	area = kmalloc(sizeof(*area), GFP_KERNEL);
-	if (area == NULL) {
-		goto err1;
-	}
-	
-        area->flags = VM_IOREMAP;//XXX
-        area->addr = (void*)virt;
-        area->size = size;
-        area->pages = NULL; //XXX
-        area->nr_pages = nr_pages;
-        area->phys_addr = 0; 	/* xenbus_map_ring_valloc uses this field!  */
-
-	return area;
-
-err1:
-	free_pages(virt, order);
-err0:
-	return NULL;
-	
-}
-EXPORT_SYMBOL_GPL(alloc_vm_area);
-
-void free_vm_area(struct vm_struct *area)
-{
-	unsigned int order = get_order(area->size);
-	unsigned long i;
-	unsigned long phys_addr = __pa(area->addr);
-
-	// This area is used for foreign page mappping.
-	// So underlying machine page may not be assigned.
-	for (i = 0; i < (1 << order); i++) {
-		unsigned long ret;
-		unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i;
-		struct xen_memory_reservation reservation = {
-			.nr_extents   = 1,
-			.address_bits = 0,
-			.extent_order = 0,
-			.domid        = DOMID_SELF
-		};
-		set_xen_guest_handle(reservation.extent_start, &gpfn);
-		ret = HYPERVISOR_memory_op(XENMEM_populate_physmap,
-					   &reservation);
-		BUG_ON(ret != 1);
-	}
-	free_pages((unsigned long)area->addr, order);
-	kfree(area);
-}
-EXPORT_SYMBOL_GPL(free_vm_area);
-
-/*
- * Local variables:
- *  c-file-style: "linux"
- *  indent-tabs-mode: t
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
deleted file mode 100644
index 4c90b5b01e..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- *          Tristan Gingold <tristan.gingold@bull.net>
- */
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/module.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/platform.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/xencomm.h>
-#include <xen/interface/version.h>
-#include <xen/interface/sched.h>
-#include <xen/interface/event_channel.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/grant_table.h>
-#include <xen/interface/callback.h>
-#include <xen/interface/acm_ops.h>
-#include <xen/interface/hvm/params.h>
-#include <xen/interface/xenoprof.h>
-#include <xen/interface/vcpu.h>
-#include <asm/hypercall.h>
-#include <asm/page.h>
-#include <asm/uaccess.h>
-#include <asm/xen/xencomm.h>
-#include <asm/perfmon.h>
-
-/* Xencomm notes:
- * This file defines hypercalls to be used by xencomm.  The hypercalls simply
- * create inlines descriptors for pointers and then call the raw arch hypercall
- * xencomm_arch_hypercall_XXX
- *
- * If the arch wants to directly use these hypercalls, simply define macros
- * in asm/hypercall.h, eg:
- *  #define HYPERVISOR_sched_op xencomm_hypercall_sched_op
- * 
- * The arch may also define HYPERVISOR_xxx as a function and do more operations
- * before/after doing the hypercall.
- *
- * Note: because only inline descriptors are created these functions must only
- * be called with in kernel memory parameters.
- */
-
-int
-xencomm_hypercall_console_io(int cmd, int count, char *str)
-{
-	return xencomm_arch_hypercall_console_io
-		(cmd, count, xencomm_create_inline(str));
-}
-
-int
-xencomm_hypercall_event_channel_op(int cmd, void *op)
-{
-	return xencomm_arch_hypercall_event_channel_op
-		(cmd, xencomm_create_inline(op));
-}
-
-int
-xencomm_hypercall_xen_version(int cmd, void *arg)
-{
-	switch (cmd) {
-	case XENVER_version:
-	case XENVER_extraversion:
-	case XENVER_compile_info:
-	case XENVER_capabilities:
-	case XENVER_changeset:
-	case XENVER_platform_parameters:
-	case XENVER_pagesize:
-	case XENVER_get_features:
-		break;
-	default:
-		printk("%s: unknown version cmd %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	return xencomm_arch_hypercall_xen_version
-		(cmd, xencomm_create_inline(arg));
-}
-
-int
-xencomm_hypercall_physdev_op(int cmd, void *op)
-{
-	return xencomm_arch_hypercall_physdev_op
-		(cmd, xencomm_create_inline(op));
-}
-
-static void *
-xencommize_grant_table_op(unsigned int cmd, void *op, unsigned int count)
-{
-	switch (cmd) {
-	case GNTTABOP_map_grant_ref:
-	case GNTTABOP_unmap_grant_ref:
-		break;
-	case GNTTABOP_setup_table:
-	{
-		struct gnttab_setup_table *setup = op;
-		struct xencomm_handle *frame_list;
-
-		frame_list = xencomm_create_inline
-			(xen_guest_handle(setup->frame_list));
-
-		set_xen_guest_handle(setup->frame_list, (void *)frame_list);
-		break;
-	}
-	case GNTTABOP_dump_table:
-	case GNTTABOP_transfer:
-	case GNTTABOP_copy:
-		break;
-	default:
-		printk("%s: unknown grant table op %d\n", __func__, cmd);
-		BUG();
-	}
-
-	return  xencomm_create_inline(op);
-}
-
-int
-xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, unsigned int count)
-{
-	void *desc = xencommize_grant_table_op (cmd, op, count);
-
-	return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
-}
-
-int
-xencomm_hypercall_sched_op(int cmd, void *arg)
-{
-	switch (cmd) {
-	case SCHEDOP_yield:
-	case SCHEDOP_block:
-	case SCHEDOP_shutdown:
-	case SCHEDOP_remote_shutdown:
-		break;
-	case SCHEDOP_poll:
-	{
-		sched_poll_t *poll = arg;
-		struct xencomm_handle *ports;
-
-		ports = xencomm_create_inline(xen_guest_handle(poll->ports));
-
-		set_xen_guest_handle(poll->ports, (void *)ports);
-		break;
-	}
-	default:
-		printk("%s: unknown sched op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-	
-	return xencomm_arch_hypercall_sched_op(cmd, xencomm_create_inline(arg));
-}
-
-int
-xencomm_hypercall_multicall(void *call_list, int nr_calls)
-{
-	int i;
-	multicall_entry_t *mce;
-
-	for (i = 0; i < nr_calls; i++) {
-		mce = (multicall_entry_t *)call_list + i;
-
-		switch (mce->op) {
-		case __HYPERVISOR_update_va_mapping:
-		case __HYPERVISOR_mmu_update:
-			/* No-op on ia64.  */
-			break;
-		case __HYPERVISOR_grant_table_op:
-			mce->args[1] = (unsigned long)xencommize_grant_table_op
-				(mce->args[0], (void *)mce->args[1],
-				 mce->args[2]);
-			break;
-		case __HYPERVISOR_memory_op:
-		default:
-			printk("%s: unhandled multicall op entry op %lu\n",
-			       __func__, mce->op);
-			return -ENOSYS;
-		}
-	}
-
-	return xencomm_arch_hypercall_multicall
-		(xencomm_create_inline(call_list), nr_calls);
-}
-
-int
-xencomm_hypercall_callback_op(int cmd, void *arg)
-{
-	switch (cmd)
-	{
-	case CALLBACKOP_register:
-	case CALLBACKOP_unregister:
-		break;
-	default:
-		printk("%s: unknown callback op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	return xencomm_arch_hypercall_callback_op
-		(cmd, xencomm_create_inline(arg));
-}
-
-static void
-xencommize_memory_reservation (xen_memory_reservation_t *mop)
-{
-	struct xencomm_handle *desc;
-
-	desc = xencomm_create_inline(xen_guest_handle(mop->extent_start));
-	set_xen_guest_handle(mop->extent_start, (void *)desc);
-}
-
-int
-xencomm_hypercall_memory_op(unsigned int cmd, void *arg)
-{
-	XEN_GUEST_HANDLE(xen_pfn_t) extent_start_va[2];
-	xen_memory_reservation_t *xmr = NULL, *xme_in = NULL, *xme_out = NULL;
-	xen_memory_map_t *memmap = NULL;
-	XEN_GUEST_HANDLE(void) buffer;
-	int rc;
-
-	switch (cmd) {
-	case XENMEM_increase_reservation:
-	case XENMEM_decrease_reservation:
-	case XENMEM_populate_physmap:
-		xmr = (xen_memory_reservation_t *)arg;
-		xen_guest_handle(extent_start_va[0]) =
-			xen_guest_handle(xmr->extent_start);
-		xencommize_memory_reservation((xen_memory_reservation_t *)arg);
-		break;
-		
-	case XENMEM_maximum_ram_page:
-		break;
-
-	case XENMEM_exchange:
-		xme_in  = &((xen_memory_exchange_t *)arg)->in;
-		xme_out = &((xen_memory_exchange_t *)arg)->out;
-		xen_guest_handle(extent_start_va[0]) =
-			xen_guest_handle(xme_in->extent_start);
-		xen_guest_handle(extent_start_va[1]) =
-			xen_guest_handle(xme_out->extent_start);
-		xencommize_memory_reservation
-			(&((xen_memory_exchange_t *)arg)->in);
-		xencommize_memory_reservation
-			(&((xen_memory_exchange_t *)arg)->out);
-		break;
-
-	case XENMEM_machine_memory_map:
-		memmap = (xen_memory_map_t *)arg;
-		xen_guest_handle(buffer) = xen_guest_handle(memmap->buffer);
-		set_xen_guest_handle(memmap->buffer,
-			(void *)xencomm_create_inline(
-				xen_guest_handle(memmap->buffer)));
-		break;
-
-	default:
-		printk("%s: unknown memory op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	rc =  xencomm_arch_hypercall_memory_op(cmd, xencomm_create_inline(arg));
-
-	switch (cmd) {
-	case XENMEM_increase_reservation:
-	case XENMEM_decrease_reservation:
-	case XENMEM_populate_physmap:
-		xen_guest_handle(xmr->extent_start) =
-			xen_guest_handle(extent_start_va[0]);
-		break;
-
-	case XENMEM_exchange:
-		xen_guest_handle(xme_in->extent_start) =
-			xen_guest_handle(extent_start_va[0]);
-		xen_guest_handle(xme_out->extent_start) =
-			xen_guest_handle(extent_start_va[1]);
-		break;
-
-	case XENMEM_machine_memory_map:
-		xen_guest_handle(memmap->buffer) = xen_guest_handle(buffer);
-		break;
-	}
-
-	return rc;
-}
-
-unsigned long
-xencomm_hypercall_hvm_op(int cmd, void *arg)
-{
-	switch (cmd) {
-	case HVMOP_set_param:
-	case HVMOP_get_param:
-		break;
-	default:
-		printk("%s: unknown hvm op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	return xencomm_arch_hypercall_hvm_op(cmd, xencomm_create_inline(arg));
-}
-
-int
-xencomm_hypercall_suspend(unsigned long srec)
-{
-	struct sched_shutdown arg;
-
-	arg.reason = SHUTDOWN_suspend;
-
-	return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg));
-}
-
-int
-xencomm_hypercall_xenoprof_op(int op, void *arg)
-{
-	switch (op) {
-	case XENOPROF_init:
-	case XENOPROF_set_active:
-	case XENOPROF_set_passive:
-	case XENOPROF_counter:
-	case XENOPROF_get_buffer:
-		break;
-
-	case XENOPROF_reset_active_list:
-	case XENOPROF_reset_passive_list:
-	case XENOPROF_reserve_counters:
-	case XENOPROF_setup_events:
-	case XENOPROF_enable_virq:
-	case XENOPROF_start:
-	case XENOPROF_stop:
-	case XENOPROF_disable_virq:
-	case XENOPROF_release_counters:
-	case XENOPROF_shutdown:
-		return xencomm_arch_hypercall_xenoprof_op(op, arg);
-		break;
-
-	default:
-		printk("%s: op %d isn't supported\n", __func__, op);
-		return -ENOSYS;
-	}
-	return xencomm_arch_hypercall_xenoprof_op(op,
-						  xencomm_create_inline(arg));
-}
-
-int
-xencomm_hypercall_perfmon_op(unsigned long cmd, void* arg, unsigned long count)
-{
-	switch (cmd) {
-	case PFM_GET_FEATURES:
-	case PFM_CREATE_CONTEXT:
-	case PFM_WRITE_PMCS:
-	case PFM_WRITE_PMDS:
-	case PFM_LOAD_CONTEXT:
-		break;
-
-	case PFM_DESTROY_CONTEXT:
-	case PFM_UNLOAD_CONTEXT:
-	case PFM_START:
-	case PFM_STOP:
-		return xencomm_arch_hypercall_perfmon_op(cmd, arg, count);
-
-	default:
-		printk("%s:%d cmd %ld isn't supported\n",
-		       __func__,__LINE__, cmd);
-		BUG();
-	}
-
-	return xencomm_arch_hypercall_perfmon_op(cmd,
-	                                         xencomm_create_inline(arg),
-	                                         count);
-}
-
-long
-xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg)
-{
-	switch (cmd) {
-	case VCPUOP_register_runstate_memory_area:
-		xencommize_memory_reservation((xen_memory_reservation_t *)arg);
-		break;
-
-	default:
-		printk("%s: unknown vcpu op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	return xencomm_arch_hypercall_vcpu_op(cmd, cpu,
-					      xencomm_create_inline(arg));
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
deleted file mode 100644
index 3c0baff1f0..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- *          Tristan Gingold <tristan.gingold@bull.net>
- */
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/platform.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/xencomm.h>
-#include <xen/interface/version.h>
-#include <xen/interface/event_channel.h>
-#include <xen/interface/physdev.h>
-#include <xen/interface/grant_table.h>
-#include <xen/interface/hvm/params.h>
-#include <xen/interface/xenoprof.h>
-#ifdef CONFIG_VMX_GUEST
-#include <asm/hypervisor.h>
-#else
-#include <asm/hypercall.h>
-#endif
-#include <asm/xen/xencomm.h>
-#include <asm/perfmon.h>
-
-int
-xencomm_mini_hypercall_event_channel_op(int cmd, void *op)
-{
-	struct xencomm_mini xc_area[2];
-	int nbr_area = 2;
-	struct xencomm_handle *desc;
-	int rc;
-
-	rc = xencomm_create_mini(xc_area, &nbr_area,
-	                         op, sizeof(evtchn_op_t), &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_event_channel_op(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_event_channel_op);
-
-static int
-xencommize_mini_grant_table_op(struct xencomm_mini *xc_area, int *nbr_area,
-                               unsigned int cmd, void *op, unsigned int count,
-                               struct xencomm_handle **desc)
-{
-	struct xencomm_handle *desc1;
-	unsigned int argsize;
-	int rc;
-
-	switch (cmd) {
-	case GNTTABOP_map_grant_ref:
-		argsize = sizeof(struct gnttab_map_grant_ref);
-		break;
-	case GNTTABOP_unmap_grant_ref:
-		argsize = sizeof(struct gnttab_unmap_grant_ref);
-		break;
-	case GNTTABOP_setup_table:
-	{
-		struct gnttab_setup_table *setup = op;
-
-		argsize = sizeof(*setup);
-
-		if (count != 1)
-			return -EINVAL;
-		rc = xencomm_create_mini
-			(xc_area, nbr_area,
-			 xen_guest_handle(setup->frame_list),
-			 setup->nr_frames 
-			 * sizeof(*xen_guest_handle(setup->frame_list)),
-			 &desc1);
-		if (rc)
-			return rc;
-		set_xen_guest_handle(setup->frame_list, (void *)desc1);
-		break;
-	}
-	case GNTTABOP_dump_table:
-		argsize = sizeof(struct gnttab_dump_table);
-		break;
-	case GNTTABOP_transfer:
-		argsize = sizeof(struct gnttab_transfer);
-		break;
-	case GNTTABOP_copy:
-		argsize = sizeof(struct gnttab_copy);
-		break;
-	case GNTTABOP_query_size:
-		argsize = sizeof(struct gnttab_query_size);
-		break;
-	default:
-		printk("%s: unknown mini grant table op %d\n", __func__, cmd);
-		BUG();
-	}
-
-	rc = xencomm_create_mini(xc_area, nbr_area, op, count * argsize, desc);
-	if (rc)
-		return rc;
-
-	return 0;
-}
-
-int
-xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op,
-                                      unsigned int count)
-{
-	int rc;
-	struct xencomm_handle *desc;
-	int nbr_area = 2;
-	struct xencomm_mini xc_area[2];
-
-	rc = xencommize_mini_grant_table_op(xc_area, &nbr_area,
-	                                    cmd, op, count, &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_grant_table_op);
-
-int
-xencomm_mini_hypercall_multicall(void *call_list, int nr_calls)
-{
-	int i;
-	multicall_entry_t *mce;
-	int nbr_area = 2 + nr_calls * 3;
-	struct xencomm_mini xc_area[nbr_area];
-	struct xencomm_handle *desc;
-	int rc;
-
-	for (i = 0; i < nr_calls; i++) {
-		mce = (multicall_entry_t *)call_list + i;
-
-		switch (mce->op) {
-		case __HYPERVISOR_update_va_mapping:
-		case __HYPERVISOR_mmu_update:
-			/* No-op on ia64.  */
-			break;
-		case __HYPERVISOR_grant_table_op:
-			rc = xencommize_mini_grant_table_op
-				(xc_area, &nbr_area,
-				 mce->args[0], (void *)mce->args[1],
-				 mce->args[2], &desc);
-			if (rc)
-				return rc;
-			mce->args[1] = (unsigned long)desc;
-			break;
-		case __HYPERVISOR_memory_op:
-		default:
-			printk("%s: unhandled multicall op entry op %lu\n",
-			       __func__, mce->op);
-			return -ENOSYS;
-		}
-	}
-
-	rc = xencomm_create_mini(xc_area, &nbr_area, call_list,
-	                         nr_calls * sizeof(multicall_entry_t), &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_multicall(desc, nr_calls);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_multicall);
-
-static int
-xencommize_mini_memory_reservation(struct xencomm_mini *area, int *nbr_area,
-                                   xen_memory_reservation_t *mop)
-{
-	struct xencomm_handle *desc;
-	int rc;
-
-	rc = xencomm_create_mini
-		(area, nbr_area,
-		 xen_guest_handle(mop->extent_start),
-		 mop->nr_extents 
-		 * sizeof(*xen_guest_handle(mop->extent_start)),
-		 &desc);
-	if (rc)
-		return rc;
-
-	set_xen_guest_handle(mop->extent_start, (void *)desc);
-
-	return 0;
-}
-
-int
-xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg)
-{
-	int nbr_area = 4;
-	struct xencomm_mini xc_area[4];
-	struct xencomm_handle *desc;
-	int rc;
-	unsigned int argsize;
-
-	switch (cmd) {
-	case XENMEM_increase_reservation:
-	case XENMEM_decrease_reservation:
-	case XENMEM_populate_physmap:
-		argsize = sizeof(xen_memory_reservation_t);
-		rc = xencommize_mini_memory_reservation
-			(xc_area, &nbr_area, (xen_memory_reservation_t *)arg);
-		if (rc)
-			return rc;
-		break;
-		
-	case XENMEM_maximum_ram_page:
-		argsize = 0;
-		break;
-
-	case XENMEM_exchange:
-		argsize = sizeof(xen_memory_exchange_t);
-		rc = xencommize_mini_memory_reservation
-			(xc_area, &nbr_area,
-			 &((xen_memory_exchange_t *)arg)->in);
-		if (rc)
-			return rc;
-		rc = xencommize_mini_memory_reservation
-			(xc_area, &nbr_area,
-			 &((xen_memory_exchange_t *)arg)->out);
-		if (rc)
-			return rc;
-		break;
-
-	case XENMEM_add_to_physmap:
-		argsize = sizeof (xen_add_to_physmap_t);
-		break;
-
-	case XENMEM_machine_memory_map:
-	{
-		xen_memory_map_t *memmap = (xen_memory_map_t *)arg;
-		argsize = sizeof(*memmap);
-		rc = xencomm_create_mini(xc_area, &nbr_area,
-					 xen_guest_handle(memmap->buffer),
-					 memmap->nr_entries, &desc);
-		if (rc)
-			return rc;
-		set_xen_guest_handle(memmap->buffer, (void *)desc);
-		break;
-	}
-
-	default:
-		printk("%s: unknown mini memory op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_memory_op(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_memory_op);
-
-unsigned long
-xencomm_mini_hypercall_hvm_op(int cmd, void *arg)
-{
-	struct xencomm_handle *desc;
-	int nbr_area = 2;
-	struct xencomm_mini xc_area[2];
-	unsigned int argsize;
-	int rc;
-
-	switch (cmd) {
-	case HVMOP_get_param:
-	case HVMOP_set_param:
-		argsize = sizeof(xen_hvm_param_t);
-		break;
-	default:
-		printk("%s: unknown HVMOP %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-
-	rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_hvm_op(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_hvm_op);
-
-int
-xencomm_mini_hypercall_xen_version(int cmd, void *arg)
-{
-	struct xencomm_handle *desc;
-	int nbr_area = 2;
-	struct xencomm_mini xc_area[2];
-	unsigned int argsize;
-	int rc;
-
-	switch (cmd) {
-	case XENVER_version:
-		/* do not actually pass an argument */
-		return xencomm_arch_hypercall_xen_version(cmd, 0);
-	case XENVER_extraversion:
-		argsize = sizeof(xen_extraversion_t);
-		break;
-	case XENVER_compile_info:
-		argsize = sizeof(xen_compile_info_t);
-		break;
-	case XENVER_capabilities:
-		argsize = sizeof(xen_capabilities_info_t);
-		break;
-	case XENVER_changeset:
-		argsize = sizeof(xen_changeset_info_t);
-		break;
-	case XENVER_platform_parameters:
-		argsize = sizeof(xen_platform_parameters_t);
-		break;
-	case XENVER_pagesize:
-		argsize = (arg == NULL) ? 0 : sizeof(void *);
-		break;
-	case XENVER_get_features:
-		argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t);
-		break;
-
-	default:
-		printk("%s: unknown version op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_xen_version(cmd, desc);
-}
-EXPORT_SYMBOL(xencomm_mini_hypercall_xen_version);
-
-int
-xencomm_mini_hypercall_xenoprof_op(int op, void *arg)
-{
-	unsigned int argsize;
-	struct xencomm_mini xc_area[2];
-	int nbr_area = 2;
-	struct xencomm_handle *desc;
-	int rc;
-
-	switch (op) {
-	case XENOPROF_init:
-		argsize = sizeof(xenoprof_init_t);
-		break;
-	case XENOPROF_set_active:
-		argsize = sizeof(domid_t);
-		break;
-	case XENOPROF_set_passive:
-		argsize = sizeof(xenoprof_passive_t);
-		break;
-	case XENOPROF_counter:
-		argsize = sizeof(xenoprof_counter_t);
-		break;
-	case XENOPROF_get_buffer:
-		argsize = sizeof(xenoprof_get_buffer_t);
-		break;
-
-	case XENOPROF_reset_active_list:
-	case XENOPROF_reset_passive_list:
-	case XENOPROF_reserve_counters:
-	case XENOPROF_setup_events:
-	case XENOPROF_enable_virq:
-	case XENOPROF_start:
-	case XENOPROF_stop:
-	case XENOPROF_disable_virq:
-	case XENOPROF_release_counters:
-	case XENOPROF_shutdown:
-		return xencomm_arch_hypercall_xenoprof_op(op, arg);
-
-	default:
-		printk("%s: op %d isn't supported\n", __func__, op);
-		return -ENOSYS;
-	}
-	rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
-	if (rc)
-		return rc;
-	return xencomm_arch_hypercall_xenoprof_op(op, desc);
-}
-EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_xenoprof_op);
-
-int
-xencomm_mini_hypercall_perfmon_op(unsigned long cmd, void* arg,
-                                  unsigned long count)
-{
-	unsigned int argsize;
-	struct xencomm_mini xc_area[2];
-	int nbr_area = 2;
-	struct xencomm_handle *desc;
-	int rc;
-
-	switch (cmd) {
-	case PFM_GET_FEATURES:
-		argsize = sizeof(pfarg_features_t);
-		break;
-	case PFM_CREATE_CONTEXT:
-		argsize = sizeof(pfarg_context_t);
-		break;
-	case PFM_LOAD_CONTEXT:
-		argsize = sizeof(pfarg_load_t);
-		break;
-	case PFM_WRITE_PMCS:
-	case PFM_WRITE_PMDS:
-		argsize = sizeof(pfarg_reg_t) * count;
-		break;
-
-	case PFM_DESTROY_CONTEXT:
-	case PFM_UNLOAD_CONTEXT:
-	case PFM_START:
-	case PFM_STOP:
-		return xencomm_arch_hypercall_perfmon_op(cmd, arg, count);
-
-	default:
-		printk("%s:%d cmd %ld isn't supported\n",
-		       __func__, __LINE__, cmd);
-		BUG();
-	}
-
-	rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
-	if (rc)
-		return rc;
-	return xencomm_arch_hypercall_perfmon_op(cmd, desc, count);
-}
-EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_perfmon_op);
-
-int
-xencomm_mini_hypercall_sched_op(int cmd, void *arg)
-{
-	int rc, nbr_area = 2;
-	struct xencomm_mini xc_area[2];
-	struct xencomm_handle *desc;
-	unsigned int argsize;
-
-	switch (cmd) {
-	case SCHEDOP_yield:
-	case SCHEDOP_block:
-		argsize = 0;
-		break;
-	case SCHEDOP_shutdown:
-		argsize = sizeof(sched_shutdown_t);
-		break;
-	case SCHEDOP_poll:
-		argsize = sizeof(sched_poll_t);
-		break;
-	case SCHEDOP_remote_shutdown:
-		argsize = sizeof(sched_remote_shutdown_t);
-		break;
-
-	default:
-		printk("%s: unknown sched op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
-	if (rc)
-		return rc;
-
-	return xencomm_arch_hypercall_sched_op(cmd, desc);
-}
-EXPORT_SYMBOL_GPL(xencomm_mini_hypercall_sched_op);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c
deleted file mode 100644
index 7c67373ccc..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- *          Tristan Gingold <tristan.gingold@bull.net>
- */
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/module.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/platform.h>
-#define __XEN__
-#include <xen/interface/domctl.h>
-#include <xen/interface/sysctl.h>
-#include <xen/interface/memory.h>
-#include <xen/interface/version.h>
-#include <xen/interface/event_channel.h>
-#include <xen/interface/acm_ops.h>
-#include <xen/interface/hvm/params.h>
-#include <xen/public/privcmd.h>
-#include <asm/hypercall.h>
-#include <asm/page.h>
-#include <asm/uaccess.h>
-#include <asm/xen/xencomm.h>
-
-#define ROUND_DIV(v,s) (((v) + (s) - 1) / (s))
-
-/*
- * Forward a privcmd platform_op hypercall issued from user space.
- *
- * The request is copied in from the user pointer in hypercall->arg[0]
- * (-EFAULT if the copy fails) and its interface version is checked
- * (-EACCES on mismatch).
- *
- * The switch below has no case other than "default", so as the code
- * stands every command is logged and rejected with -ENOSYS and the
- * statements following the switch are never reached.
- */
-static int
-xencomm_privcmd_platform_op(privcmd_hypercall_t *hypercall)
-{
-	struct xen_platform_op kern_op;
-	struct xen_platform_op __user *user_op = (struct xen_platform_op __user *)hypercall->arg[0];
-	struct xencomm_handle *op_desc;
-	struct xencomm_handle *desc = NULL;
-	int ret = 0;
-
-	if (copy_from_user(&kern_op, user_op, sizeof(struct xen_platform_op)))
-		return -EFAULT;
-
-	if (kern_op.interface_version != XENPF_INTERFACE_VERSION)
-		return -EACCES;
-
-	op_desc = xencomm_create_inline(&kern_op);
-
-	/* no platform command is handled: every value takes the default path */
-	switch (kern_op.cmd) {
-	default:
-		printk("%s: unknown platform cmd %d\n", __func__, kern_op.cmd);
-		return -ENOSYS;
-	}
-
-	if (ret) {
-		/* error mapping the nested pointer */
-		return ret;
-	}
-
-	ret = xencomm_arch_hypercall_platform_op(op_desc);
-
-	/* FIXME: should we restore the handle?  */
-	if (copy_to_user(user_op, &kern_op, sizeof(struct xen_platform_op)))
-		ret = -EFAULT;
-
-	if (desc)
-		xencomm_free(desc);
-	return ret;
-}
-
-/*
- * Temporarily disable the NUMA PHYSINFO code until the rest of the
- * changes are upstream.
- */
-#undef IA64_NUMA_PHYSINFO
-
-/*
- * Forward a privcmd sysctl hypercall issued from user space.
- *
- * The xen_sysctl_t is copied in from the user pointer in
- * hypercall->arg[0] and its interface version is checked.  For
- * sub-commands that embed a guest handle to a further user buffer, a
- * xencomm descriptor is created for that buffer and stored in the
- * kernel copy in place of the user pointer.  Up to two such descriptors
- * (desc, desc1) are used and released after the hypercall.
- *
- * Returns -EFAULT if a user copy fails, -EACCES on an interface version
- * mismatch, -EINVAL for a perfc_op with a val buffer but no desc buffer,
- * -ENOSYS for an unhandled sub-command, the xencomm_create() error if a
- * descriptor cannot be built, otherwise the hypercall result.
- */
-static int
-xencomm_privcmd_sysctl(privcmd_hypercall_t *hypercall)
-{
-	xen_sysctl_t kern_op;
-	xen_sysctl_t __user *user_op;
-	struct xencomm_handle *op_desc;
-	struct xencomm_handle *desc = NULL;
-	struct xencomm_handle *desc1 = NULL;
-	int ret = 0;
-
-	user_op = (xen_sysctl_t __user *)hypercall->arg[0];
-
-	if (copy_from_user(&kern_op, user_op, sizeof(xen_sysctl_t)))
-		return -EFAULT;
-
-	if (kern_op.interface_version != XEN_SYSCTL_INTERFACE_VERSION)
-		return -EACCES;
-
-	op_desc = xencomm_create_inline(&kern_op);
-
-	switch (kern_op.cmd) {
-	case XEN_SYSCTL_readconsole:
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.readconsole.buffer),
-			kern_op.u.readconsole.count,
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.readconsole.buffer,
-		                     (void *)desc);
-		break;
-	/* sub-commands below need no nested descriptor */
-	case XEN_SYSCTL_tbuf_op:
-#ifndef IA64_NUMA_PHYSINFO
-	case XEN_SYSCTL_physinfo:
-#endif
-	case XEN_SYSCTL_sched_id:
-		break;
-	case XEN_SYSCTL_perfc_op:
-	{
-		struct xencomm_handle *tmp_desc;
-		xen_sysctl_t tmp_op = {
-			.cmd = XEN_SYSCTL_perfc_op,
-			.interface_version = XEN_SYSCTL_INTERFACE_VERSION,
-			.u.perfc_op = {
-				.cmd = XEN_SYSCTL_PERFCOP_query,
-				// .desc.p = NULL,
-				// .val.p = NULL,
-			},
-		};
-
-		/* no desc buffer: a val buffer alone is rejected */
-		if (xen_guest_handle(kern_op.u.perfc_op.desc) == NULL) {
-			if (xen_guest_handle(kern_op.u.perfc_op.val) != NULL)
-				return -EINVAL;
-			break;
-		}
-
-		/* query the buffer size for xencomm */
-		tmp_desc = xencomm_create_inline(&tmp_op);
-		ret = xencomm_arch_hypercall_sysctl(tmp_desc);
-		if (ret)
-			return ret;
-
-		ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.desc),
-		                     tmp_op.u.perfc_op.nr_counters *
-		                     sizeof(xen_sysctl_perfc_desc_t),
-		                     &desc, GFP_KERNEL);
-		if (ret)
-			return ret;
-
-		set_xen_guest_handle(kern_op.u.perfc_op.desc, (void *)desc);
-
-		ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.val),
-		                     tmp_op.u.perfc_op.nr_vals *
-		                     sizeof(xen_sysctl_perfc_val_t),
-		                     &desc1, GFP_KERNEL);
-		/* on failure drop the first descriptor; ret is returned after the switch */
-		if (ret)
-			xencomm_free(desc);
-
-		set_xen_guest_handle(kern_op.u.perfc_op.val, (void *)desc1);
-		break;
-	}
-	case XEN_SYSCTL_getdomaininfolist:
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.getdomaininfolist.buffer),
-			kern_op.u.getdomaininfolist.max_domains *
-			sizeof(xen_domctl_getdomaininfo_t),
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.getdomaininfolist.buffer,
-				     (void *)desc);
-		break;
-	case XEN_SYSCTL_debug_keys:
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.debug_keys.keys),
-			kern_op.u.debug_keys.nr_keys,
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.debug_keys.keys,
-				     (void *)desc);
-		break;
-
-#ifdef IA64_NUMA_PHYSINFO
-	case XEN_SYSCTL_physinfo:
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.physinfo.memory_chunks),
-			PUBLIC_MAXCHUNKS * sizeof(node_data_t),
-			&desc, GFP_KERNEL);
-		if (ret)
-			return ret;
-		set_xen_guest_handle(kern_op.u.physinfo.memory_chunks,
-		                     (void *)desc);
-
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.physinfo.cpu_to_node),
-			PUBLIC_MAX_NUMNODES * sizeof(u64),
-			&desc1, GFP_KERNEL);
-		if (ret)
-			xencomm_free(desc);
-		set_xen_guest_handle(kern_op.u.physinfo.cpu_to_node,
-		                     (void *)desc1);
-		break;
-#endif
-	default:
-		printk("%s: unknown sysctl cmd %d\n", __func__, kern_op.cmd);
-		return -ENOSYS;
-	}
-
-	if (ret) {
-		/* error mapping the nested pointer */
-		return ret;
-	}
-
-	ret = xencomm_arch_hypercall_sysctl(op_desc);
-
-	/*
-	 * kern_op is copied back as-is, i.e. with any guest handle that was
-	 * replaced above still holding the descriptor value.
-	 */
-	/* FIXME: should we restore the handles?  */
-	if (copy_to_user(user_op, &kern_op, sizeof(xen_sysctl_t)))
-		ret = -EFAULT;
-
-	if (desc)
-		xencomm_free(desc);
-	if (desc1)
-		xencomm_free(desc1);
-	return ret;
-}
-
-/*
- * Forward a privcmd domctl hypercall issued from user space.
- *
- * The xen_domctl_t is copied in from the user pointer in
- * hypercall->arg[0] and its interface version is checked.  Sub-commands
- * that embed a guest handle to a further user buffer get a xencomm
- * descriptor for that buffer, stored in the kernel copy in place of the
- * user pointer; the remaining handled sub-commands are passed through
- * unchanged.
- *
- * Returns -EFAULT if a user copy fails, -EACCES on an interface version
- * mismatch, -ENOSYS for an unhandled sub-command, the xencomm_create()
- * error if the descriptor cannot be built, otherwise the hypercall
- * result.
- */
-static int
-xencomm_privcmd_domctl(privcmd_hypercall_t *hypercall)
-{
-	xen_domctl_t kern_op;
-	xen_domctl_t __user *user_op;
-	struct xencomm_handle *op_desc;
-	struct xencomm_handle *desc = NULL;
-	int ret = 0;
-
-	user_op = (xen_domctl_t __user *)hypercall->arg[0];
-
-	if (copy_from_user(&kern_op, user_op, sizeof(xen_domctl_t)))
-		return -EFAULT;
-
-	if (kern_op.interface_version != XEN_DOMCTL_INTERFACE_VERSION)
-		return -EACCES;
-
-	op_desc = xencomm_create_inline(&kern_op);
-
-	switch (kern_op.cmd) {
-	case XEN_DOMCTL_createdomain:
-	case XEN_DOMCTL_destroydomain:
-	case XEN_DOMCTL_pausedomain:
-	case XEN_DOMCTL_unpausedomain:
-	case XEN_DOMCTL_getdomaininfo:
-		break;
-	case XEN_DOMCTL_getmemlist:
-	{
-		unsigned long nr_pages = kern_op.u.getmemlist.max_pfns;
-
-		/* buffer is sized as one unsigned long per requested pfn */
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.getmemlist.buffer),
-			nr_pages * sizeof(unsigned long),
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.getmemlist.buffer,
-		                     (void *)desc);
-		break;
-	}
-	case XEN_DOMCTL_getpageframeinfo:
-		break;
-	case XEN_DOMCTL_getpageframeinfo2:
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.getpageframeinfo2.array),
-			kern_op.u.getpageframeinfo2.num,
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.getpageframeinfo2.array,
-		                     (void *)desc);
-		break;
-	case XEN_DOMCTL_shadow_op:
-		/* bitmap length in bytes: pages / 8, rounded up */
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap),
-			ROUND_DIV(kern_op.u.shadow_op.pages, 8),
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap,
-		                     (void *)desc);
-		break;
-	case XEN_DOMCTL_max_mem:
-		break;
-	case XEN_DOMCTL_setvcpucontext:
-	case XEN_DOMCTL_getvcpucontext:
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.vcpucontext.ctxt),
-			sizeof(vcpu_guest_context_t),
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.vcpucontext.ctxt, (void *)desc);
-		break;
-	case XEN_DOMCTL_getvcpuinfo:
-		break;
-	case XEN_DOMCTL_setvcpuaffinity:
-	case XEN_DOMCTL_getvcpuaffinity:
-		/* bitmap length in bytes: nr_cpus / 8, rounded up */
-		ret = xencomm_create(
-			xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap),
-			ROUND_DIV(kern_op.u.vcpuaffinity.cpumap.nr_cpus, 8),
-			&desc, GFP_KERNEL);
-		set_xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap,
-		                     (void *)desc);
-		break;
-	/* sub-commands below need no nested descriptor */
-	case XEN_DOMCTL_max_vcpus:
-	case XEN_DOMCTL_scheduler_op:
-	case XEN_DOMCTL_setdomainhandle:
-	case XEN_DOMCTL_setdebugging:
-	case XEN_DOMCTL_irq_permission:
-	case XEN_DOMCTL_iomem_permission:
-	case XEN_DOMCTL_ioport_permission:
-	case XEN_DOMCTL_hypercall_init:
-	case XEN_DOMCTL_arch_setup:
-	case XEN_DOMCTL_settimeoffset:
-	case XEN_DOMCTL_sendtrigger:
-		break;
-	default:
-		printk("%s: unknown domctl cmd %d\n", __func__, kern_op.cmd);
-		return -ENOSYS;
-	}
-
-	if (ret) {
-		/* error mapping the nested pointer */
-		return ret;
-	}
-
-	ret = xencomm_arch_hypercall_domctl (op_desc);
-
-	/*
-	 * kern_op is copied back as-is, i.e. with any guest handle that was
-	 * replaced above still holding the descriptor value.
-	 */
-	/* FIXME: should we restore the handle?  */
-	if (copy_to_user(user_op, &kern_op, sizeof(xen_domctl_t)))
-		ret = -EFAULT;
-
-	if (desc)
-		xencomm_free(desc);
-	return ret;
-}
-
-/*
- * Forward a privcmd acm_op hypercall issued from user space.
- *
- * hypercall->arg[0] is the sub-command and hypercall->arg[1] the user
- * pointer to its argument.  ACMOP_getssid is the only sub-command
- * handled; anything else is logged and rejected with -ENOSYS.
- *
- * Returns -EFAULT if a user copy fails, the xencomm_create() error if
- * the SSID buffer descriptor cannot be built, otherwise the hypercall
- * result.
- */
-static int
-xencomm_privcmd_acm_op(privcmd_hypercall_t *hypercall)
-{
-	int cmd = hypercall->arg[0];
-	void __user *uarg = (void __user *)hypercall->arg[1];
-	struct acm_getssid ssid_arg;
-	struct xencomm_handle *arg_handle;
-	struct xencomm_handle *buf_handle = NULL;
-	int rc;
-
-	if (cmd != ACMOP_getssid) {
-		printk("%s: unknown acm_op cmd %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	if (copy_from_user(&ssid_arg, uarg, sizeof (ssid_arg)))
-		return -EFAULT;
-
-	/* handle for the argument structure itself */
-	arg_handle = xencomm_create_inline(&ssid_arg);
-
-	/* separate descriptor for the SSID buffer the structure points to */
-	rc = xencomm_create(xen_guest_handle(ssid_arg.ssidbuf),
-	                    ssid_arg.ssidbuf_size, &buf_handle, GFP_KERNEL);
-	if (rc)
-		return rc;
-
-	set_xen_guest_handle(ssid_arg.ssidbuf, (void *)buf_handle);
-
-	rc = xencomm_arch_hypercall_acm_op(cmd, arg_handle);
-
-	xencomm_free(buf_handle);
-
-	if (copy_to_user(uarg, &ssid_arg, sizeof (ssid_arg)))
-		return -EFAULT;
-
-	return rc;
-}
-
-/*
- * Forward a privcmd memory_op hypercall issued from user space.
- *
- * hypercall->arg[0] is the XENMEM_* sub-command and hypercall->arg[1]
- * the user pointer to its argument structure.  The structure is copied
- * into the kernel, each guest handle inside it is replaced by a xencomm
- * descriptor covering the corresponding user buffer, and the hypercall
- * is issued.  Every descriptor created here is released on every exit
- * path.
- *
- * Returns the hypercall result, -EFAULT when a user copy fails, the
- * xencomm_create() error when a descriptor cannot be built, or -ENOSYS
- * for a sub-command that is not handled.
- */
-static int
-xencomm_privcmd_memory_op(privcmd_hypercall_t *hypercall)
-{
-	const unsigned long cmd = hypercall->arg[0];
-	int ret = 0;
-
-	switch (cmd) {
-	case XENMEM_increase_reservation:
-	case XENMEM_decrease_reservation:
-	case XENMEM_populate_physmap:
-	{
-		xen_memory_reservation_t kern_op;
-		xen_memory_reservation_t __user *user_op;
-		struct xencomm_handle *desc = NULL;
-		struct xencomm_handle *desc_op;
-
-		user_op = (xen_memory_reservation_t __user *)hypercall->arg[1];
-		if (copy_from_user(&kern_op, user_op,
-		                   sizeof(xen_memory_reservation_t)))
-			return -EFAULT;
-		desc_op = xencomm_create_inline(&kern_op);
-
-		/* the extent list is optional: only describe it when present */
-		if (xen_guest_handle(kern_op.extent_start)) {
-			void * addr;
-
-			addr = xen_guest_handle(kern_op.extent_start);
-			ret = xencomm_create
-				(addr,
-				 kern_op.nr_extents *
-				 sizeof(*xen_guest_handle
-					(kern_op.extent_start)),
-				 &desc, GFP_KERNEL);
-			if (ret)
-				return ret;
-			set_xen_guest_handle(kern_op.extent_start,
-			                     (void *)desc);
-		}
-
-		ret = xencomm_arch_hypercall_memory_op(cmd, desc_op);
-
-		if (desc)
-			xencomm_free(desc);
-
-		/* the structure is only written back when the result is 0 */
-		if (ret != 0)
-			return ret;
-
-		if (copy_to_user(user_op, &kern_op,
-		                 sizeof(xen_memory_reservation_t)))
-			return -EFAULT;
-
-		return ret;
-	}
-	case XENMEM_translate_gpfn_list:
-	{
-		xen_translate_gpfn_list_t kern_op;
-		xen_translate_gpfn_list_t __user *user_op;
-		struct xencomm_handle *desc_gpfn = NULL;
-		struct xencomm_handle *desc_mfn = NULL;
-		struct xencomm_handle *desc_op;
-		void *addr;
-
-		user_op = (xen_translate_gpfn_list_t __user *)
-			hypercall->arg[1];
-		if (copy_from_user(&kern_op, user_op,
-		                   sizeof(xen_translate_gpfn_list_t)))
-			return -EFAULT;
-		desc_op = xencomm_create_inline(&kern_op);
-
-		if (kern_op.nr_gpfns) {
-			/* gpfn_list.  */
-			addr = xen_guest_handle(kern_op.gpfn_list);
-
-			ret = xencomm_create(addr, kern_op.nr_gpfns *
-			                     sizeof(*xen_guest_handle
-			                            (kern_op.gpfn_list)),
-			                     &desc_gpfn, GFP_KERNEL);
-			if (ret)
-				return ret;
-			set_xen_guest_handle(kern_op.gpfn_list,
-			                     (void *)desc_gpfn);
-
-			/* mfn_list.  */
-			addr = xen_guest_handle(kern_op.mfn_list);
-
-			ret = xencomm_create(addr, kern_op.nr_gpfns *
-			                     sizeof(*xen_guest_handle
-			                            (kern_op.mfn_list)),
-			                     &desc_mfn, GFP_KERNEL);
-			if (ret) {
-				/* do not leak the gpfn_list descriptor */
-				xencomm_free(desc_gpfn);
-				return ret;
-			}
-			set_xen_guest_handle(kern_op.mfn_list,
-			                     (void *)desc_mfn);
-		}
-
-		ret = xencomm_arch_hypercall_memory_op(cmd, desc_op);
-
-		if (desc_gpfn)
-			xencomm_free(desc_gpfn);
-
-		if (desc_mfn)
-			xencomm_free(desc_mfn);
-
-		return ret;
-	}
-	default:
-		printk("%s: unknown memory op %lu\n", __func__, cmd);
-		ret = -ENOSYS;
-	}
-	return ret;
-}
-
-/*
- * Forward a privcmd xen_version hypercall issued from user space.
- *
- * hypercall->arg[0] is the XENVER_* sub-command and hypercall->arg[1]
- * the user buffer for it.  The buffer length is chosen per sub-command;
- * an unrecognised sub-command is logged and rejected with -ENOSYS.
- * Returns the xencomm_create() error if the descriptor cannot be
- * built, otherwise the hypercall result.
- */
-static int
-xencomm_privcmd_xen_version(privcmd_hypercall_t *hypercall)
-{
-	int cmd = hypercall->arg[0];
-	void __user *uarg = (void __user *)hypercall->arg[1];
-	struct xencomm_handle *handle;
-	size_t len;
-	int err;
-
-	/* do not actually pass an argument */
-	if (cmd == XENVER_version)
-		return xencomm_arch_hypercall_xen_version(cmd, 0);
-
-	switch (cmd) {
-	case XENVER_extraversion:
-		len = sizeof(xen_extraversion_t);
-		break;
-	case XENVER_compile_info:
-		len = sizeof(xen_compile_info_t);
-		break;
-	case XENVER_capabilities:
-		len = sizeof(xen_capabilities_info_t);
-		break;
-	case XENVER_changeset:
-		len = sizeof(xen_changeset_info_t);
-		break;
-	case XENVER_platform_parameters:
-		len = sizeof(xen_platform_parameters_t);
-		break;
-	case XENVER_pagesize:
-		/* a NULL argument is given a zero length */
-		if (uarg == NULL)
-			len = 0;
-		else
-			len = sizeof(void *);
-		break;
-	case XENVER_get_features:
-		if (uarg == NULL)
-			len = 0;
-		else
-			len = sizeof(xen_feature_info_t);
-		break;
-	default:
-		printk("%s: unknown version op %d\n", __func__, cmd);
-		return -ENOSYS;
-	}
-
-	err = xencomm_create(uarg, len, &handle, GFP_KERNEL);
-	if (err)
-		return err;
-
-	err = xencomm_arch_hypercall_xen_version(cmd, handle);
-	xencomm_free(handle);
-
-	return err;
-}
-
-/*
- * Forward a privcmd event_channel_op hypercall issued from user space.
- *
- * Only EVTCHNOP_alloc_unbound and EVTCHNOP_status are accepted; any
- * other sub-command is logged and rejected with -EINVAL.  Returns the
- * xencomm_create() error if the descriptor for hypercall->arg[1] cannot
- * be built, otherwise the hypercall result.
- */
-static int
-xencomm_privcmd_event_channel_op(privcmd_hypercall_t *hypercall)
-{
-	int cmd = hypercall->arg[0];
-	struct xencomm_handle *handle;
-	unsigned int len;
-	int rc;
-
-	if (cmd == EVTCHNOP_alloc_unbound) {
-		len = sizeof(evtchn_alloc_unbound_t);
-	} else if (cmd == EVTCHNOP_status) {
-		len = sizeof(evtchn_status_t);
-	} else {
-		printk("%s: unknown EVTCHNOP %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-
-	rc = xencomm_create((void *)hypercall->arg[1], len,
-	                    &handle, GFP_KERNEL);
-	if (rc)
-		return rc;
-
-	rc = xencomm_arch_hypercall_event_channel_op(cmd, handle);
-	xencomm_free(handle);
-
-	return rc;
-}
-
-/*
- * Forward a privcmd hvm_op hypercall issued from user space.
- *
- * The argument length is selected from the HVMOP_* sub-command in
- * hypercall->arg[0]; an unrecognised sub-command is logged and rejected
- * with -EINVAL.  Returns the xencomm_create() error if the descriptor
- * for hypercall->arg[1] cannot be built, otherwise the hypercall result.
- */
-static int
-xencomm_privcmd_hvm_op(privcmd_hypercall_t *hypercall)
-{
-	int cmd = hypercall->arg[0];
-	struct xencomm_handle *handle;
-	unsigned int len;
-	int rc;
-
-	if (cmd == HVMOP_get_param || cmd == HVMOP_set_param) {
-		len = sizeof(xen_hvm_param_t);
-	} else if (cmd == HVMOP_set_pci_intx_level) {
-		len = sizeof(xen_hvm_set_pci_intx_level_t);
-	} else if (cmd == HVMOP_set_isa_irq_level) {
-		len = sizeof(xen_hvm_set_isa_irq_level_t);
-	} else if (cmd == HVMOP_set_pci_link_route) {
-		len = sizeof(xen_hvm_set_pci_link_route_t);
-	} else {
-		printk("%s: unknown HVMOP %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-
-	rc = xencomm_create((void *)hypercall->arg[1], len,
-	                    &handle, GFP_KERNEL);
-	if (rc)
-		return rc;
-
-	rc = xencomm_arch_hypercall_hvm_op(cmd, handle);
-	xencomm_free(handle);
-
-	return rc;
-}
-
-/*
- * Forward a privcmd sched_op hypercall issued from user space.
- *
- * SCHEDOP_remote_shutdown is the only sub-command accepted; anything
- * else is logged and rejected with -EINVAL.  Returns the
- * xencomm_create() error if the descriptor for hypercall->arg[1] cannot
- * be built, otherwise the hypercall result.
- */
-static int
-xencomm_privcmd_sched_op(privcmd_hypercall_t *hypercall)
-{
-	int cmd = hypercall->arg[0];
-	struct xencomm_handle *handle;
-	unsigned int len;
-	int rc;
-
-	if (cmd != SCHEDOP_remote_shutdown) {
-		printk("%s: unknown SCHEDOP %d\n", __func__, cmd);
-		return -EINVAL;
-	}
-	len = sizeof(sched_remote_shutdown_t);
-
-	rc = xencomm_create((void *)hypercall->arg[1], len,
-	                    &handle, GFP_KERNEL);
-	if (rc)
-		return rc;
-
-	rc = xencomm_arch_hypercall_sched_op(cmd, handle);
-	xencomm_free(handle);
-
-	return rc;
-}
-
-/*
- * Entry point for privcmd hypercalls: select the xencomm wrapper that
- * matches hypercall->op and run it.  An operation with no wrapper is
- * logged and rejected with -ENOSYS.
- */
-int
-privcmd_hypercall(privcmd_hypercall_t *hypercall)
-{
-	int (*handler)(privcmd_hypercall_t *);
-
-	switch (hypercall->op) {
-	case __HYPERVISOR_platform_op:
-		handler = xencomm_privcmd_platform_op;
-		break;
-	case __HYPERVISOR_domctl:
-		handler = xencomm_privcmd_domctl;
-		break;
-	case __HYPERVISOR_sysctl:
-		handler = xencomm_privcmd_sysctl;
-		break;
-	case __HYPERVISOR_acm_op:
-		handler = xencomm_privcmd_acm_op;
-		break;
-	case __HYPERVISOR_xen_version:
-		handler = xencomm_privcmd_xen_version;
-		break;
-	case __HYPERVISOR_memory_op:
-		handler = xencomm_privcmd_memory_op;
-		break;
-	case __HYPERVISOR_event_channel_op:
-		handler = xencomm_privcmd_event_channel_op;
-		break;
-	case __HYPERVISOR_hvm_op:
-		handler = xencomm_privcmd_hvm_op;
-		break;
-	case __HYPERVISOR_sched_op:
-		handler = xencomm_privcmd_sched_op;
-		break;
-	default:
-		printk("%s: unknown hcall (%ld)\n", __func__, hypercall->op);
-		return -ENOSYS;
-	}
-
-	return handler(hypercall);
-}
-
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c b/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c
deleted file mode 100644
index 5962e73aa6..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xen_dma.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
- * 	Alex Williamson <alex.williamson@hp.com>
- *
- * Basic DMA mapping services for Xen guests.
- * Based on arch/i386/kernel/pci-dma-xen.c.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <asm/scatterlist.h>
-
-/*
- * If "test" evaluates true, print a KERN_ALERT message and call BUG().
- * Wrapped in do { } while (0) so it can be used as a single statement.
- */
-#define IOMMU_BUG_ON(test)					\
-do {								\
-	if (unlikely(test)) {					\
-		printk(KERN_ALERT "Fatal DMA error!\n");	\
-		BUG();						\
-	}							\
-} while (0)
-
-
-/*
- * Report whether bus address "addr" has bits set outside the device's
- * DMA mask (non-zero result) or fits within it (zero).
- *
- * This should be broken out of swiotlb and put in a common place
- * when merged with upstream Linux.
- */
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
-{
-	dma_addr_t limit;
-
-	/* If the device has a mask, use it, otherwise default to 32 bits */
-	if (!hwdev || !hwdev->dma_mask)
-		limit = 0xffffffff;
-	else
-		limit = *hwdev->dma_mask;
-
-	return (addr & ~limit) != 0;
-}
-
-/*
- * Fill in the DMA address and length of each of the "nents" scatterlist
- * entries from its page and offset, and BUG if any resulting address
- * falls outside the device's DMA mask.  Returns nents.
- */
-int
-xen_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-	   int direction)
-{
-	struct scatterlist *ent = sg;
-	int left;
-
-	for (left = nents; left > 0; left--, ent++) {
-		ent->dma_address = page_to_bus(ent->page) + ent->offset;
-		ent->dma_length  = ent->length;
-
-		IOMMU_BUG_ON(address_needs_mapping(hwdev, ent->dma_address));
-	}
-
-	return nents;
-}
-EXPORT_SYMBOL(xen_map_sg);
-
-/* Counterpart of xen_map_sg(); the body is empty, nothing is undone. */
-void
-xen_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
-	     int direction)
-{
-}
-EXPORT_SYMBOL(xen_unmap_sg);
-
-/* Always returns 0, whatever dma_addr is: no address is reported as an error. */
-int
-xen_dma_mapping_error(dma_addr_t dma_addr)
-{
-	return 0;
-}
-EXPORT_SYMBOL(xen_dma_mapping_error);
-
-/* Always returns 1: neither the device nor the mask is examined. */
-int
-xen_dma_supported(struct device *dev, u64 mask)
-{
-	return 1;
-}
-EXPORT_SYMBOL(xen_dma_supported);
-
-/*
- * Allocate a zeroed buffer of at least "size" bytes for coherent DMA.
- *
- * Pages are obtained with __get_free_pages() and then passed to
- * xen_create_contiguous_region() together with the device's
- * coherent_dma_mask.  On success the bus address is stored in
- * *dma_handle and the kernel virtual address is returned; NULL is
- * returned if either step fails.
- */
-void *
-xen_alloc_coherent(struct device *dev, size_t size,
-		   dma_addr_t *dma_handle, gfp_t gfp)
-{
-	unsigned int order = get_order(size);
-	unsigned long addr = __get_free_pages(gfp, order);
-	void *cpu_addr;
-
-	if (addr == 0)
-		return NULL;
-
-	if (xen_create_contiguous_region(addr, order,
-					 dev->coherent_dma_mask) != 0) {
-		/* give the pages back when the region cannot be set up */
-		free_pages(addr, order);
-		return NULL;
-	}
-
-	cpu_addr = (void *)addr;
-	memset(cpu_addr, 0, size);
-	*dma_handle = virt_to_bus(cpu_addr);
-
-	return cpu_addr;
-}
-EXPORT_SYMBOL(xen_alloc_coherent);
-
-/*
- * Release a buffer obtained from xen_alloc_coherent(): tear down the
- * contiguous region, then free the pages.  The order is recomputed
- * from "size"; dma_handle is not used.
- */
-void
-xen_free_coherent(struct device *dev, size_t size,
-		      void *vaddr, dma_addr_t dma_handle)
-{
-	unsigned long addr = (unsigned long)vaddr;
-	unsigned int order = get_order(size);
-
-	xen_destroy_contiguous_region(addr, order);
-	free_pages(addr, order);
-}
-EXPORT_SYMBOL(xen_free_coherent);
-
-/*
- * Return the bus address of the buffer at "ptr".  BUGs if the buffer
- * straddles a page boundary or if its bus address falls outside the
- * device's DMA mask.
- */
-dma_addr_t
-xen_map_single(struct device *dev, void *ptr, size_t size,
-	       int direction)
-{
-	dma_addr_t bus = virt_to_bus(ptr);
-
-	IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
-	IOMMU_BUG_ON(address_needs_mapping(dev, bus));
-
-	return bus;
-}
-EXPORT_SYMBOL(xen_map_single);
-
-/* Counterpart of xen_map_single(); the body is empty, nothing is undone. */
-void
-xen_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		 int direction)
-{
-}
-EXPORT_SYMBOL(xen_unmap_single);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c b/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c
deleted file mode 100644
index 367b6b32de..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <xen/interface/xen.h>
-#include <asm/page.h>
-
-#ifdef HAVE_XEN_PLATFORM_COMPAT_H
-#include <xen/platform-compat.h>
-#endif
-
-#include <asm/xen/xencomm.h>
-
-static int xencomm_debug = 0;
-
-static unsigned long kernel_start_pa;
-
-/*
- * Record KERNEL_START - ia64_tpa(KERNEL_START) in kernel_start_pa.
- * Despite its name the variable therefore holds a difference, not an
- * address: xencomm_vaddr_to_paddr() subtracts it from virtual addresses
- * in the [KERNEL_START, KERNEL_START + KERNEL_TR_PAGE_SIZE) range.
- * NOTE(review): presumably ia64_tpa() yields the physical address of
- * its argument -- confirm.
- */
-void
-xencomm_init (void)
-{
-	kernel_start_pa = KERNEL_START - ia64_tpa(KERNEL_START);
-}
-
-/*
- * Translate virtual address to physical address.
- *
- * Returns 0 for a zero vaddr and ~0UL when no translation is found.
- * Three kinds of address are handled in turn:
- *   - on ia64, region 5 addresses: the kernel image range is translated
- *     with the offset recorded by xencomm_init(), anything else by
- *     walking the kernel page tables;
- *   - addresses above TASK_SIZE: translated with __pa();
- *   - remaining addresses: looked up in current->mm (not available
- *     when CONFIG_VMX_GUEST is set, where ~0UL is returned).
- */
-unsigned long
-xencomm_vaddr_to_paddr(unsigned long vaddr)
-{
-#ifndef CONFIG_VMX_GUEST
-	struct page *page;
-	struct vm_area_struct *vma;
-#endif
-
-	if (vaddr == 0)
-		return 0;
-
-#ifdef __ia64__
-	if (REGION_NUMBER(vaddr) == 5) {
-		pgd_t *pgd;
-		pud_t *pud;
-		pmd_t *pmd;
-		pte_t *ptep;
-
-		/* On ia64, TASK_SIZE refers to current.  It is not initialized
-		   during boot.
-		   Furthermore the kernel is relocatable and __pa() doesn't
-		   work on these addresses.  */
-		if (vaddr >= KERNEL_START
-		    && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE)) {
-			return vaddr - kernel_start_pa;
-		}
-
-		/* In kernel area -- virtually mapped.  */
-		/* Walk pgd -> pud -> pmd -> pte; give up at the first bad level. */
-		pgd = pgd_offset_k(vaddr);
-		if (pgd_none(*pgd) || pgd_bad(*pgd))
-			return ~0UL;
-
-		pud = pud_offset(pgd, vaddr);
-		if (pud_none(*pud) || pud_bad(*pud))
-			return ~0UL;
-
-		pmd = pmd_offset(pud, vaddr);
-		if (pmd_none(*pmd) || pmd_bad(*pmd))
-			return ~0UL;
-
-		/*
-		 * NOTE(review): only the pointer is checked; the PTE itself
-		 * is not tested for being present -- confirm callers only
-		 * pass mapped addresses.
-		 */
-		ptep = pte_offset_kernel(pmd, vaddr);
-		if (!ptep)
-			return ~0UL;
-
-		/* frame bits from the PTE plus the offset within the page */
-		return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
-	}
-#endif
-
-	if (vaddr > TASK_SIZE) {
-		/* kernel address */
-		return __pa(vaddr);
-	}
-
-
-#ifdef CONFIG_VMX_GUEST
-	/* No privcmd within vmx guest.  */
-	return ~0UL;
-#else
-	/* XXX double-check (lack of) locking */
-	vma = find_extend_vma(current->mm, vaddr);
-	if (!vma)
-		return ~0UL;
-
-	/* We assume the page is modified.  */
-	page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
-	if (!page)
-		return ~0UL;
-
-	return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
-#endif
-}
-
-/*
- * Fill a xencomm descriptor with the physical address of every page
- * chunk of the "bytes"-long buffer at "buffer".
- *
- * desc->nr_addrs must already hold the number of address slots
- * available (it is forced to 0 for a NULL buffer).  Unused slots are
- * set to XENCOMM_INVALID and the magic is written last.
- *
- * Returns 0 on success, -EINVAL if an address cannot be translated, or
- * -ENOSPC if the buffer needs more slots than the descriptor has.
- */
-static int
-xencomm_init_desc(struct xencomm_desc *desc, void *buffer, unsigned long bytes)
-{
-	unsigned long done = 0;
-	int slot;
-
-	BUG_ON((buffer == NULL) && (bytes > 0));
-
-	/* record the physical pages used */
-	if (buffer == NULL)
-		desc->nr_addrs = 0;
-
-	for (slot = 0; (done < bytes) && (slot < desc->nr_addrs); slot++) {
-		unsigned long va = (unsigned long)buffer + done;
-		unsigned long pa;
-		int in_page;
-		int chunk;
-
-		/* handle partial pages */
-		in_page = va % PAGE_SIZE;
-		chunk = min(PAGE_SIZE - in_page, bytes - done);
-
-		pa = xencomm_vaddr_to_paddr(va);
-		if (pa == ~0UL) {
-			printk("%s: couldn't translate vaddr %lx\n",
-			       __func__, va);
-			return -EINVAL;
-		}
-
-		desc->address[slot] = pa;
-		done += chunk;
-	}
-
-	if (done < bytes) {
-		printk("%s: could only translate %ld of %ld bytes\n",
-		       __func__, done, bytes);
-		return -ENOSPC;
-	}
-
-	/* mark remaining addresses invalid (just for safety) */
-	for (; slot < desc->nr_addrs; slot++)
-		desc->address[slot] = XENCOMM_INVALID;
-
-	desc->magic = XENCOMM_MAGIC;
-
-	return 0;
-}
-
-/*
- * Allocate one page to hold a xencomm descriptor and set nr_addrs to
- * the number of address slots that fit in the page after the header.
- *
- * Returns NULL if the page cannot be allocated; the caller,
- * xencomm_create(), turns that into -ENOMEM.  (Panicking here would
- * defeat both the caller's check and the gfp_mask it passes in.)
- */
-static struct xencomm_desc *
-xencomm_alloc(gfp_t gfp_mask)
-{
-	struct xencomm_desc *desc;
-
-	desc = (struct xencomm_desc *)__get_free_page(gfp_mask);
-	if (desc == NULL)
-		return NULL;
-
-	desc->nr_addrs = (PAGE_SIZE - sizeof(struct xencomm_desc)) /
-	                 sizeof(*desc->address);
-
-	return desc;
-}
-
-/*
- * Release the page behind a handle returned by xencomm_create().  The
- * handle is the __pa() of the descriptor page, so it is converted back
- * with __va() before being freed.  A NULL handle is ignored.
- */
-void
-xencomm_free(struct xencomm_handle *desc)
-{
-	if (desc)
-		free_page((unsigned long)__va(desc));
-}
-
-/*
- * Build a page-sized xencomm descriptor for the "bytes"-long buffer at
- * "buffer" and store its handle (the physical address of the
- * descriptor) in *ret.
- *
- * A NULL buffer or a zero length yields a NULL handle and success.
- * Returns 0 on success, -ENOMEM if no descriptor page is available, or
- * the xencomm_init_desc() error, in which case the page is freed and
- * *ret is left untouched.
- */
-int
-xencomm_create(void *buffer, unsigned long bytes,
-               struct xencomm_handle **ret, gfp_t gfp_mask)
-{
-	struct xencomm_desc *desc;
-	int err;
-
-	if (xencomm_debug)
-		printk("%s: %p[%ld]\n", __func__, buffer, bytes);
-
-	/* nothing to describe: hand back a NULL handle */
-	if (!buffer || !bytes) {
-		*ret = NULL;
-		return 0;
-	}
-
-	desc = xencomm_alloc(gfp_mask);
-	if (desc == NULL) {
-		printk("%s failure\n", "xencomm_alloc");
-		return -ENOMEM;
-	}
-
-	err = xencomm_init_desc(desc, buffer, bytes);
-	if (err) {
-		printk("%s failure: %d\n", "xencomm_init_desc", err);
-		xencomm_free((struct xencomm_handle *)__pa(desc));
-		return err;
-	}
-
-	*ret = (struct xencomm_handle *)__pa(desc);
-	return 0;
-}
-
-/* "mini" routines, for stack-based communications: */
-
-/*
- * Pick a caller-provided "mini" area that does not cross a page
- * boundary.
- *
- * "area" is an array of *nbr_area entries.  Entries are tried from the
- * highest index down, and *nbr_area is decremented for every entry
- * consumed, whether it is returned or skipped.
- *
- * Returns the chosen entry, or NULL once no entry is left.  The loop
- * stops when *nbr_area reaches 0, so area[-1] is never touched.
- */
-static void *
-xencomm_alloc_mini(struct xencomm_mini *area, int *nbr_area)
-{
-	unsigned long base;
-	unsigned int pageoffset;
-
-	while (*nbr_area > 0) {
-		/* Allocate an area.  */
-		(*nbr_area)--;
-
-		base = (unsigned long)(area + *nbr_area);
-		pageoffset = base % PAGE_SIZE;
-
-		/* If the area does not cross a page, use it.  */
-		if ((PAGE_SIZE - pageoffset) >= sizeof(struct xencomm_mini))
-			return &area[*nbr_area];
-	}
-	/* No more area.  */
-	return NULL;
-}
-
-/*
- * Build a xencomm descriptor for "buffer" inside one of the
- * caller-provided mini areas and store its handle (the physical
- * address of the descriptor) in *ret.
- *
- * Returns 0 on success, -ENOMEM if no usable area is left, the
- * xencomm_init_desc() error if the buffer cannot be described, or
- * -EINVAL if the descriptor's own address cannot be translated.
- */
-int
-xencomm_create_mini(struct xencomm_mini *area, int *nbr_area,
-                    void *buffer, unsigned long bytes,
-                    struct xencomm_handle **ret)
-{
-	struct xencomm_desc *desc = xencomm_alloc_mini(area, nbr_area);
-	unsigned long paddr;
-	int err;
-
-	if (desc == NULL)
-		return -ENOMEM;
-
-	/* a mini descriptor only has XENCOMM_MINI_ADDRS address slots */
-	desc->nr_addrs = XENCOMM_MINI_ADDRS;
-
-	err = xencomm_init_desc(desc, buffer, bytes);
-	if (err)
-		return err;
-
-	paddr = xencomm_vaddr_to_paddr((unsigned long)desc);
-	if (paddr == ~0UL)
-		return -EINVAL;
-
-	*ret = (struct xencomm_handle *)paddr;
-	return 0;
-}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
deleted file mode 100644
index b9394ab050..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
+++ /dev/null
@@ -1,931 +0,0 @@
-/*
- * arch/ia64/xen/xenentry.S
- *
- * Alternate kernel routines for Xen.  Heavily leveraged from
- *   ia64/kernel/entry.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- *	Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/kregs.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
-#include <asm/percpu.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-
-#ifdef CONFIG_XEN
-#include "xenminstate.h"
-#else
-#include "minstate.h"
-#endif
-
-/*
- * prev_task <- ia64_switch_to(struct task_struct *next)
- *	With Ingo's new scheduler, interrupts are disabled when this routine gets
- *	called.  The code starting at .map relies on this.  The rest of the code
- *	doesn't care about the interrupt masking status.
- *
- *	Under CONFIG_XEN the entry point is named xen_switch_to.  It first loads
- *	the running_on_xen flag and, when that flag is zero, branches to
- *	__ia64_switch_to instead of running the body below.  In the Xen build of
- *	the body, the direct accesses to psr.ic, cr.itir, cr.ifa and the kernel
- *	registers are replaced by stores to the XSI_* locations and by the
- *	XEN_HYPER_SET_KR macro; each replacement sits in the #ifdef CONFIG_XEN
- *	arm next to the native instruction it stands in for.
- *
- *	in0 = next task, r13 = current task on entry; r8 = previous task on exit.
- */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_switch_to)
-	.prologue
-	alloc r16=ar.pfs,1,0,0,0
-	movl r22=running_on_xen;;		// r22 = &running_on_xen
-	ld4 r22=[r22];;				// r22 = running_on_xen
-	cmp.eq p7,p0=r22,r0			// p7 <- (running_on_xen == 0)
-(p7)	br.cond.sptk.many __ia64_switch_to;;	// flag clear: hand off to __ia64_switch_to
-#else
-GLOBAL_ENTRY(ia64_switch_to)
-	.prologue
-	alloc r16=ar.pfs,1,0,0,0
-#endif
-	DO_SAVE_SWITCH_STACK
-	.body
-
-	adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13	// r22 = ksp slot of the old task (r13 = current)
-	movl r25=init_task
-	mov r27=IA64_KR(CURRENT_STACK)		// r27 = granule recorded by the last pass through .map
-	adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0	// r21 = ksp slot of the new task
-	dep r20=0,in0,61,3		// physical address of "next"
-	;;
-	st8 [r22]=sp			// save kernel stack pointer of old task
-	shr.u r26=r20,IA64_GRANULE_SHIFT	// r26 = granule number of "next"
-	cmp.eq p7,p6=r25,in0		// p7 <- (next == init_task), p6 <- (next != init_task)
-	;;
-	/*
-	 * If we've already mapped this task's page, we can skip doing it again.
-	 */
-(p6)	cmp.eq p7,p6=r26,r27		// p6 stays set only if the granule differs from the recorded one
-(p6)	br.cond.dpnt .map		// different granule: go install a mapping
-	;;
-.done:
-	ld8 sp=[r21]			// load kernel stack pointer of new task
-#ifdef CONFIG_XEN
-	// update "current" application register
-	// NOTE(review): r8/r9 look like the register index and the new value
-	// consumed by XEN_HYPER_SET_KR; the macro is defined outside this
-	// file -- confirm.
-	mov r8=IA64_KR_CURRENT
-	mov r9=in0;;
-	XEN_HYPER_SET_KR
-#else
-	mov IA64_KR(CURRENT)=in0	// update "current" application register
-#endif
-	mov r8=r13			// return pointer to previously running task
-	mov r13=in0			// set "current" pointer
-	;;
-	DO_LOAD_SWITCH_STACK
-
-#ifdef CONFIG_SMP
-	sync.i				// ensure "fc"s done by this CPU are visible on other CPUs
-#endif
-	br.ret.sptk.many rp		// boogie on out in new context
-
-.map:
-#ifdef CONFIG_XEN
-    movl r25=XSI_PSR_IC			// clear psr.ic
-    ;;
-    st4 [r25]=r0			// zero the XSI_PSR_IC word (stands in for "rsm psr.ic")
-    ;;
-#else
-	rsm psr.ic			// interrupts (psr.i) are already disabled here
-#endif
-	movl r25=PAGE_KERNEL
-	;;
-	srlz.d
-	or r23=r25,r20			// construct PA | page properties
-	mov r25=IA64_GRANULE_SHIFT<<2	// r25 = itir value: granule shift moved up two bits
-	;;
-#ifdef CONFIG_XEN
-	movl r8=XSI_ITIR
-	;;
-	st8 [r8]=r25			// stands in for "mov cr.itir=r25"
-	;;
-	movl r8=XSI_IFA
-	;;
-	st8 [r8]=in0			 // VA of next task...
-	;;
-	mov r25=IA64_TR_CURRENT_STACK	// r25 = translation register slot used by itr.d below
-	// remember last page we mapped...
-	mov r8=IA64_KR_CURRENT_STACK
-	mov r9=r26;;
-	XEN_HYPER_SET_KR;;
-#else
-	mov cr.itir=r25
-	mov cr.ifa=in0			// VA of next task...
-	;;
-	mov r25=IA64_TR_CURRENT_STACK	// r25 = translation register slot used by itr.d below
-	mov IA64_KR(CURRENT_STACK)=r26	// remember last page we mapped...
-#endif
-	;;
-	itr.d dtr[r25]=r23		// wire in new mapping...
-#ifdef CONFIG_XEN
-	;;
-	srlz.d
-	mov r9=1
-	movl r8=XSI_PSR_IC
-	;;
-	st4 [r8]=r9			// XSI_PSR_IC <- 1 (stands in for "ssm psr.ic")
-	;;
-#else    
-	ssm psr.ic			// reenable the psr.ic bit
-	;;
-	srlz.d
-#endif
-	br.cond.sptk .done		// mapping installed: resume the switch
-#ifdef CONFIG_XEN
-END(xen_switch_to)
-#else
-END(ia64_switch_to)
-#endif
-
-	/*
-	 * Invoke a system call, but do some tracing before and after the call.
-	 * We MUST preserve the current register frame throughout this routine
-	 * because some system calls (such as ia64_execve) directly
-	 * manipulate ar.pfs.
-	 *
-	 * Under CONFIG_XEN the entry point is xen_trace_syscall, which branches
-	 * to __ia64_trace_syscall when running_on_xen is zero.
-	 *
-	 * Flow: spill f6-f11, call syscall_trace_enter, refill f6-f11, reload the
-	 * syscall number from pt_regs, dispatch through sys_call_table (or
-	 * sys_ni_syscall when out of range), store the result in pt_regs, call
-	 * syscall_trace_leave, then join .work_pending_syscall_end.
-	 */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_trace_syscall)
-	PT_REGS_UNWIND_INFO(0)
-	movl r16=running_on_xen;;		// r16 = &running_on_xen
-	ld4 r16=[r16];;				// r16 = running_on_xen
-	cmp.eq p7,p0=r16,r0			// p7 <- (running_on_xen == 0)
-(p7)	br.cond.sptk.many __ia64_trace_syscall;;	// flag clear: hand off to __ia64_trace_syscall
-#else
-GLOBAL_ENTRY(ia64_trace_syscall)
-	PT_REGS_UNWIND_INFO(0)
-#endif
-	/*
-	 * We need to preserve the scratch registers f6-f11 in case the system
-	 * call is sigreturn.
-	 */
-	adds r16=PT(F6)+16,sp			// r16 = &pt_regs.f6, then steps by 32 (f6, f8, f10)
-	adds r17=PT(F7)+16,sp			// r17 = &pt_regs.f7, then steps by 32 (f7, f9, f11)
-	;;
- 	stf.spill [r16]=f6,32
- 	stf.spill [r17]=f7,32
-	;;
- 	stf.spill [r16]=f8,32
- 	stf.spill [r17]=f9,32
-	;;
- 	stf.spill [r16]=f10
- 	stf.spill [r17]=f11
-	br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
-	adds r16=PT(F6)+16,sp			// rebuild both cursors; the call may have changed r16/r17
-	adds r17=PT(F7)+16,sp
-	;;
-	ldf.fill f6=[r16],32
-	ldf.fill f7=[r17],32
-	;;
-	ldf.fill f8=[r16],32
-	ldf.fill f9=[r17],32
-	;;
-	ldf.fill f10=[r16]
-	ldf.fill f11=[r17]
-	// the syscall number may have changed, so re-load it and re-calculate the
-	// syscall entry-point:
-	adds r15=PT(R15)+16,sp			// r15 = &pt_regs.r15 (syscall #)
-	;;
-	ld8 r15=[r15]
-	mov r3=NR_syscalls - 1			// r3 = highest valid table index
-	;;
-	adds r15=-1024,r15			// r15 = syscall number - 1024 (table index)
-	movl r16=sys_call_table
-	;;
-	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
-	cmp.leu p6,p7=r15,r3			// p6 <- index in range (unsigned), p7 <- out of range
-	;;
-(p6)	ld8 r20=[r20]				// load address of syscall entry point
-(p7)	movl r20=sys_ni_syscall
-	;;
-	mov b6=r20
-	br.call.sptk.many rp=b6			// do the syscall
-.strace_check_retval:				// also entered from the ret_from_clone routine in this file
-	cmp.lt p6,p0=r8,r0			// syscall failed?
-	adds r2=PT(R8)+16,sp			// r2 = &pt_regs.r8
-	adds r3=PT(R10)+16,sp			// r3 = &pt_regs.r10
-	mov r10=0
-(p6)	br.cond.sptk strace_error		// syscall failed ->
-	;;					// avoid RAW on r10
-.strace_save_retval:
-.mem.offset 0,0; st8.spill [r2]=r8		// store return value in slot for r8
-.mem.offset 8,0; st8.spill [r3]=r10		// clear error indication in slot for r10
-	br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
-.ret3:
-(pUStk)	cmp.eq.unc p6,p0=r0,r0			// p6 <- pUStk
-	br.cond.sptk .work_pending_syscall_end	// reloads r8/r10 from pt_regs, then .work_processed_syscall
-
-strace_error:
-	ld8 r3=[r2]				// load pt_regs.r8
-	sub r9=0,r8				// negate return value to get errno value
-	;;
-	cmp.ne p6,p0=r3,r0			// is pt_regs.r8!=0?
-	adds r3=16,r2				// r3=&pt_regs.r10
-	;;
-(p6)	mov r10=-1				// flag the error in r10 ...
-(p6)	mov r8=r9				// ... and return the positive errno in r8
-	br.cond.sptk .strace_save_retval
-#ifdef CONFIG_XEN
-END(xen_trace_syscall)
-#else
-END(ia64_trace_syscall)
-#endif
-
-/*
- * First code run by a newly created task: calls ia64_invoke_schedule_tail,
- * then either joins the traced-syscall return path (.strace_check_retval)
- * when a _TIF_SYSCALL_TRACEAUDIT flag is set, or continues on the ordinary
- * syscall return path, with r8 set to 0 in both cases.
- *
- * Under CONFIG_XEN the entry point is xen_ret_from_clone, which branches to
- * __ia64_ret_from_clone when running_on_xen is zero.
- */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_ret_from_clone)
-	PT_REGS_UNWIND_INFO(0)
-	movl r16=running_on_xen;;		// r16 = &running_on_xen
-	ld4 r16=[r16];;				// r16 = running_on_xen
-	cmp.eq p7,p0=r16,r0			// p7 <- (running_on_xen == 0)
-(p7)	br.cond.sptk.many __ia64_ret_from_clone;;	// flag clear: hand off to __ia64_ret_from_clone
-#else	
-GLOBAL_ENTRY(ia64_ret_from_clone)
-	PT_REGS_UNWIND_INFO(0)
-#endif	
-{	/*
-	 * Some versions of gas generate bad unwind info if the first instruction of a
-	 * procedure doesn't go into the first slot of a bundle.  This is a workaround.
-	 */
-	nop.m 0
-	nop.i 0
-	/*
-	 * We need to call schedule_tail() to complete the scheduling process.
-	 * Called by ia64_switch_to() after do_fork()->copy_thread().  r8 contains the
-	 * address of the previously executing task.
-	 */
-	br.call.sptk.many rp=ia64_invoke_schedule_tail
-}
-.ret8:
-	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13	// r2 = &current_thread_info()->flags
-	;;
-	ld4 r2=[r2]				// r2 = flags
-	;;
-	mov r8=0				// r8 <- 0 before joining a syscall return path
-	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// keep only the bits in _TIF_SYSCALL_TRACEAUDIT
-	;;
-	cmp.ne p6,p0=r2,r0			// p6 <- at least one of those bits is set
-(p6)	br.cond.spnt .strace_check_retval	// traced: finish via the trace_syscall return code
-	;;					// added stop bits to prevent r8 dependency
-#ifdef CONFIG_XEN
-	// ia64_ret_from_syscall is not defined in this file, so it has to be
-	// reached with an explicit branch.
-	br.cond.sptk ia64_ret_from_syscall
-END(xen_ret_from_clone)
-#else
-END(ia64_ret_from_clone)
-#endif		
-/*
- * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
- *	need to switch to bank 0 and doesn't restore the scratch registers.
- *	To avoid leaking kernel bits, the scratch registers are set to
- *	the following known-to-be-safe values:
- *
- *		  r1: restored (global pointer)
- *		  r2: cleared
- *		  r3: 1 (when returning to user-level)
- *	      r8-r11: restored (syscall return value(s))
- *		 r12: restored (user-level stack pointer)
- *		 r13: restored (user-level thread pointer)
- *		 r14: set to __kernel_syscall_via_epc
- *		 r15: restored (syscall #)
- *	     r16-r17: cleared
- *		 r18: user-level b6
- *		 r19: cleared
- *		 r20: user-level ar.fpsr
- *		 r21: user-level b0
- *		 r22: cleared
- *		 r23: user-level ar.bspstore
- *		 r24: user-level ar.rnat
- *		 r25: user-level ar.unat
- *		 r26: user-level ar.pfs
- *		 r27: user-level ar.rsc
- *		 r28: user-level ip
- *		 r29: user-level psr
- *		 r30: user-level cfm
- *		 r31: user-level pr
- *	      f6-f11: cleared
- *		  pr: restored (user-level pr)
- *		  b0: restored (user-level rp)
- *	          b6: restored
- *		  b7: set to __kernel_syscall_via_epc
- *	     ar.unat: restored (user-level ar.unat)
- *	      ar.pfs: restored (user-level ar.pfs)
- *	      ar.rsc: restored (user-level ar.rsc)
- *	     ar.rnat: restored (user-level ar.rnat)
- *	 ar.bspstore: restored (user-level ar.bspstore)
- *	     ar.fpsr: restored (user-level ar.fpsr)
- *	      ar.ccv: cleared
- *	      ar.csd: cleared
- *	      ar.ssd: cleared
- *
- * Under CONFIG_XEN this routine is assembled as xen_leave_syscall.  It
- * branches to __ia64_leave_syscall when running_on_xen is zero; otherwise the
- * privileged operations are replaced by stores to the XSI_* locations and by
- * the XEN_HYPER_* macros, each in the #ifdef CONFIG_XEN arm next to the native
- * instruction it stands in for.  The routine does not return: it ends by
- * branching to rbs_switch or skip_rbs_switch, both defined in the
- * leave_kernel routine of this file.
- */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_leave_syscall)
-	PT_REGS_UNWIND_INFO(0)
-	movl r22=running_on_xen;;		// r22 = &running_on_xen
-	ld4 r22=[r22];;				// r22 = running_on_xen
-	cmp.eq p7,p0=r22,r0			// p7 <- (running_on_xen == 0)
-(p7)	br.cond.sptk.many __ia64_leave_syscall;;	// flag clear: hand off to __ia64_leave_syscall
-#else
-ENTRY(ia64_leave_syscall)
-	PT_REGS_UNWIND_INFO(0)
-#endif
-	/*
-	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
-	 * user- or fsys-mode, hence we disable interrupts early on.
-	 *
-	 * p6 controls whether current_thread_info()->flags needs to be checked for
-	 * extra work.  We always check for extra work when returning to user-level.
-	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
-	 * is 0.  After extra work processing has been completed, execution
-	 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
-	 * needs to be redone.
-	 */
-#ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
-	cmp.eq pLvSys,p0=r0,r0			// pLvSys=1: leave from syscall
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-	.pred.rel.mutex pUStk,pKStk
-(pKStk) ld4 r21=[r20]			// r21 <- preempt_count
-(pUStk)	mov r21=0			// r21 <- 0
-	;;
-	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
-#else /* !CONFIG_PREEMPT */
-#ifdef CONFIG_XEN
-	movl r2=XSI_PSR_I_ADDR		// r2 = address of the XSI_PSR_I_ADDR word
-	mov r18=1
-	;;
-	ld8 r2=[r2]			// r2 = pointer stored in that word
-	;;
-(pUStk)	st1 [r2]=r18			// write 1 through it (stands in for "rsm psr.i")
-#else
-(pUStk)	rsm psr.i
-#endif
-	cmp.eq pLvSys,p0=r0,r0		// pLvSys=1: leave from syscall
-(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
-#endif
-.work_processed_syscall:
-	adds r2=PT(LOADRS)+16,r12		// r2 = &pt_regs.loadrs
-	adds r3=PT(AR_BSPSTORE)+16,r12		// r3 = &pt_regs.ar_bspstore
-	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13	// r18 = &current_thread_info()->flags
-	;;
-(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
-	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
-	nop.i 0
-	;;
-	mov r16=ar.bsp				// M2  get existing backing store pointer
-	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
-(p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
-	;;
-	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
-(p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
-(p6)	br.cond.spnt .work_pending_syscall	// yes: r2/r3 are left at pt_regs.r9/.r11 for that code
-	;;
-	// start restoring the state saved on the kernel stack (struct pt_regs):
-	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
-	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
-	;;
-	invala			// M0|1 invalidate ALAT
-#ifdef CONFIG_XEN
-	movl r28=XSI_PSR_I_ADDR
-	movl r29=XSI_PSR_IC
-	;;
-	ld8 r28=[r28]		// r28 = pointer stored at XSI_PSR_I_ADDR
-	mov r30=1
-	;;
-	st1	[r28]=r30	// write 1 through that pointer
-	st4	[r29]=r0	// note: clears both vpsr.i and vpsr.ic!
-	;;
-#else
-	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
-#endif
-	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
-
-	ld8 r29=[r2],16		// M0|1 load cr.ipsr
-	ld8 r28=[r3],16		// M0|1 load cr.iip
-	mov r22=r0		// A    clear r22
-	;;
-	ld8 r30=[r2],16		// M0|1 load cr.ifs
-	ld8 r25=[r3],16		// M0|1 load ar.unat
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
-	;;
-	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
-#ifdef CONFIG_XEN
-	// NOTE(review): XEN_HYPER_GET_PSR appears to leave its result in r8
-	// (macro defined outside this file -- confirm), so r8 is parked in r21
-	// around it and the result is moved to r22, where the native path
-	// places psr.
-(pKStk)	mov r21=r8
-(pKStk)	XEN_HYPER_GET_PSR
-	;;
-(pKStk)	mov r22=r8
-(pKStk)	mov r8=r21
-	;;
-#else    
-(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
-#endif
-	nop 0
-	;;
-	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
-	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
-	mov f6=f0			// F    clear f6
-	;;
-	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
-	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
-	mov f7=f0				// F    clear f7
-	;;
-	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
-	ld8.fill r1=[r3],16			// M0|1 load r1
-(pUStk) mov r17=1				// A
-	;;
-(pUStk) st1 [r14]=r17				// M2|3 byte at IA64_TASK_THREAD_ON_USTACK_OFFSET(r13) <- 1
-	ld8.fill r13=[r3],16			// M0|1
-	mov f8=f0				// F    clear f8
-	;;
-	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
-	ld8.fill r15=[r3]			// M0|1 restore r15
-	mov b6=r18				// I0   restore b6
-
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
-	mov f9=f0					// F    clear f9
-(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
-
-	srlz.d				// M0   ensure interruption collection is off (for cover)
-	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
-#ifdef CONFIG_XEN
-	XEN_HYPER_COVER;
-#else
-	cover				// B    add current frame into dirty partition & set cr.ifs
-#endif
-	;;
-(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
-	mov r19=ar.bsp			// M2   get new backing store pointer
-	mov f10=f0			// F    clear f10
-
-	nop.m 0
-	movl r14=__kernel_syscall_via_epc // X
-	;;
-	mov.m ar.csd=r0			// M2   clear ar.csd
-	mov.m ar.ccv=r0			// M2   clear ar.ccv
-	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
-
-	mov.m ar.ssd=r0			// M2   clear ar.ssd
-	mov f11=f0			// F    clear f11
-	br.cond.sptk.many rbs_switch	// B
-#ifdef CONFIG_XEN
-END(xen_leave_syscall)
-#else
-END(ia64_leave_syscall)
-#endif
-
-/*
- * Entry of the general kernel-exit path.  Under CONFIG_XEN it is assembled as
- * xen_leave_kernel and branches to __ia64_leave_kernel when running_on_xen is
- * zero.  This prologue masks interrupts, clears pLvSys and sets up p6, then
- * falls through into .work_processed_kernel.
- */
-#ifdef CONFIG_XEN
-GLOBAL_ENTRY(xen_leave_kernel)
-	PT_REGS_UNWIND_INFO(0)
-	movl r22=running_on_xen;;		// r22 = &running_on_xen
-	ld4 r22=[r22];;				// r22 = running_on_xen
-	cmp.eq p7,p0=r22,r0			// p7 <- (running_on_xen == 0)
-(p7)	br.cond.sptk.many __ia64_leave_kernel;;	// flag clear: hand off to __ia64_leave_kernel
-#else
-GLOBAL_ENTRY(ia64_leave_kernel)
-	PT_REGS_UNWIND_INFO(0)
-#endif
-	/*
-	 * work.need_resched etc. mustn't get changed by this CPU before it returns to
-	 * user- or fsys-mode, hence we disable interrupts early on.
-	 *
-	 * p6 controls whether current_thread_info()->flags needs to be checked for
-	 * extra work.  We always check for extra work when returning to user-level.
-	 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
-	 * is 0.  After extra work processing has been completed, execution
-	 * resumes at .work_processed_kernel (or, when pLvSys is set, at
-	 * .work_processed_syscall by way of .work_pending_syscall_end) with p6 set
-	 * to 1 if the extra-work-check needs to be redone.
-	 */
-#ifdef CONFIG_PREEMPT
-	rsm psr.i				// disable interrupts
-	cmp.eq p0,pLvSys=r0,r0			// pLvSys=0: leave from kernel
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-	.pred.rel.mutex pUStk,pKStk
-(pKStk)	ld4 r21=[r20]			// r21 <- preempt_count
-(pUStk)	mov r21=0			// r21 <- 0
-	;;
-	cmp.eq p6,p0=r21,r0		// p6 <- pUStk || (preempt_count == 0)
-#else
-#ifdef CONFIG_XEN
-(pUStk)	movl r17=XSI_PSR_I_ADDR		// r17 = address of the XSI_PSR_I_ADDR word
-(pUStk)	mov r31=1
-		;;
-(pUStk) 	ld8 r17=[r17]		// r17 = pointer stored in that word
-		;;
-(pUStk)	st1 [r17]=r31			// write 1 through it (stands in for "rsm psr.i")
-	;;
-#else
-(pUStk)	rsm psr.i
-#endif
-	cmp.eq p0,pLvSys=r0,r0		// pLvSys=0: leave from kernel
-(pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
-#endif
-	/*
-	 * Restore the register state saved in struct pt_regs (addressed as
-	 * PT(x)+16 off r12) unless work is pending.  Order of events: check
-	 * thread flags when p6 is set, reload b6/b7/ar.csd/ar.ssd/ar.ccv and
-	 * r8-r31/f6-f11, turn off interrupts and interruption collection, switch
-	 * to bank 0, then load the control/application register values into
-	 * r19-r31 for the code at rbs_switch / skip_rbs_switch.
-	 */
-.work_processed_kernel:
-	adds r17=TI_FLAGS+IA64_TASK_SIZE,r13	// r17 = &current_thread_info()->flags
-	;;
-(p6)	ld4 r31=[r17]				// load current_thread_info()->flags
-	adds r21=PT(PR)+16,r12			// r21 = &pt_regs.pr, used only as prefetch cursor
-	;;
-
-	lfetch [r21],PT(CR_IPSR)-PT(PR)
-	adds r2=PT(B6)+16,r12			// r2 = &pt_regs.b6
-	adds r3=PT(R16)+16,r12			// r3 = &pt_regs.r16
-	;;
-	lfetch [r21]
-	ld8 r28=[r2],8		// load b6
-	adds r29=PT(R24)+16,r12			// r29 = &pt_regs.r24
-
-	ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)	// restore r16; r3 -> pt_regs.ar_csd
-	adds r30=PT(AR_CCV)+16,r12		// r30 = &pt_regs.ar_ccv
-(p6)	and r19=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
-	;;
-	ld8.fill r24=[r29]			// restore r24
-	ld8 r15=[r30]		// load ar.ccv
-(p6)	cmp4.ne.unc p6,p0=r19, r0		// any special work pending?
-	;;
-	ld8 r29=[r2],16		// load b7
-	ld8 r30=[r3],16		// load ar.csd
-(p6)	br.cond.spnt .work_pending
-	;;
-	ld8 r31=[r2],16		// load ar.ssd
-	ld8.fill r8=[r3],16
-	;;
-	ld8.fill r9=[r2],16
-	ld8.fill r10=[r3],PT(R17)-PT(R10)
-	;;
-	ld8.fill r11=[r2],PT(R18)-PT(R11)
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	mov ar.csd=r30
-	mov ar.ssd=r31
-	;;
-#ifdef CONFIG_XEN
-	movl r23=XSI_PSR_I_ADDR
-	movl r22=XSI_PSR_IC
-	;;
-	ld8 r23=[r23]		// r23 = pointer stored at XSI_PSR_I_ADDR
-	mov r25=1
-	;;
-	st1 [r23]=r25		// write 1 through that pointer
-	st4 [r22]=r0		// note: clears both vpsr.i and vpsr.ic!
-	;;
-#else
-	rsm psr.i | psr.ic	// initiate turning off of interrupt and interruption collection
-#endif
-	invala			// invalidate ALAT
-	;;
-	// NOTE(review): the stride of 24 lands on the slots that the next pair of
-	// loads treats as r25/r26, i.e. it steps over r24, which was filled above;
-	// this relies on the struct pt_regs layout, which is not visible here.
-	ld8.fill r22=[r2],24
-	ld8.fill r23=[r3],24
-	mov b6=r28
-	;;
-	ld8.fill r25=[r2],16
-	ld8.fill r26=[r3],16
-	mov b7=r29
-	;;
-	ld8.fill r27=[r2],16
-	ld8.fill r28=[r3],16
-	;;
-	ld8.fill r29=[r2],16
-	ld8.fill r30=[r3],24
-	;;
-	ld8.fill r31=[r2],PT(F9)-PT(R31)	// restore r31; r2 -> pt_regs.f9
-	adds r3=PT(F10)-PT(F6),r3		// move r3 on by the f6-to-f10 distance for the f10 load
-	;;
-	ldf.fill f9=[r2],PT(F6)-PT(F9)
-	ldf.fill f10=[r3],PT(F8)-PT(F10)
-	;;
-	ldf.fill f6=[r2],PT(F7)-PT(F6)
-	;;
-	ldf.fill f7=[r2],PT(F11)-PT(F7)
-	ldf.fill f8=[r3],32
-	;;
-	srlz.d	// ensure that inter. collection is off (VHPT is don't care, since text is pinned)
-	mov ar.ccv=r15
-	;;
-	ldf.fill f11=[r2]
-#ifdef CONFIG_XEN
-	;;
-	// r16-r31 all now hold bank1 values
-	// Instead of "bsw.0": spill r16-r31 to the area starting at XSI_BANK1_R16,
-	// store the resulting ar.unat at XSI_B1NAT, and write 0 to XSI_BANKNUM.
-	mov r15=ar.unat				// keep the current ar.unat; the spills below change it
-	movl r2=XSI_BANK1_R16			// r2 walks the r16, r18, ... slots
-	movl r3=XSI_BANK1_R16+8			// r3 walks the r17, r19, ... slots
-	;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,16
-	;;
-	mov r3=ar.unat				// r3 = ar.unat as left by the spills above
-	movl r2=XSI_B1NAT
-	;;
-	st8 [r2]=r3				// XSI_B1NAT <- that ar.unat value
-	mov ar.unat=r15				// put back the ar.unat saved above
-	movl r2=XSI_BANKNUM;;
-	st4 [r2]=r0;				// XSI_BANKNUM <- 0
-#else
-	bsw.0			// switch back to bank 0 (no stop bit required beforehand...)
-#endif
-	;;
-(pUStk)	mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
-	adds r16=PT(CR_IPSR)+16,r12		// r16 = &pt_regs.cr_ipsr
-	adds r17=PT(CR_IIP)+16,r12		// r17 = &pt_regs.cr_iip
-
-#ifdef CONFIG_XEN    
-	// NOTE(review): XEN_HYPER_GET_PSR appears to leave its result in r8
-	// (macro defined outside this file -- confirm), so r8 is parked in r29
-	// around it and the result is moved to r22, where the native path
-	// places psr.
-(pKStk)	mov r29=r8
-(pKStk)	XEN_HYPER_GET_PSR
-	;;
-(pKStk)	mov r22=r8
-(pKStk)	mov r8=r29
-	;;
-#else
-(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
-#endif
-	nop.i 0
-	nop.i 0
-	;;
-	ld8 r29=[r16],16	// load cr.ipsr
-	ld8 r28=[r17],16	// load cr.iip
-	;;
-	ld8 r30=[r16],16	// load cr.ifs
-	ld8 r25=[r17],16	// load ar.unat
-	;;
-	ld8 r26=[r16],16	// load ar.pfs
-	ld8 r27=[r17],16	// load ar.rsc
-	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
-	;;
-	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
-	ld8 r23=[r17],16	// load ar.bspstore (may be garbage)
-	;;
-	ld8 r31=[r16],16	// load predicates
-	ld8 r21=[r17],16	// load b0
-	;;
-	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16	// load r1
-	;;
-	ld8.fill r12=[r16],16
-	ld8.fill r13=[r17],16
-(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// r18 = on_ustack field of the task read from IA64_KR(CURRENT)
-	;;
-	ld8 r20=[r16],16	// ar.fpsr
-	ld8.fill r15=[r17],16
-	;;
-	ld8.fill r14=[r16],16
-	ld8.fill r2=[r17]
-(pUStk)	mov r17=1
-	;;
-	ld8.fill r3=[r16]
-(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
-	shr.u r18=r19,16	// get byte size of existing "dirty" partition
-	;;
-	mov r16=ar.bsp		// get existing backing store pointer
-	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
-	;;
-	ld4 r17=[r17]		// r17 = cpu_data->phys_stacked_size_p8
-(pKStk)	br.cond.dpnt skip_rbs_switch
-
-	/*
-	 * Restore user backing store.
-	 *
-	 * NOTE: alloc, loadrs, and cover can't be predicated.
-	 */
-(pNonSys) br.cond.dpnt dont_preserve_current_frame
-
-#ifdef CONFIG_XEN
-	XEN_HYPER_COVER;
-#else
-	cover				// add current frame into dirty partition and set cr.ifs
-#endif
-	;;
-	mov r19=ar.bsp			// get new backing store pointer
-	/*
-	 * Shared by the leave_syscall and leave_kernel paths of this file.
-	 * Inputs as set up by both callers: r16 = old ar.bsp, r18 = byte size of
-	 * the existing dirty partition, r19 = new ar.bsp, r17 = value loaded from
-	 * ia64_phys_stacked_size_p8.  Produces the "loadrs" value in r19, clears
-	 * the invalid stacked registers, then executes loadrs and falls through
-	 * into skip_rbs_switch.
-	 */
-rbs_switch:
-	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
-	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
-	;;
-	sub r19=r19,r16			// calculate total byte size of dirty partition
-	add r18=64,r18			// don't force in0-in7 into memory...
-	;;
-	shl r19=r19,16			// shift size of dirty partition into loadrs position
-	;;
-dont_preserve_current_frame:
-	/*
-	 * To prevent leaking bits between the kernel and user-space,
-	 * we must clear the stacked registers in the "invalid" partition here.
-	 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
-	 * 5 registers/cycle on McKinley).
-	 */
-#	define pRecurse	p6
-#	define pReturn	p7
-#ifdef CONFIG_ITANIUM
-#	define Nregs	10
-#else
-#	define Nregs	14
-#endif
-	alloc loc0=ar.pfs,2,Nregs-2,2,0	// frame of Nregs+2 registers: 2 in, Nregs-2 local, 2 out
-	shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
-	sub r17=r17,r18			// r17 = (physStackedSize + 8) - dirtySize
-	;;
-	mov ar.rsc=r19			// load ar.rsc to be used for "loadrs"
-	shladd in0=loc1,3,r17		// in0 = r17 + 8*RNaTslots: byte count still to clear
-	mov in1=0			// in1 = recursion count, starts at 0
-	;;
-	TEXT_ALIGN(32)
-	/*
-	 * Self-recursive clearing loop.  Each level allocates a fresh frame,
-	 * zeroes its locals and calls itself with in0 reduced by Nregs*8 while
-	 * more than Nregs*8 bytes remain; every level except the outermost
-	 * (in1 == 0) then returns with br.ret.
-	 */
-rse_clear_invalid:
-#ifdef CONFIG_ITANIUM
-	// cycle 0
- { .mii
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
-	add out0=-Nregs*8,in0
-}{ .mfb
-	add out1=1,in1			// increment recursion count
-	nop.f 0
-	nop.b 0				// can't do br.call here because of alloc (WAW on CFM)
-	;;
-}{ .mfi	// cycle 1
-	mov loc1=0
-	nop.f 0
-	mov loc2=0
-}{ .mib
-	mov loc3=0
-	mov loc4=0
-(pRecurse) br.call.sptk.many b0=rse_clear_invalid
-
-}{ .mfi	// cycle 2
-	mov loc5=0
-	nop.f 0
-	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
-}{ .mib
-	mov loc6=0
-	mov loc7=0
-(pReturn) br.ret.sptk.many b0
-}
-#else /* !CONFIG_ITANIUM */
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	cmp.lt pRecurse,p0=Nregs*8,in0	// if more than Nregs regs left to clear, (re)curse
-	add out0=-Nregs*8,in0
-	add out1=1,in1			// increment recursion count
-	mov loc1=0
-	mov loc2=0
-	;;
-	mov loc3=0
-	mov loc4=0
-	mov loc5=0
-	mov loc6=0
-	mov loc7=0
-(pRecurse) br.call.dptk.few b0=rse_clear_invalid
-	;;
-	mov loc8=0
-	mov loc9=0
-	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
-	mov loc10=0
-	mov loc11=0
-(pReturn) br.ret.dptk.many b0
-#endif /* !CONFIG_ITANIUM */
-#	undef pRecurse
-#	undef pReturn
-	;;
-	alloc r17=ar.pfs,0,0,0,0	// drop current register frame
-	;;
-	loadrs
-	;;
-	/*
-	 * Final stage shared by the leave_syscall and leave_kernel paths: install
-	 * the values gathered in r20-r31 into the application/control registers
-	 * and return from interruption.  When pLvSys is set, r2 and r16-r19 are
-	 * cleared on the way.  Under CONFIG_XEN the cr.ipsr/cr.ifs/cr.iip writes
-	 * become stores to consecutive XSI_* slots addressed through r25, and rfi
-	 * becomes XEN_HYPER_RFI.
-	 */
-skip_rbs_switch:
-	mov ar.unat=r25		// M2
-(pKStk)	extr.u r22=r22,21,1	// I0 extract current value of psr.pp from r22
-(pLvSys)mov r19=r0		// A  clear r19 for leave_syscall, no-op otherwise
-	;;
-(pUStk)	mov ar.bspstore=r23	// M2
-(pKStk)	dep r29=r22,r29,21,1	// I0 update ipsr.pp with psr.pp
-(pLvSys)mov r16=r0		// A  clear r16 for leave_syscall, no-op otherwise
-	;;
-#ifdef CONFIG_XEN
-	movl r25=XSI_IPSR	// r25 is free again: ar.unat was written from it above
-	;;
-	st8[r25]=r29,XSI_IFS_OFS-XSI_IPSR_OFS	// XSI_IPSR <- r29; r25 moves on to the XSI_IFS slot
-	;;
-#else
-	mov cr.ipsr=r29		// M2
-#endif
-	mov ar.pfs=r26		// I0
-(pLvSys)mov r17=r0		// A  clear r17 for leave_syscall, no-op otherwise
-
-#ifdef CONFIG_XEN
-(p9)	st8 [r25]=r30		// XSI_IFS slot <- r30, only when p9 asks for cr.ifs restore
-	;;
-	adds r25=XSI_IIP_OFS-XSI_IFS_OFS,r25	// r25 moves on to the XSI_IIP slot
-	;;
-#else
-(p9)	mov cr.ifs=r30		// M2
-#endif
-	mov b0=r21		// I0
-(pLvSys)mov r18=r0		// A  clear r18 for leave_syscall, no-op otherwise
-
-	mov ar.fpsr=r20		// M2
-#ifdef CONFIG_XEN
-	st8	[r25]=r28	// XSI_IIP slot <- r28 (stands in for "mov cr.iip=r28")
-#else
-	mov cr.iip=r28		// M2
-#endif
-	nop 0
-	;;
-(pUStk)	mov ar.rnat=r24		// M2 must happen with RSE in lazy mode
-	nop 0
-(pLvSys)mov r2=r0
-
-	mov ar.rsc=r27		// M2
-	mov pr=r31,-1		// I0
-#ifdef CONFIG_XEN
-	;;
-	XEN_HYPER_RFI;
-#else
-	rfi			// B
-#endif
-
-	/*
-	 * Pending-work handling for both exit paths.  .work_pending_syscall is the
-	 * syscall-path entry (it first saves r8/r10 into pt_regs); .work_pending
-	 * is the kernel-path entry.
-	 *
-	 * On entry:
-	 *	r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
-	 *	r31 = current->thread_info->flags
-	 * On exit:
-	 *	p6 = TRUE if work-pending-check needs to be redone
-	 */
-.work_pending_syscall:
-	// NOTE(review): r2/r3 arrive pointing at pt_regs.r9/.r11 (left there by
-	// .work_processed_syscall); stepping back 8 bytes presumably reaches the
-	// r8/r10 slots -- confirm against the struct pt_regs layout.
-	add r2=-8,r2
-	add r3=-8,r3
-	;;
-	st8 [r2]=r8
-	st8 [r3]=r10
-.work_pending:
-	tbit.z p6,p0=r31,TIF_NEED_RESCHED		// current_thread_info()->need_resched==0?
-(p6)	br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1	// r21 = only the PREEMPT_ACTIVE bit set
-	;;
-(pKStk) st4 [r20]=r21				// preempt_count <- PREEMPT_ACTIVE
-	ssm psr.i		// enable interrupts
-#endif
-	br.call.spnt.many rp=schedule
-.ret9:	cmp.eq p6,p0=r0,r0				// p6 <- 1
-#ifdef CONFIG_XEN
-	movl r2=XSI_PSR_I_ADDR		// r2 = address of the XSI_PSR_I_ADDR word
-	mov r20=1
-	;;
-	ld8 r2=[r2]			// r2 = pointer stored in that word
-	;;
-	st1 [r2]=r20			// write 1 through it (stands in for "rsm psr.i")
-#else
-	rsm psr.i		// disable interrupts
-#endif
-	;;
-#ifdef CONFIG_PREEMPT
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
-#endif
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// re-check
-
-.notify:
-(pUStk)	br.call.spnt.many rp=notify_resume_user
-.ret10:	cmp.ne p6,p0=r0,r0				// p6 <- 0
-(pLvSys)br.cond.sptk.few  .work_pending_syscall_end
-	br.cond.sptk.many .work_processed_kernel	// don't re-check
-
-.work_pending_syscall_end:
-	adds r2=PT(R8)+16,r12			// r2 = &pt_regs.r8
-	adds r3=PT(R10)+16,r12			// r3 = &pt_regs.r10
-	;;
-	ld8 r8=[r2]				// reload r8 from pt_regs
-	ld8 r10=[r3]				// reload r10 from pt_regs
-	br.cond.sptk.many .work_processed_syscall	// re-check
-
-#ifdef CONFIG_XEN
-END(xen_leave_kernel)
-#else
-END(ia64_leave_kernel)
-#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c b/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c
deleted file mode 100644
index 3bc6cdbf7e..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenhpski.c
+++ /dev/null
@@ -1,19 +0,0 @@
-
-extern unsigned long xen_get_cpuid(int);
-
-/*
- * Report whether the CPUID values returned by xen_get_cpuid() match the
- * fixed signature tested below.
- *
- * CPUID registers 0 through 4 are all read, but only registers 0 and 3 take
- * part in the comparison: the low byte of register 0 must be 'H', and bytes
- * 0, 1 and 2 of register 3 must be 0x4, 0x0 and 0x0, with bits 24-26 all set.
- *
- * Returns 1 on a match, 0 otherwise.
- */
-int
-running_on_sim(void)
-{
-	long regs[6];
-	long id0, id3;
-	int n = 0;
-
-	while (n < 5) {
-		regs[n] = xen_get_cpuid(n);
-		++n;
-	}
-	id0 = regs[0];
-	id3 = regs[3];
-
-	return (id0 & 0xff) == 'H' &&
-	       (id3 & 0xff) == 0x4 &&
-	       ((id3 >> 8) & 0xff) == 0x0 &&
-	       ((id3 >> 16) & 0xff) == 0x0 &&
-	       ((id3 >> 24) & 0x7) == 0x7;
-}
-
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
deleted file mode 100644
index a411bb3a4a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S
+++ /dev/null
@@ -1,2177 +0,0 @@
-/*
- * arch/ia64/xen/xenivt.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- *	Dan Magenheimer <dan.magenheimer@hp.com>
- */
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for critical
- * interruptions like TLB misses.
- *
- *  For each entry, the comment is as follows:
- *
- *		// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
- *  entry offset ----/     /         /                  /          /
- *  entry number ---------/         /                  /          /
- *  size of the entry -------------/                  /          /
- *  vector name -------------------------------------/          /
- *  interruptions triggering this vector ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
-#include <asm/asmmacro.h>
-#include <asm/break.h>
-#include <asm/ia32.h>
-#include <asm/kregs.h>
-#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/system.h>
-#include <asm/thread_info.h>
-#include <asm/unistd.h>
-#include <asm/errno.h>
-
-#ifdef CONFIG_XEN
-#define ia64_ivt xen_ivt
-#endif
-
-#if 1
-# define PSR_DEFAULT_BITS	psr.ac
-#else
-# define PSR_DEFAULT_BITS	0
-#endif
-
-#if 0
-  /*
-   * This lets you track the last eight faults that occurred on the CPU.  Make sure ar.k2 isn't
-   * needed for something else before enabling this...
-   */
-# define DBG_FAULT(i)	mov r16=ar.k2;;	shl r16=r16,8;;	add r16=(i),r16;;mov ar.k2=r16
-#else
-# define DBG_FAULT(i)
-#endif
-
-#define MINSTATE_VIRT	/* needed by minstate.h */
-#include "xenminstate.h"
-
-#define FAULT(n)		/* common stub: branch to dispatch_to_fault_handler */	\
-	mov r31=pr;			/* save predicates in r31 */			\
-	mov r19=n;;			/* r19 = vector number n */			\
-	br.sptk.many dispatch_to_fault_handler
-
-	.section .text.ivt,"ax"
-
-	.align 32768	// align on 32KB boundary
-	.global ia64_ivt
-ia64_ivt:
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(vhpt_miss)
-	DBG_FAULT(0)
-	/*
-	 * The VHPT vector is invoked when the TLB entry for the virtual page table
-	 * is missing.  This happens only as a result of a previous
-	 * (the "original") TLB miss, which may either be caused by an instruction
-	 * fetch or a data access (or non-access).
-	 *
-	 * What we do here is normal TLB miss handling for the _original_ miss,
-	 * followed by inserting the TLB entry for the virtual page table page
-	 * that the VHPT walker was attempting to access.  The latter gets
-	 * inserted as long as the page table entries above the pte level have valid
-	 * mappings for the faulting address.  The TLB entry for the original
-	 * miss gets inserted only if the pte entry indicates that the page is
-	 * present.
-	 *
-	 * do_page_fault gets invoked in the following cases:
-	 *	- the faulting virtual address uses unimplemented address bits
-	 *	- the faulting virtual address has no valid page table mapping
-	 */
-#ifdef CONFIG_XEN
-	movl r16=XSI_IFA			// Xen: the faulting address is read from the XSI_IFA slot
-	;;
-	ld8 r16=[r16]				// r16 = address that caused the TLB miss
-#ifdef CONFIG_HUGETLB_PAGE
-	movl r18=PAGE_SHIFT
-	movl r25=XSI_ITIR			// Xen: itir is read from the XSI_ITIR slot
-	;;
-	ld8 r25=[r25]
-#endif
-	;;
-#else
-	mov r16=cr.ifa				// get address that caused the TLB miss
-#ifdef CONFIG_HUGETLB_PAGE
-	movl r18=PAGE_SHIFT
-	mov r25=cr.itir
-#endif
-#endif
-	;;
-#ifdef CONFIG_XEN
-	XEN_HYPER_RSM_PSR_DT;			// Xen counterpart of the "rsm psr.dt" below
-#else
-	rsm psr.dt				// use physical addressing for data
-#endif
-	mov r31=pr				// save the predicate registers
-	mov r19=IA64_KR(PT_BASE)		// get page table base address
-	shl r21=r16,3				// shift bit 60 into sign bit
-	shr.u r17=r16,61			// get the region number into r17
-	;;
-	shr.u r22=r21,3
-#ifdef CONFIG_HUGETLB_PAGE
-	extr.u r26=r25,2,6			// r26 = 6-bit field at bit 2 of itir
-	;;
-	cmp.ne p8,p0=r18,r26			// p8 <- that field differs from PAGE_SHIFT
-	sub r27=r26,r18
-	;;
-(p8)	dep r25=r18,r25,2,6
-(p8)	shr r22=r22,r27
-#endif
-	;;
-	cmp.eq p6,p7=5,r17			// is IFA pointing into region 5?
-	shr.u r18=r22,PGDIR_SHIFT		// get bottom portion of pgd index bit
-	;;
-(p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
-
-	srlz.d
-	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
-
-	.pred.rel "mutex", p6, p7
-(p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
-(p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
-	;;
-(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
-(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
-	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
-	shr.u r28=r22,PUD_SHIFT			// shift pud index into position
-#else
-	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
-#endif
-	;;
-	ld8 r17=[r17]				// get *pgd (may be 0)
-	;;
-(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
-#ifdef CONFIG_PGTABLE_4
-	dep r28=r28,r17,3,(PAGE_SHIFT-3)	// r28=pud_offset(pgd,addr)
-	;;
-	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
-(p7)	ld8 r29=[r28]				// get *pud (may be 0)
-	;;
-(p7)	cmp.eq.or.andcm p6,p7=r29,r0		// was pud_present(*pud) == NULL?
-	dep r17=r18,r29,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
-#else
-	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pgd,addr)
-#endif
-	;;
-(p7)	ld8 r20=[r17]				// get *pmd (may be 0)
-	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
-	;;
-(p7)	cmp.eq.or.andcm p6,p7=r20,r0		// was pmd_present(*pmd) == NULL?
-	dep r21=r19,r20,3,(PAGE_SHIFT-3)	// r21=pte_offset(pmd,addr)
-	;;
-(p7)	ld8 r18=[r21]				// read *pte
-#ifdef CONFIG_XEN
-	movl r19=XSI_ISR			// Xen: isr is read from the XSI_ISR slot
-	;;
-	ld8 r19=[r19]
-#else
-	mov r19=cr.isr				// cr.isr bit 32 tells us if this is an insn miss
-#endif
-	;;
-(p7)	tbit.z p6,p7=r18,_PAGE_P_BIT		// page present bit cleared?
-#ifdef CONFIG_XEN
-	movl r22=XSI_IHA			// Xen: iha is read from the XSI_IHA slot
-	;;
-	ld8 r22=[r22]
-#else
-	mov r22=cr.iha				// get the VHPT address that caused the TLB miss
-#endif
-	;;					// avoid RAW on p7
-(p7)	tbit.nz.unc p10,p11=r19,32		// is it an instruction TLB miss?
-	dep r23=0,r20,0,PAGE_SHIFT		// clear low bits to get page address
-	;;
-#ifdef CONFIG_XEN
-	mov r24=r8				// preserve r8 across the ITC hyperprivops
-	mov r8=r18				// r8 <- *pte, the value the native path passes to itc
-	;;
-(p10)	XEN_HYPER_ITC_I
-	;;
-(p11)	XEN_HYPER_ITC_D
-	;;
-	mov r8=r24				// restore r8
-	;;
-#else
-(p10)	itc.i r18				// insert the instruction TLB entry
-(p11)	itc.d r18				// insert the data TLB entry
-#endif
-(p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
-#ifdef CONFIG_XEN
-	movl r24=XSI_IFA
-	;;
-	st8 [r24]=r22				// Xen counterpart of the "mov cr.ifa=r22" below
-	;;
-#else
-	mov cr.ifa=r22
-#endif
-
-#ifdef CONFIG_HUGETLB_PAGE
-(p8)	mov cr.itir=r25				// change to default page-size for VHPT; NOTE(review): direct cr.itir write even under CONFIG_XEN, although itir was read from XSI_ITIR above -- confirm intended
-#endif
-
-	/*
-	 * Now compute and insert the TLB entry for the virtual page table.  We never
-	 * execute in a page table page so there is no need to set the exception deferral
-	 * bit.
-	 */
-	adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
-	;;
-#ifdef CONFIG_XEN
-(p7)	mov r25=r8				// preserve r8 across the hyperprivop
-(p7)	mov r8=r24				// r8 <- entry for the page table page (native path: itc.d r24)
-	;;
-(p7)	XEN_HYPER_ITC_D
-	;;
-(p7)	mov r8=r25				// restore r8
-	;;
-#else
-(p7)	itc.d r24
-#endif
-	;;
-#ifdef CONFIG_SMP
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	/*
-	 * Re-check pagetable entry.  If they changed, we may have received a ptc.g
-	 * between reading the pagetable and the "itc".  If so, flush the entry we
-	 * inserted and retry.  At this point, we have:
-	 *
-	 * r28 = equivalent of pud_offset(pgd, ifa)
-	 * r17 = equivalent of pmd_offset(pud, ifa)
-	 * r21 = equivalent of pte_offset(pmd, ifa)
-	 *
-	 * r29 = *pud
-	 * r20 = *pmd
-	 * r18 = *pte
-	 */
-	ld8 r25=[r21]				// read *pte again
-	ld8 r26=[r17]				// read *pmd again
-#ifdef CONFIG_PGTABLE_4
-	ld8 r19=[r28]				// read *pud again
-#endif
-	cmp.ne p6,p7=r0,r0			// p6 <- 0, p7 <- 1
-	;;
-	cmp.ne.or.andcm p6,p7=r26,r20		// did *pmd change
-#ifdef CONFIG_PGTABLE_4
-	cmp.ne.or.andcm p6,p7=r19,r29		// did *pud change
-#endif
-	mov r27=PAGE_SHIFT<<2
-	;;
-(p6)	ptc.l r22,r27				// purge PTE page translation
-(p7)	cmp.ne.or.andcm p6,p7=r25,r18		// did *pte change
-	;;
-(p6)	ptc.l r16,r27				// purge translation
-#endif
-
-	mov pr=r31,-1				// restore predicate registers
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI				// Xen counterpart of the "rfi" below
-	dv_serialize_data
-#else
-	rfi
-#endif
-END(vhpt_miss)
-
-	.org ia64_ivt+0x400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(itlb_miss)
-	DBG_FAULT(1)
-	/*
-	 * The ITLB handler accesses the PTE via the virtually mapped linear
-	 * page table.  If a nested TLB miss occurs, we switch into physical
-	 * mode, walk the page table, and then re-execute the PTE read and
-	 * go on normally after that.
-	 */
-#ifdef CONFIG_XEN
-	movl r16=XSI_IFA			// Xen: the faulting address is read from the XSI_IFA slot
-	;;
-	ld8 r16=[r16]
-#else
-	mov r16=cr.ifa				// get virtual address
-#endif
-	mov r29=b0				// save b0
-	mov r31=pr				// save predicates
-.itlb_fault:					// also entered from alt_itlb_miss (CONFIG_DISABLE_VHPT)
-#ifdef CONFIG_XEN
-	movl r17=XSI_IHA
-	;;
-	ld8 r17=[r17]				// get virtual address of L3 PTE
-#else
-	mov r17=cr.iha				// get virtual address of PTE
-#endif
-	movl r30=1f				// load nested fault continuation point
-	;;
-1:	ld8 r18=[r17]				// read *pte
-	;;
-	mov b0=r29				// restore b0
-	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
-(p6)	br.cond.spnt page_fault
-	;;
-#ifdef CONFIG_XEN
-	mov r19=r8				// preserve r8 across the hyperprivop
-	mov r8=r18				// r8 <- *pte (native path: itc.i r18)
-	;;
-	XEN_HYPER_ITC_I
-	;;
-	mov r8=r19				// restore r8
-#else
-	itc.i r18
-#endif
-	;;
-#ifdef CONFIG_SMP
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	ld8 r19=[r17]				// read *pte again and see if same
-	mov r20=PAGE_SHIFT<<2			// setup page size for purge
-	;;
-	cmp.ne p7,p0=r18,r19			// p7 <- *pte changed since the first read
-	;;
-(p7)	ptc.l r16,r20				// if so, purge the translation just inserted
-#endif
-	mov pr=r31,-1				// restore predicates
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI				// Xen counterpart of the "rfi" below
-	dv_serialize_data
-#else
-	rfi
-#endif
-END(itlb_miss)
-
-	.org ia64_ivt+0x0800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(dtlb_miss)
-	DBG_FAULT(2)
-	/*
-	 * The DTLB handler accesses the PTE via the virtually mapped linear
-	 * page table.  If a nested TLB miss occurs, we switch into physical
-	 * mode, walk the page table, and then re-execute the PTE read and
-	 * go on normally after that.
-	 */
-#ifdef CONFIG_XEN
-	movl r16=XSI_IFA			// Xen: the faulting address is read from the XSI_IFA slot
-	;;
-	ld8 r16=[r16]
-#else
-	mov r16=cr.ifa				// get virtual address
-#endif
-	mov r29=b0				// save b0
-	mov r31=pr				// save predicates
-dtlb_fault:					// also entered from alt_dtlb_miss (CONFIG_DISABLE_VHPT)
-#ifdef CONFIG_XEN
-	movl r17=XSI_IHA
-	;;
-	ld8 r17=[r17]				// get virtual address of L3 PTE
-#else
-	mov r17=cr.iha				// get virtual address of PTE
-#endif
-	movl r30=1f				// load nested fault continuation point
-	;;
-1:	ld8 r18=[r17]				// read *pte
-	;;
-	mov b0=r29				// restore b0
-	tbit.z p6,p0=r18,_PAGE_P_BIT		// page present bit cleared?
-(p6)	br.cond.spnt page_fault
-	;;
-#ifdef CONFIG_XEN
-	mov r19=r8				// preserve r8 across the hyperprivop
-	mov r8=r18				// r8 <- *pte (native path: itc.d r18)
-	;;
-	XEN_HYPER_ITC_D
-	;;
-	mov r8=r19				// restore r8
-	;;
-#else
-	itc.d r18
-#endif
-	;;
-#ifdef CONFIG_SMP
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	ld8 r19=[r17]				// read *pte again and see if same
-	mov r20=PAGE_SHIFT<<2			// setup page size for purge
-	;;
-	cmp.ne p7,p0=r18,r19			// p7 <- *pte changed since the first read
-	;;
-(p7)	ptc.l r16,r20				// if so, purge the translation just inserted
-#endif
-	mov pr=r31,-1				// restore predicates
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI				// Xen counterpart of the "rfi" below
-	dv_serialize_data
-#else
-	rfi
-#endif
-END(dtlb_miss)
-
-	.org ia64_ivt+0x0c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(alt_itlb_miss)
-	DBG_FAULT(3)
-#ifdef CONFIG_XEN
-	movl r31=XSI_IPSR			// r31 is a temporary pointer here; it receives pr further down
-	;;
-	ld8 r21=[r31],XSI_IFA_OFS-XSI_IPSR_OFS	// get ipsr, point to ifa
-	movl r17=PAGE_KERNEL
-	;;
-	ld8 r16=[r31]		// get ifa
-#else
-	mov r16=cr.ifa		// get address that caused the TLB miss
-	movl r17=PAGE_KERNEL
-	mov r21=cr.ipsr
-#endif
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	mov r31=pr		// save predicates
-	;;
-#ifdef CONFIG_DISABLE_VHPT
-	shr.u r22=r16,61			// get the region number into r22
-	;;
-	cmp.gt p8,p0=6,r22			// region 0-5 (user mode)?
-	;;
-#ifndef CONFIG_XEN
-(p8)	thash r17=r16
-	;;
-(p8)	mov cr.iha=r17
-#endif
-(p8)	mov r29=b0				// save b0
-(p8)	br.cond.dptk .itlb_fault
-#endif
-	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
-	and r19=r19,r16		// clear ed, reserved bits, and PTE control bits
-	shr.u r18=r16,57	// move address bit 61 to bit 4
-	;;
-	andcm r18=0x10,r18	// bit 4=~address-bit(61)
-	cmp.ne p8,p0=r0,r23	// psr.cpl != 0?
-	or r19=r17,r19		// insert PTE control bits into r19
-	;;
-	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
-(p8)	br.cond.spnt page_fault
-	;;
-#ifdef CONFIG_XEN
-	mov r18=r8		// preserve r8 across the hyperprivop
-	mov r8=r19		// r8 <- TLB entry (native path: itc.i r19)
-	;;
-	XEN_HYPER_ITC_I
-	;;
-	mov r8=r18		// restore r8
-	;;
-	mov pr=r31,-1		// restore predicates
-	;;
-	XEN_HYPER_RFI;
-#else
-	itc.i r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-#endif
-END(alt_itlb_miss)
-
-	.org ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(alt_dtlb_miss)
-	DBG_FAULT(4)
-#ifdef CONFIG_XEN
-	movl r31=XSI_IPSR			// r31 is a temporary pointer here; it receives pr further down
-	;;
-	ld8 r21=[r31],XSI_ISR_OFS-XSI_IPSR_OFS	// get ipsr, point to isr
-	movl r17=PAGE_KERNEL
-	;;
-	ld8 r20=[r31],XSI_IFA_OFS-XSI_ISR_OFS	// get isr, point to ifa
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	;;
-	ld8 r16=[r31]		// get ifa
-#else
-	mov r16=cr.ifa		// get address that caused the TLB miss
-	movl r17=PAGE_KERNEL
-	mov r20=cr.isr
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	mov r21=cr.ipsr
-#endif
-	mov r31=pr		// save predicates
-	;;
-#ifdef CONFIG_DISABLE_VHPT
-	shr.u r22=r16,61			// get the region number into r22
-	;;
-	cmp.gt p8,p0=6,r22			// access to region 0-5
-	;;
-#ifndef CONFIG_XEN
-(p8)	thash r17=r16
-	;;
-(p8)	mov cr.iha=r17
-#endif
-(p8)	mov r29=b0				// save b0
-(p8)	br.cond.dptk dtlb_fault
-#endif
-	extr.u r23=r21,IA64_PSR_CPL0_BIT,2	// extract psr.cpl
-	and r22=IA64_ISR_CODE_MASK,r20		// get the isr.code field
-	tbit.nz p6,p7=r20,IA64_ISR_SP_BIT	// is speculation bit on?
-	shr.u r18=r16,57			// move address bit 61 to bit 4
-	and r19=r19,r16				// clear ed, reserved bits, and PTE control bits
-	tbit.nz p9,p0=r20,IA64_ISR_NA_BIT	// is non-access bit on?
-	;;
-	andcm r18=0x10,r18	// bit 4=~address-bit(61)
-	cmp.ne p8,p0=r0,r23	// psr.cpl != 0?
-(p9)	cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22	// check isr.code field
-(p8)	br.cond.spnt page_fault
-
-	dep r21=-1,r21,IA64_PSR_ED_BIT,1	// set the ed bit in the ipsr copy held in r21
-	or r19=r19,r17		// insert PTE control bits into r19
-	;;
-	or r19=r19,r18		// set bit 4 (uncached) if the access was to region 6
-(p6)	mov cr.ipsr=r21		// NOTE(review): direct cr.ipsr write even under CONFIG_XEN, although ipsr was read from XSI_IPSR above -- confirm intended
-	;;
-#ifdef CONFIG_XEN
-(p7)	mov r18=r8		// preserve r8 across the hyperprivop
-(p7)	mov r8=r19		// r8 <- TLB entry (native path: itc.d r19)
-	;;
-(p7)	XEN_HYPER_ITC_D
-	;;
-(p7)	mov r8=r18		// restore r8
-	;;
-	mov pr=r31,-1		// restore predicates
-	;;
-	XEN_HYPER_RFI;
-#else
-(p7)	itc.d r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-#endif
-END(alt_dtlb_miss)
-
-	.org ia64_ivt+0x1400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(nested_dtlb_miss)
-	/*
-	 * In the absence of kernel bugs, we get here when the virtually mapped linear
-	 * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
-	 * Access-bit, or Data Access-bit faults).  If the DTLB entry for the virtual page
-	 * table is missing, a nested TLB miss fault is triggered and control is
-	 * transferred to this point.  When this happens, we lookup the pte for the
-	 * faulting address by walking the page table in physical mode and return to the
-	 * continuation point passed in register r30 (or call page_fault if the address is
-	 * not mapped).
-	 *
-	 * Input:	r16:	faulting address
-	 *		r29:	saved b0
-	 *		r30:	continuation address
-	 *		r31:	saved pr
-	 *
-	 * Output:	r17:	physical address of PTE of faulting address
-	 *		r29:	saved b0
-	 *		r30:	continuation address
-	 *		r31:	saved pr
-	 *
-	 * Clobbered:	b0, r18, r19, r21, r22, psr.dt (cleared)
-	 */
-#ifdef CONFIG_XEN
-	XEN_HYPER_RSM_PSR_DT;			// Xen counterpart of the "rsm psr.dt" below
-#else
-	rsm psr.dt				// switch to using physical data addressing
-#endif
-	mov r19=IA64_KR(PT_BASE)		// get the page table base address
-	shl r21=r16,3				// shift bit 60 into sign bit
-#ifdef CONFIG_XEN
-	movl r18=XSI_ITIR			// Xen: itir is read from the XSI_ITIR slot
-	;;
-	ld8 r18=[r18]
-#else
-	mov r18=cr.itir
-#endif
-	;;
-	shr.u r17=r16,61			// get the region number into r17
-	extr.u r18=r18,2,6			// get the faulting page size
-	;;
-	cmp.eq p6,p7=5,r17			// is faulting address in region 5?
-	add r22=-PAGE_SHIFT,r18			// adjustment for hugetlb address
-	add r18=PGDIR_SHIFT-PAGE_SHIFT,r18
-	;;
-	shr.u r22=r16,r22
-	shr.u r18=r16,r18
-(p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
-
-	srlz.d
-	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
-
-	.pred.rel "mutex", p6, p7
-(p6)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
-(p7)	shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
-	;;
-(p6)	dep r17=r18,r19,3,(PAGE_SHIFT-3)	// r17=pgd_offset for region 5
-(p7)	dep r17=r18,r17,3,(PAGE_SHIFT-6)	// r17=pgd_offset for region[0-4]
-	cmp.eq p7,p6=0,r21			// unused address bits all zeroes?
-#ifdef CONFIG_PGTABLE_4
-	shr.u r18=r22,PUD_SHIFT			// shift pud index into position
-#else
-	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
-#endif
-	;;
-	ld8 r17=[r17]				// get *pgd (may be 0)
-	;;
-(p7)	cmp.eq p6,p7=r17,r0			// was pgd_present(*pgd) == NULL?
-	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=p[u|m]d_offset(pgd,addr)
-	;;
-#ifdef CONFIG_PGTABLE_4
-(p7)	ld8 r17=[r17]				// get *pud (may be 0)
-	shr.u r18=r22,PMD_SHIFT			// shift pmd index into position
-	;;
-(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pud_present(*pud) == NULL?
-	dep r17=r18,r17,3,(PAGE_SHIFT-3)	// r17=pmd_offset(pud,addr)
-	;;
-#endif
-(p7)	ld8 r17=[r17]				// get *pmd (may be 0)
-	shr.u r19=r22,PAGE_SHIFT		// shift pte index into position
-	;;
-(p7)	cmp.eq.or.andcm p6,p7=r17,r0		// was pmd_present(*pmd) == NULL?
-	dep r17=r19,r17,3,(PAGE_SHIFT-3)	// r17=pte_offset(pmd,addr);
-(p6)	br.cond.spnt page_fault			// some level was not present, or unused address bits were set
-	mov b0=r30				// b0 <- continuation address supplied by the caller
-	br.sptk.many b0				// return to continuation point
-END(nested_dtlb_miss)
-
-	.org ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(ikey_miss)
-	DBG_FAULT(6)
-	FAULT(6)	// no inline handling: r19 <- 6, then branch to dispatch_to_fault_handler
-END(ikey_miss)
-
-	//-----------------------------------------------------------------------------------
-	// call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
-#ifdef CONFIG_XEN
-	XEN_HYPER_SSM_PSR_DT			// Xen counterpart of the "ssm psr.dt" below
-#else
-	ssm psr.dt
-	;;
-	srlz.i
-#endif
-	;;
-	SAVE_MIN_WITH_COVER
-	alloc r15=ar.pfs,0,0,3,0		// three output registers: out0=ifa, out1=isr, out2=pt_regs pointer
-#ifdef CONFIG_XEN
-	movl r3=XSI_ISR
-	;;
-	ld8 out1=[r3],XSI_IFA_OFS-XSI_ISR_OFS	// get vcr.isr, point to ifa
-	;;
-	ld8 out0=[r3]				// get vcr.ifa
-	mov r14=1
-	;;
-	add r3=XSI_PSR_IC_OFS-XSI_IFA_OFS, r3	// point to vpsr.ic
-	;;
-	st4 [r3]=r14				// vpsr.ic = 1
-	adds r3=8,r2				// set up second base pointer
-	;;
-#else
-	mov out0=cr.ifa
-	mov out1=cr.isr
-	adds r3=8,r2				// set up second base pointer
-	;;
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	;;
-#endif
-#ifdef CONFIG_XEN
-    
-#define MASK_TO_PEND_OFS    (-1)	/* post-increment used below to step from evtchn_upcall_mask to evtchn_upcall_pending */
-    
-(p15)	movl r14=XSI_PSR_I_ADDR			// slot that holds the address of vpsr.i
-	;;
-(p15)	ld8 r14=[r14]				// r14 = address of vpsr.i
-	;;
-(p15)	st1 [r14]=r0,MASK_TO_PEND_OFS		// if (p15) vpsr.i = 1
-	;;		// if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
-(p15)	ld1 r14=[r14]	// if (vcpu->vcpu_info->evtchn_upcall_pending)
-	;;
-(p15)	cmp.ne	p15,p0=r14,r0			// p15 stays set only when the pending byte is non-zero
-	;;
-(p15)	XEN_HYPER_SSM_I
-#else
-(p15)	ssm psr.i				// restore psr.i
-#endif
-	movl r14=ia64_leave_kernel
-	;;
-	SAVE_REST
-	mov rp=r14				// rp <- ia64_leave_kernel
-	;;
-	adds out2=16,r12			// out2 = pointer to pt_regs
-	br.call.sptk.many b6=ia64_do_page_fault	// ignore return address
-END(page_fault)
-
-	.org ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(dkey_miss)
-	DBG_FAULT(7)
-	FAULT(7)	// no inline handling: r19 <- 7, then branch to dispatch_to_fault_handler
-END(dkey_miss)
-
-	.org ia64_ivt+0x2000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(dirty_bit)
-	DBG_FAULT(8)
-	/*
-	 * What we do here is to simply turn on the dirty bit in the PTE.  We need to
-	 * update both the page-table and the TLB entry.  To efficiently access the PTE,
-	 * we address it through the virtual page table.  Most likely, the TLB entry for
-	 * the relevant virtual page table page is still present in the TLB so we can
-	 * normally do this without additional TLB misses.  In case the necessary virtual
-	 * page table TLB entry isn't present, we take a nested TLB miss hit where we look
-	 * up the physical address of the L3 PTE and then continue at label 1 below.
-	 */
-#ifdef CONFIG_XEN
-	movl r16=XSI_IFA			// Xen: the faulting address is read from the XSI_IFA slot
-	;;
-	ld8 r16=[r16]
-	;;
-#else
-	mov r16=cr.ifa				// get the address that caused the fault
-#endif
-	movl r30=1f				// load continuation point in case of nested fault
-	;;
-#ifdef CONFIG_XEN
-	mov r18=r8;				// preserve r8 across the hyperprivop
-	mov r8=r16;				// r8 <- faulting address (native path: thash r17=r16)
-	XEN_HYPER_THASH;;
-	mov r17=r8;				// r17 <- result left in r8
-	mov r8=r18;;				// restore r8
-#else
-	thash r17=r16				// compute virtual address of L3 PTE
-#endif
-	mov r29=b0				// save b0 in case of nested fault
-	mov r31=pr				// save pr
-#ifdef CONFIG_SMP
-	mov r28=ar.ccv				// save ar.ccv
-	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
-	mov ar.ccv=r18				// set compare value for cmpxchg
-	or r25=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
-	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
-	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only update if page is present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only compare if page is present
-	;;
-#ifdef CONFIG_XEN
-(p6)	mov r18=r8				// preserve r8 (r18's old PTE value is no longer needed)
-(p6)	mov r8=r25				// r8 <- updated PTE (native path: itc.d r25)
-	;;
-(p6)	XEN_HYPER_ITC_D
-	;;
-(p6)	mov r8=r18				// restore r8
-#else
-(p6)	itc.d r25				// install updated PTE
-#endif	
-	;;
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
-	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
-	;;
-(p7)	ptc.l r16,r24
-	mov b0=r29				// restore b0
-	mov ar.ccv=r28				// restore ar.ccv
-#else
-	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
-	or r18=_PAGE_D|_PAGE_A,r18		// set the dirty and accessed bits
-	mov b0=r29				// restore b0
-	;;
-	st8 [r17]=r18				// store back updated PTE
-	itc.d r18				// install updated PTE; NOTE(review): raw itc.d with no CONFIG_XEN variant on this !CONFIG_SMP path -- confirm intended
-#endif
-	mov pr=r31,-1				// restore pr
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI				// Xen counterpart of the "rfi" below
-	dv_serialize_data
-#else
-	rfi
-#endif
-END(dirty_bit)
-
-	.org ia64_ivt+0x2400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(iaccess_bit)
-	DBG_FAULT(9)
-	// Like Entry 8, except for instruction access
-#ifdef CONFIG_XEN
-	movl r16=XSI_IFA			// Xen: the faulting address is read from the XSI_IFA slot
-	;;
-	ld8 r16=[r16]
-	;;
-#else
-	mov r16=cr.ifa				// get the address that caused the fault
-#endif
-	movl r30=1f				// load continuation point in case of nested fault
-	mov r31=pr				// save predicates
-#ifdef CONFIG_ITANIUM
-	/*
-	 * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
-	 */
-	mov r17=cr.ipsr				// NOTE(review): direct cr.ipsr/cr.iip reads with no CONFIG_XEN variant -- confirm intended
-	;;
-	mov r18=cr.iip
-	tbit.z p6,p0=r17,IA64_PSR_IS_BIT	// IA64 instruction set?
-	;;
-(p6)	mov r16=r18				// if so, use cr.iip instead of cr.ifa
-#endif /* CONFIG_ITANIUM */
-	;;
-#ifdef CONFIG_XEN
-	mov r18=r8;				// preserve r8 across the hyperprivop
-	mov r8=r16;				// r8 <- faulting address (native path: thash r17=r16)
-	XEN_HYPER_THASH;;
-	mov r17=r8;				// r17 <- result left in r8
-	mov r8=r18;;				// restore r8
-#else
-	thash r17=r16				// compute virtual address of L3 PTE
-#endif
-	mov r29=b0				// save b0 in case of nested fault
-#ifdef CONFIG_SMP
-	mov r28=ar.ccv				// save ar.ccv
-	;;
-1:	ld8 r18=[r17]
-	;;
-	mov ar.ccv=r18				// set compare value for cmpxchg
-	or r25=_PAGE_A,r18			// set the accessed bit
-	tbit.z p7,p6 = r18,_PAGE_P_BIT	 	// Check present bit
-	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page present
-	;;
-#ifdef CONFIG_XEN
-	mov r26=r8				// preserve r8 (unconditionally; only the hyperprivop is predicated)
-	mov r8=r25				// r8 <- updated PTE (native path: itc.i r25)
-	;;
-(p6)	XEN_HYPER_ITC_I
-	;;
-	mov r8=r26				// restore r8
-	;;
-#else
-(p6)	itc.i r25				// install updated PTE
-#endif
-	;;
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-
-	ld8 r18=[r17]				// read PTE again
-	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
-	;;
-(p7)	ptc.l r16,r24
-	mov b0=r29				// restore b0
-	mov ar.ccv=r28				// restore ar.ccv
-#else /* !CONFIG_SMP */
-	;;
-1:	ld8 r18=[r17]
-	;;
-	or r18=_PAGE_A,r18			// set the accessed bit
-	mov b0=r29				// restore b0
-	;;
-	st8 [r17]=r18				// store back updated PTE
-	itc.i r18				// install updated PTE; NOTE(review): raw itc.i with no CONFIG_XEN variant on this !CONFIG_SMP path -- confirm intended
-#endif /* !CONFIG_SMP */
-	mov pr=r31,-1				// restore predicates
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI				// Xen counterpart of the "rfi" below
-	dv_serialize_data
-#else
-	rfi
-#endif
-END(iaccess_bit)
-
-	.org ia64_ivt+0x2800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(daccess_bit)
-	DBG_FAULT(10)
-	// Like Entry 8, except for data access
-#ifdef CONFIG_XEN
-	movl r16=XSI_IFA			// Xen: the faulting address is read from the XSI_IFA slot
-	;;
-	ld8 r16=[r16]
-	;;
-#else
-	mov r16=cr.ifa				// get the address that caused the fault
-#endif
-	movl r30=1f				// load continuation point in case of nested fault
-	;;
-#ifdef CONFIG_XEN
-	mov r18=r8				// preserve r8 across the hyperprivop
-	mov r8=r16				// r8 <- faulting address (native path: thash r17=r16)
-	XEN_HYPER_THASH
-	;;
-	mov r17=r8				// r17 <- result left in r8
-	mov r8=r18				// restore r8
-	;;
-#else
-	thash r17=r16				// compute virtual address of L3 PTE
-#endif
-	mov r31=pr				// save predicates
-	mov r29=b0				// save b0 in case of nested fault
-#ifdef CONFIG_SMP
-	mov r28=ar.ccv				// save ar.ccv
-	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
-	mov ar.ccv=r18				// set compare value for cmpxchg
-	or r25=_PAGE_A,r18			// set the accessed bit
-	tbit.z p7,p6 = r18,_PAGE_P_BIT		// Check present bit
-	;;
-(p6)	cmpxchg8.acq r26=[r17],r25,ar.ccv	// Only if page is present
-	mov r24=PAGE_SHIFT<<2
-	;;
-(p6)	cmp.eq p6,p7=r26,r18			// Only if page is present
-	;;
-#ifdef CONFIG_XEN
-	mov r26=r8				// preserve r8 (unconditionally; only the hyperprivop is predicated)
-	mov r8=r25				// r8 <- updated PTE (native path: itc.d r25)
-	;;
-(p6)	XEN_HYPER_ITC_D
-	;;
-	mov r8=r26				// restore r8
-	;;
-#else
-(p6)	itc.d r25				// install updated PTE
-#endif
-	/*
-	 * Tell the assemblers dependency-violation checker that the above "itc" instructions
-	 * cannot possibly affect the following loads:
-	 */
-	dv_serialize_data
-	;;
-	ld8 r18=[r17]				// read PTE again
-	;;
-	cmp.eq p6,p7=r18,r25			// is it same as the newly installed
-	;;
-(p7)	ptc.l r16,r24
-	mov ar.ccv=r28				// restore ar.ccv
-#else
-	;;
-1:	ld8 r18=[r17]
-	;;					// avoid RAW on r18
-	or r18=_PAGE_A,r18			// set the accessed bit
-	;;
-	st8 [r17]=r18				// store back updated PTE
-	itc.d r18				// install updated PTE; NOTE(review): raw itc.d with no CONFIG_XEN variant on this !CONFIG_SMP path -- confirm intended
-#endif
-	mov b0=r29				// restore b0
-	mov pr=r31,-1				// restore predicates
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI				// Xen counterpart of the "rfi" below
-	dv_serialize_data
-#else
-	rfi
-#endif
-END(daccess_bit)
-
-	.org ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(break_fault)
-	/*
-	 * The streamlined system call entry/exit paths only save/restore the initial part
-	 * of pt_regs.  This implies that the callers of system-calls must adhere to the
-	 * normal procedure calling conventions.
-	 *
-	 *   Registers to be saved & restored:
-	 *	CR registers: cr.ipsr, cr.iip, cr.ifs
-	 *	AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
-	 * 	others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
-	 *   Registers to be restored only:
-	 * 	r8-r11: output value from the system call.
-	 *
-	 * During system call exit, scratch registers (including r15) are modified/cleared
-	 * to prevent leaking bits from kernel to user level.
-	 */
-	DBG_FAULT(11)
-	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
-#ifdef CONFIG_XEN
-	movl r22=XSI_IPSR
-	;;
-	ld8 r29=[r22],XSI_IIM_OFS-XSI_IPSR_OFS	// get ipsr, point to iim
-#else
-	mov r29=cr.ipsr				// M2 (12 cyc)
-#endif
-	mov r31=pr				// I0 (2 cyc)
-
-#ifdef CONFIG_XEN
-	;;
-	ld8 r17=[r22],XSI_IIP_OFS-XSI_IIM_OFS	// get iim, point to iip
-#else
-	mov r17=cr.iim				// M2 (2 cyc)
-#endif
-	mov.m r27=ar.rsc			// M2 (12 cyc)
-	mov r18=__IA64_BREAK_SYSCALL		// A
-
-	mov.m ar.rsc=0				// M2
-	mov.m r21=ar.fpsr			// M2 (12 cyc)
-	mov r19=b6				// I0 (2 cyc)
-	;;
-	mov.m r23=ar.bspstore			// M2 (12 cyc)
-	mov.m r24=ar.rnat			// M2 (5 cyc)
-	mov.i r26=ar.pfs			// I0 (2 cyc)
-
-	invala					// M0|1
-	nop.m 0					// M
-	mov r20=r1				// A			save r1
-
-	nop.m 0
-	movl r30=sys_call_table			// X
-
-#ifdef CONFIG_XEN
-	ld8 r28=[r22]				// get iip
-#else
-	mov r28=cr.iip				// M2 (2 cyc)
-#endif
-	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
-(p7)	br.cond.spnt non_syscall		// B  no ->
-	//
-	// From this point on, we are definitely on the syscall-path
-	// and we can use (non-banked) scratch registers.
-	//
-///////////////////////////////////////////////////////////////////////
-	mov r1=r16				// A    move task-pointer to "addl"-addressable reg
-	mov r2=r16				// A    setup r2 for ia64_syscall_setup
-	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// A	r9 = &current_thread_info()->flags
-
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
-	adds r15=-1024,r15			// A    subtract 1024 from syscall number
-	mov r3=NR_syscalls - 1
-	;;
-	ld1.bias r17=[r16]			// M0|1 r17 = current->thread.on_ustack flag
-	ld4 r9=[r9]				// M0|1 r9 = current_thread_info()->flags
-	extr.u r8=r29,41,2			// I0   extract ei field from cr.ipsr
-
-	shladd r30=r15,3,r30			// A    r30 = sys_call_table + 8*(syscall-1024)
-	addl r22=IA64_RBS_OFFSET,r1		// A    compute base of RBS
-	cmp.leu p6,p7=r15,r3			// A    syscall number in range?
-	;;
-
-	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch RBS
-(p6)	ld8 r30=[r30]				// M0|1 load address of syscall entry point
-	tnat.nz.or p7,p0=r15			// I0	is syscall nr a NaT?
-
-	mov.m ar.bspstore=r22			// M2   switch to kernel RBS
-	cmp.eq p8,p9=2,r8			// A    ipsr.ei==2?
-	;;
-
-(p8)	mov r8=0				// A    clear ei to 0
-(p7)	movl r30=sys_ni_syscall			// X
-
-(p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
-(p9)	adds r8=1,r8				// A    increment ei to next slot
-	nop.i 0
-	;;
-
-	mov.m r25=ar.unat			// M2 (5 cyc)
-	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
-	adds r15=1024,r15			// A    restore original syscall number
-	//
-	// If any of the above loads miss in L1D, we'll stall here until
-	// the data arrives.
-	//
-///////////////////////////////////////////////////////////////////////
-	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
-	mov b6=r30				// I0   setup syscall handler branch reg early
-	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
-
-	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
-	mov r18=ar.bsp				// M2 (12 cyc)
-(pKStk)	br.cond.spnt .break_fixup		// B	we're already in kernel-mode -- fix up RBS
-	;;
-.back_from_break_fixup:
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute base of memory stack
-	cmp.eq p14,p0=r9,r0			// A    p14 <- no syscall tracing/auditing flag is set
-	br.call.sptk.many b7=ia64_syscall_setup	// B
-1:
-	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
-	nop 0
-#ifdef CONFIG_XEN
-	mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;;	// Xen counterpart of bsw.1; b0 is kept in r2 across the call
-#else
-	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
-#endif
-	;;
-
-#ifdef CONFIG_XEN
-	movl r16=XSI_PSR_IC
-	mov r3=1
-	;;
-	st4 [r16]=r3,XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS	// vpsr.ic = 1, point to XSI_PSR_I_ADDR
-#else
-	ssm psr.ic | PSR_DEFAULT_BITS		// M2	now it's safe to re-enable intr.-collection
-#endif
-	movl r3=ia64_ret_from_syscall		// X
-	;;
-
-	srlz.i					// M0   ensure interruption collection is on
-	mov rp=r3				// I0   set the real return addr
-(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
-
-#ifdef CONFIG_XEN
-(p15)	ld8 r16=[r16]				// r16 = address of vpsr.i (loaded from the XSI_PSR_I_ADDR slot)
-	;;
-(p15)	st1 [r16]=r0,MASK_TO_PEND_OFS		// if (p15) vpsr.i = 1
-	;;		// if (p15) (vcpu->vcpu_info->evtchn_upcall_mask)=0
-(p15)	ld1 r2=[r16]	// if (vcpu->vcpu_info->evtchn_upcall_pending)
-	;;
-(p15)	cmp.ne.unc p6,p0=r2,r0
-	;;
-(p6)	XEN_HYPER_SSM_I				//   do a real ssm psr.i
-#else
-(p15)	ssm psr.i				// M2   restore psr.i
-#endif
-(p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handler (ignore return addr)
-	br.cond.spnt.many ia64_trace_syscall	// B	do the syscall-tracing work
-	// NOT REACHED
-///////////////////////////////////////////////////////////////////////
-	// On entry, we optimistically assumed that we're coming from user-space.
-	// For the rare cases where a system-call is done from within the kernel,
-	// we fix things up at this point:
-.break_fixup:
-	add r1=-IA64_PT_REGS_SIZE,sp		// A    allocate space for pt_regs structure
-	mov ar.rnat=r24				// M2	restore kernel's AR.RNAT
-	;;
-	mov ar.bspstore=r23			// M2	restore kernel's AR.BSPSTORE
-	br.cond.sptk .back_from_break_fixup
-END(break_fault)
-
-	.org ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(interrupt)
-	DBG_FAULT(12)
-	mov r31=pr		// prepare to save predicates
-	;;
-	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
-#ifdef CONFIG_XEN
-	movl r3=XSI_PSR_IC	// Xen: the virtual psr.ic word is written instead of using ssm
-	mov r14=1
-	;;
-	st4 [r3]=r14		// vpsr.ic = 1
-#else
-	ssm psr.ic | PSR_DEFAULT_BITS	// turn interruption collection back on
-#endif
-	;;
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
-	;;
-	SAVE_REST
-	;;
-	alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
-#ifdef CONFIG_XEN
-	;;
-	br.call.sptk.many rp=xen_get_ivr	// Xen: call xen_get_ivr rather than reading cr.ivr
-	;;
-	mov out0=r8		// pass the value xen_get_ivr left in r8 (stands in for cr.ivr) as first arg
-#else
-	mov out0=cr.ivr		// pass cr.ivr as first arg
-#endif
-	add out1=16,sp		// pass pointer to pt_regs as second arg
-	;;
-	srlz.d			// make sure we see the effect of cr.ivr
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14		// rp = ia64_leave_kernel, where the handler's return will land
-	br.call.sptk.many b6=ia64_handle_irq	// link register is b6, so rp set above is left intact
-END(interrupt)
-
-	.org ia64_ivt+0x3400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
-	DBG_FAULT(13)
-	FAULT(13)
-
-	.org ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
-	DBG_FAULT(14)
-	FAULT(14)
-
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply move the following code to a more
-	 * suitable spot...
-	 *
-	 * ia64_syscall_setup() is a separate subroutine so that it can
-	 *	allocate stacked registers so it can safely demine any
-	 *	potential NaT values from the input registers.
-	 *
-	 * On entry:
-	 *	- executing on bank 0 or bank 1 register set (doesn't matter)
-	 *	-  r1: stack pointer
-	 *	-  r2: current task pointer
-	 *	-  r3: preserved
-	 *	- r11: original contents (saved ar.pfs to be saved)
-	 *	- r12: original contents (sp to be saved)
-	 *	- r13: original contents (tp to be saved)
-	 *	- r15: original contents (syscall # to be saved)
-	 *	- r18: saved bsp (after switching to kernel stack)
-	 *	- r19: saved b6
-	 *	- r20: saved r1 (gp)
-	 *	- r21: saved ar.fpsr
-	 *	- r22: kernel's register backing store base (krbs_base)
-	 *	- r23: saved ar.bspstore
-	 *	- r24: saved ar.rnat
-	 *	- r25: saved ar.unat
-	 *	- r26: saved ar.pfs
-	 *	- r27: saved ar.rsc
-	 *	- r28: saved cr.iip
-	 *	- r29: saved cr.ipsr
-	 *	- r31: saved pr
-	 *	-  b0: original contents (to be saved)
-	 * On exit:
-	 *	-  p10: TRUE if syscall is invoked with more than 8 out
-	 *		registers or r15's Nat is true
-	 *	-  r1: kernel's gp
-	 *	-  r3: preserved (same as on entry)
-	 *	-  r8: -EINVAL if p10 is true
-	 *	- r12: points to kernel stack
-	 *	- r13: points to current task
-	 *	- r14: preserved (same as on entry)
-	 *	- p13: preserved
-	 *	- p15: TRUE if interrupts need to be re-enabled
-	 *	- ar.fpsr: set to kernel settings
-	 *	-  b6: preserved (same as on entry)
-	 */
-#ifndef CONFIG_XEN
-GLOBAL_ENTRY(ia64_syscall_setup)
-#if PT(B6) != 0
-# error This code assumes that b6 is the first field in pt_regs.
-#endif
-	st8 [r1]=r19				// save b6
-	add r16=PT(CR_IPSR),r1			// initialize first base pointer
-	add r17=PT(R11),r1			// initialize second base pointer
-	;;
-	alloc r19=ar.pfs,8,0,0,0		// ensure in0-in7 are writable
-	st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR)	// save cr.ipsr
-	tnat.nz p8,p0=in0			// p8 = in0 is a NaT
-
-	st8.spill [r17]=r11,PT(CR_IIP)-PT(R11)	// save r11
-	tnat.nz p9,p0=in1			// p9 = in1 is a NaT
-(pKStk)	mov r18=r0				// make sure r18 isn't NaT
-	;;
-
-	st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS)	// save ar.pfs
-	st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP)	// save cr.iip
-	mov r28=b0				// save b0 (2 cyc)
-	;;
-
-	st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT)	// save ar.unat
-	dep r19=0,r19,38,26			// clear all bits but 0..37 [I0]
-(p8)	mov in0=-1				// replace a NaT in0 with -1
-	;;
-
-	st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS)	// store ar.pfs.pfm in cr.ifs
-	extr.u r11=r19,7,7	// I0		// get sol of ar.pfs
-	and r8=0x7f,r19		// A		// get sof of ar.pfs
-
-	st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
-	tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0	// p15 = psr.i was set in the saved cr.ipsr
-(p9)	mov in1=-1				// replace a NaT in1 with -1
-	;;
-
-(pUStk) sub r18=r18,r22				// r18=RSE.ndirty*8
-	tnat.nz p10,p0=in2			// p10 = in2 is a NaT
-	add r11=8,r11				// r11 = sol + 8, the largest frame size accepted below
-	;;
-(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16		// skip over ar_rnat field
-(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17	// skip over ar_bspstore field
-	tnat.nz p11,p0=in3			// p11 = in3 is a NaT
-	;;
-(p10)	mov in2=-1				// replace a NaT in2 with -1
-	tnat.nz p12,p0=in4				// [I0]
-(p11)	mov in3=-1				// replace a NaT in3 with -1
-	;;
-(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT)	// save ar.rnat
-(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE)	// save ar.bspstore
-	shl r18=r18,16				// compute ar.rsc to be used for "loadrs"
-	;;
-	st8 [r16]=r31,PT(LOADRS)-PT(PR)		// save predicates
-	st8 [r17]=r28,PT(R1)-PT(B0)		// save b0
-	tnat.nz p13,p0=in5				// [I0]
-	;;
-	st8 [r16]=r18,PT(R12)-PT(LOADRS)	// save ar.rsc value for "loadrs"
-	st8.spill [r17]=r20,PT(R13)-PT(R1)	// save original r1
-(p12)	mov in4=-1				// replace a NaT in4 with -1
-	;;
-
-.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12)	// save r12
-.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13)		// save r13
-(p13)	mov in5=-1				// replace a NaT in5 with -1
-	;;
-	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
-	tnat.nz p13,p0=in6			// p13 reused: in6 is a NaT
-	cmp.lt p10,p9=r11,r8	// frame size can't be more than local+8
-	;;
-	mov r8=1		// non-zero value stored into pt_regs.r8 below
-(p9)	tnat.nz p10,p0=r15	// frame size ok: p10 = syscall number (r15) is a NaT
-	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
-
-	st8.spill [r17]=r15			// save r15
-	tnat.nz p8,p0=in7			// p8 reused: in7 is a NaT
-	nop.i 0
-
-	mov r13=r2				// establish `current'
-	movl r1=__gp				// establish kernel global pointer
-	;;
-	st8 [r16]=r8		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
-(p13)	mov in6=-1		// replace a NaT in6 with -1
-(p8)	mov in7=-1		// replace a NaT in7 with -1
-
-	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
-	movl r17=FPSR_DEFAULT
-	;;
-	mov.m ar.fpsr=r17			// set ar.fpsr to kernel default value
-(p10)	mov r8=-EINVAL				// bad call frame or NaT syscall number: r8 = -EINVAL
-	br.ret.sptk.many b7			// return through b7
-END(ia64_syscall_setup)
-#endif
-
-	.org ia64_ivt+0x3c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
-	DBG_FAULT(15)
-	FAULT(15)
-
-	/*
-	 * Squatting in this space ...
-	 *
-	 * This special case dispatcher for illegal operation faults allows preserved
-	 * registers to be modified through a callback function (asm only) that is handed
-	 * back from the fault handler in r8. Up to three arguments can be passed to the
-	 * callback function by returning an aggregate with the callback as its first
-	 * element, followed by the arguments.
-	 */
-ENTRY(dispatch_illegal_op_fault)
-	.prologue
-	.body
-	SAVE_MIN_WITH_COVER
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i		// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i	// restore psr.i
-	adds r3=8,r2	// set up second base pointer for SAVE_REST
-	;;
-	alloc r14=ar.pfs,0,0,1,0	// must be first in insn group
-	mov out0=ar.ec			// pass ar.ec as the only argument
-	;;
-	SAVE_REST
-	PT_REGS_UNWIND_INFO(0)
-	;;
-	br.call.sptk.many rp=ia64_illegal_op_fault
-.ret0:	;;
-	alloc r14=ar.pfs,0,0,3,0	// must be first in insn group
-	mov out0=r9			// r9..r11: the up-to-three callback arguments that follow r8
-	mov out1=r10
-	mov out2=r11
-	movl r15=ia64_leave_kernel
-	;;
-	mov rp=r15			// rp = ia64_leave_kernel for the callback's return
-	mov b6=r8			// r8 = callback handed back by the fault handler
-	;;
-	cmp.ne p6,p0=0,r8		// p6 = a callback was returned (r8 != 0)
-(p6)	br.call.dpnt.many b6=b6		// call returns to ia64_leave_kernel
-	br.sptk.many ia64_leave_kernel	// no callback: go straight to ia64_leave_kernel
-END(dispatch_illegal_op_fault)
-
-	.org ia64_ivt+0x4000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
-	DBG_FAULT(16)
-	FAULT(16)
-
-	.org ia64_ivt+0x4400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
-	DBG_FAULT(17)
-	FAULT(17)
-
-ENTRY(non_syscall)
-	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
-	;;
-	SAVE_MIN_WITH_COVER
-
-	// There is no particular reason for this code to be here, other than that
-	// there happens to be space here that would go unused otherwise.  If this
-	// fault ever gets "unreserved", simply move the following code to a more
-	// suitable spot...
-
-	alloc r14=ar.pfs,0,0,2,0
-	mov out0=cr.iim			// pass cr.iim as first arg
-	add out1=16,sp			// pass pointer to pt_regs as second arg
-	adds r3=8,r2			// set up second base pointer for SAVE_REST
-
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i				// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i			// restore psr.i
-	movl r15=ia64_leave_kernel
-	;;
-	SAVE_REST
-	mov rp=r15			// rp = ia64_leave_kernel
-	;;
-	br.call.sptk.many b6=ia64_bad_break	// avoid WAW on CFM and ignore return addr
-END(non_syscall)
-
-	.org ia64_ivt+0x4800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
-	DBG_FAULT(18)
-	FAULT(18)
-
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply move the following code to a more
-	 * suitable spot...
-	 */
-
-ENTRY(dispatch_unaligned_handler)
-	SAVE_MIN_WITH_COVER
-	;;
-	alloc r14=ar.pfs,0,0,2,0		// now it's safe (must be first in insn group!)
-	mov out0=cr.ifa				// pass cr.ifa as first arg
-	adds out1=16,sp				// pass sp+16 as second arg (pt_regs pointer, as in the other dispatchers)
-
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i				// restore psr.i
-	adds r3=8,r2				// set up second base pointer
-	;;
-	SAVE_REST
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14				// rp = ia64_leave_kernel
-	br.sptk.many ia64_prepare_handle_unaligned	// plain branch (not a call); rp was set above
-END(dispatch_unaligned_handler)
-
-	.org ia64_ivt+0x4c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
-	DBG_FAULT(19)
-	FAULT(19)
-
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply move the following code to a more
-	 * suitable spot...
-	 */
-
-ENTRY(dispatch_to_fault_handler)
-	/*
-	 * Input:
-	 *	psr.ic:	off
-	 *	r19:	fault vector number (e.g., 24 for General Exception)
-	 *	r31:	contains saved predicates (pr)
-	 */
-	SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,5,0		// five outgoing args for ia64_fault
-	mov out0=r15				// NOTE(review): presumably the vector number, moved to r15 by SAVE_MIN_WITH_COVER_R19 -- confirm
-#ifdef CONFIG_XEN
-	movl out1=XSI_ISR			// Xen: read the values from the XSI_* words instead of the control registers
-	;;
-	adds out2=XSI_IFA-XSI_ISR,out1		// out2 = &XSI_IFA
-	adds out3=XSI_IIM-XSI_ISR,out1		// out3 = &XSI_IIM
-	adds out4=XSI_ITIR-XSI_ISR,out1		// out4 = &XSI_ITIR
-	;;
-	ld8 out1=[out1]				// isr
-	ld8 out2=[out2]				// ifa
-	ld8 out3=[out3]				// iim (must use the XSI_IIM pointer, not out4, which would duplicate itir)
-	ld8 out4=[out4]				// itir
-	;;
-#else
-	mov out1=cr.isr
-	mov out2=cr.ifa
-	mov out3=cr.iim
-	mov out4=cr.itir
-	;;
-#endif
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i				// restore psr.i
-	adds r3=8,r2				// set up second base pointer for SAVE_REST
-	;;
-	SAVE_REST
-	movl r14=ia64_leave_kernel
-	;;
-	mov rp=r14				// rp = ia64_leave_kernel
-	br.call.sptk.many b6=ia64_fault		// link register is b6, so rp set above is left intact
-END(dispatch_to_fault_handler)
-
-//
-// --- End of long entries, Beginning of short entries
-//
-
-	.org ia64_ivt+0x5000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
-ENTRY(page_not_present)
-	DBG_FAULT(20)
-	mov r16=cr.ifa		// r16 = cr.ifa, the address operand for ptc.l below
-	rsm psr.dt		// clear psr.dt
-	/*
-	 * The Linux page fault handler doesn't expect non-present pages to be in
-	 * the TLB.  Flush the existing entry now, so we meet that expectation.
-	 */
-	mov r17=PAGE_SHIFT<<2	// size operand for ptc.l
-	;;
-	ptc.l r16,r17		// purge the entry for r16 from the local TLB
-	;;
-	mov r31=pr		// save predicates in r31
-	srlz.d			// serialize the psr.dt change
-	br.sptk.many page_fault	// continue in the common page_fault path
-END(page_not_present)
-
-	.org ia64_ivt+0x5100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
-ENTRY(key_permission)
-	DBG_FAULT(21)
-	mov r16=cr.ifa		// r16 = cr.ifa
-	rsm psr.dt		// clear psr.dt
-	mov r31=pr		// save predicates in r31
-	;;
-	srlz.d			// serialize the psr.dt change
-	br.sptk.many page_fault	// continue in the common page_fault path
-END(key_permission)
-
-	.org ia64_ivt+0x5200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(iaccess_rights)
-	DBG_FAULT(22)
-	mov r16=cr.ifa		// r16 = cr.ifa
-	rsm psr.dt		// clear psr.dt
-	mov r31=pr		// save predicates in r31
-	;;
-	srlz.d			// serialize the psr.dt change
-	br.sptk.many page_fault	// continue in the common page_fault path
-END(iaccess_rights)
-
-	.org ia64_ivt+0x5300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(daccess_rights)
-	DBG_FAULT(23)
-#ifdef CONFIG_XEN
-	movl r16=XSI_IFA	// Xen: the value is read from the XSI_IFA word instead of cr.ifa
-	;;
-	ld8 r16=[r16]		// r16 = contents of XSI_IFA
-	;;
-	XEN_HYPER_RSM_PSR_DT	// Xen replacement for the native "rsm psr.dt" in the #else branch
-#else
-	mov r16=cr.ifa		// r16 = cr.ifa
-	rsm psr.dt		// clear psr.dt
-#endif
-	mov r31=pr		// save predicates in r31
-	;;
-	srlz.d			// serialize the psr.dt change
-	br.sptk.many page_fault	// continue in the common page_fault path
-END(daccess_rights)
-
-	.org ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(general_exception)
-	DBG_FAULT(24)
-	mov r16=cr.isr		// r16 = cr.isr
-	mov r31=pr		// save predicates in r31
-	;;
-	cmp4.eq p6,p0=0,r16	// p6 = low 32 bits of cr.isr are all zero
-(p6)	br.sptk.many dispatch_illegal_op_fault	// that case is handled by the illegal-op dispatcher
-	;;
-	mov r19=24		// fault number
-	br.sptk.many dispatch_to_fault_handler	// everything else: generic handler, vector number in r19
-END(general_exception)
-
-	.org ia64_ivt+0x5500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(disabled_fp_reg)
-	DBG_FAULT(25)
-	rsm psr.dfh		// ensure we can access fph
-	;;
-	srlz.d			// serialize the psr.dfh change
-	mov r31=pr		// save predicates in r31
-	mov r19=25		// fault number
-	br.sptk.many dispatch_to_fault_handler	// generic handler, vector number in r19
-END(disabled_fp_reg)
-
-	.org ia64_ivt+0x5600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(nat_consumption)
-	DBG_FAULT(26)
-
-	mov r16=cr.ipsr				// r16 = cr.ipsr (modified and written back below)
-	mov r17=cr.isr				// r17 = cr.isr
-	mov r31=pr				// save PR
-	;;
-	and r18=0xf,r17				// r18 = cr.isr.code{3:0}
-	tbit.z p6,p0=r17,IA64_ISR_NA_BIT	// p6 = (cr.isr.na == 0)
-	;;
-	cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18	// p6 |= (code{3:0} != LFETCH)
-	dep r16=-1,r16,IA64_PSR_ED_BIT,1	// set the ed bit in the ipsr copy
-(p6)	br.cond.spnt 1f		// branch if (cr.isr.na == 0 || cr.isr.code{3:0} != LFETCH)
-	;;
-	mov cr.ipsr=r16		// set cr.ipsr.ed
-	mov pr=r31,-1		// restore predicates
-	;;
-	rfi			// return from the interruption with ipsr.ed set
-
-1:	mov pr=r31,-1		// restore predicates before taking the generic fault path
-	;;
-	FAULT(26)
-END(nat_consumption)
-
-	.org ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(speculation_vector)
-	DBG_FAULT(27)
-	/*
-	 * A [f]chk.[as] instruction needs to take the branch to the recovery code but
-	 * this part of the architecture is not implemented in hardware on some CPUs, such
-	 * as Itanium.  Thus, in general we need to emulate the behavior.  IIM contains
-	 * the relative target (not yet sign extended).  So after sign extending it we
-	 * simply add it to IIP.  We also need to reset the EI field of the IPSR to zero,
-	 * i.e., the slot to restart into.
-	 *
-	 * cr.iim contains zero_ext(imm21)
-	 */
-	mov r18=cr.iim			// r18 = zero_ext(imm21)
-	;;
-	mov r17=cr.iip			// r17 = address to be adjusted
-	shl r18=r18,43			// put sign bit in position (43=64-21)
-	;;
-
-	mov r16=cr.ipsr			// r16 = cr.ipsr, EI cleared below
-	shr r18=r18,39			// sign extend (39=43-4)
-	;;
-
-	add r17=r17,r18			// now add the offset
-	;;
-	mov cr.iip=r17			// resume at the computed target
-	dep r16=0,r16,41,2		// clear EI
-	;;
-
-	mov cr.ipsr=r16			// write back ipsr with EI = 0
-	;;
-
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI;
-#else
-	rfi				// and go back
-#endif
-END(speculation_vector)
-
-	.org ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
-	DBG_FAULT(28)
-	FAULT(28)
-
-	.org ia64_ivt+0x5900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(debug_vector)
-	DBG_FAULT(29)
-	FAULT(29)
-END(debug_vector)
-
-	.org ia64_ivt+0x5a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(unaligned_access)
-	DBG_FAULT(30)
-	mov r31=pr		// prepare to save predicates
-	;;
-	br.sptk.many dispatch_unaligned_handler	// the real work is done in dispatch_unaligned_handler
-END(unaligned_access)
-
-	.org ia64_ivt+0x5b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(unsupported_data_reference)
-	DBG_FAULT(31)
-	FAULT(31)
-END(unsupported_data_reference)
-
-	.org ia64_ivt+0x5c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
-ENTRY(floating_point_fault)
-	DBG_FAULT(32)
-	FAULT(32)
-END(floating_point_fault)
-
-	.org ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(floating_point_trap)
-	DBG_FAULT(33)
-	FAULT(33)
-END(floating_point_trap)
-
-	.org ia64_ivt+0x5e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(lower_privilege_trap)
-	DBG_FAULT(34)
-	FAULT(34)
-END(lower_privilege_trap)
-
-	.org ia64_ivt+0x5f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(taken_branch_trap)
-	DBG_FAULT(35)
-	FAULT(35)
-END(taken_branch_trap)
-
-	.org ia64_ivt+0x6000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(single_step_trap)
-	DBG_FAULT(36)
-	FAULT(36)
-END(single_step_trap)
-
-	.org ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Reserved
-	DBG_FAULT(37)
-	FAULT(37)
-
-	.org ia64_ivt+0x6200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
-	DBG_FAULT(38)
-	FAULT(38)
-
-	.org ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
-	DBG_FAULT(39)
-	FAULT(39)
-
-	.org ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
-	DBG_FAULT(40)
-	FAULT(40)
-
-	.org ia64_ivt+0x6500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
-	DBG_FAULT(41)
-	FAULT(41)
-
-	.org ia64_ivt+0x6600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
-	DBG_FAULT(42)
-	FAULT(42)
-
-	.org ia64_ivt+0x6700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
-	DBG_FAULT(43)
-	FAULT(43)
-
-	.org ia64_ivt+0x6800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
-	DBG_FAULT(44)
-	FAULT(44)
-
-	.org ia64_ivt+0x6900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(ia32_exception)
-	DBG_FAULT(45)
-	FAULT(45)
-END(ia32_exception)
-
-	.org ia64_ivt+0x6a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
-ENTRY(ia32_intercept)
-	DBG_FAULT(46)
-#ifdef	CONFIG_IA32_SUPPORT
-	mov r31=pr		// save predicates in r31
-	mov r16=cr.isr		// r16 = cr.isr
-	;;
-	extr.u r17=r16,16,8	// get ISR.code
-	mov r18=ar.eflag	// current eflag value
-	mov r19=cr.iim		// old eflag value
-	;;
-	cmp.ne p6,p0=2,r17	// p6 = ISR.code != 2
-(p6)	br.cond.spnt 1f		// not a system flag fault
-	xor r16=r18,r19		// r16 = bits that differ between old and current eflag
-	;;
-	extr.u r17=r16,18,1	// get the eflags.ac bit
-	;;
-	cmp.eq p6,p0=0,r17	// p6 = the ac bit is the same in both values
-(p6)	br.cond.spnt 1f		// eflags.ac bit didn't change
-	;;
-	mov pr=r31,-1		// restore predicate registers
-#ifdef CONFIG_XEN
-	XEN_HYPER_RFI;
-#else
-	rfi
-#endif
-
-1:
-#endif	// CONFIG_IA32_SUPPORT
-	FAULT(46)
-END(ia32_intercept)
-
-	.org ia64_ivt+0x6b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt  (74)
-ENTRY(ia32_interrupt)
-	DBG_FAULT(47)
-#ifdef CONFIG_IA32_SUPPORT
-	mov r31=pr		// save predicates in r31
-	br.sptk.many dispatch_to_ia32_handler	// the real work is done in dispatch_to_ia32_handler
-#else
-	FAULT(47)
-#endif
-END(ia32_interrupt)
-
-	.org ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
-	DBG_FAULT(48)
-	FAULT(48)
-
-	.org ia64_ivt+0x6d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
-	DBG_FAULT(49)
-	FAULT(49)
-
-	.org ia64_ivt+0x6e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
-	DBG_FAULT(50)
-	FAULT(50)
-
-	.org ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
-	DBG_FAULT(51)
-	FAULT(51)
-
-	.org ia64_ivt+0x7000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7000 Entry 52 (size 16 bundles) Reserved
-	DBG_FAULT(52)
-	FAULT(52)
-
-	.org ia64_ivt+0x7100
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
-	DBG_FAULT(53)
-	FAULT(53)
-
-	.org ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
-	DBG_FAULT(54)
-	FAULT(54)
-
-	.org ia64_ivt+0x7300
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
-	DBG_FAULT(55)
-	FAULT(55)
-
-	.org ia64_ivt+0x7400
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
-	DBG_FAULT(56)
-	FAULT(56)
-
-	.org ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
-	DBG_FAULT(57)
-	FAULT(57)
-
-	.org ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
-	DBG_FAULT(58)
-	FAULT(58)
-
-	.org ia64_ivt+0x7700
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
-	DBG_FAULT(59)
-	FAULT(59)
-
-	.org ia64_ivt+0x7800
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
-	DBG_FAULT(60)
-	FAULT(60)
-
-	.org ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
-	DBG_FAULT(61)
-	FAULT(61)
-
-	.org ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
-	DBG_FAULT(62)
-	FAULT(62)
-
-	.org ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
-	DBG_FAULT(63)
-	FAULT(63)
-
-	.org ia64_ivt+0x7c00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
-	DBG_FAULT(64)
-	FAULT(64)
-
-	.org ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
-	DBG_FAULT(65)
-	FAULT(65)
-
-	.org ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
-	DBG_FAULT(66)
-	FAULT(66)
-
-	.org ia64_ivt+0x7f00
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
-	DBG_FAULT(67)
-	FAULT(67)
-
-#ifdef CONFIG_IA32_SUPPORT
-
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply move the following code to a more
-	 * suitable spot...
-	 */
-
-	// IA32 interrupt entry point
-
-ENTRY(dispatch_to_ia32_handler)
-	SAVE_MIN
-	;;
-	mov r14=cr.isr		// r14 = cr.isr; the interrupt number is extracted from it below
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
-	;;
-(p15)	ssm psr.i
-	adds r3=8,r2		// Base pointer for SAVE_REST
-	;;
-	SAVE_REST
-	;;
-	mov r15=0x80		// the interrupt number that is treated as a system call
-	shr r14=r14,16		// Get interrupt number
-	;;
-	cmp.ne p6,p0=r14,r15	// p6 = interrupt number != 0x80
-(p6)	br.call.dpnt.many b6=non_ia32_syscall	// any other interrupt goes to non_ia32_syscall
-
-	adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp	// 16 byte hole per SW conventions
-	adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
-	;;
-	cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
-	ld8 r8=[r14]		// get r8
-	;;
-	st8 [r15]=r8		// save original EAX in r1 (IA32 procs don't use the GP)
-	;;
-	alloc r15=ar.pfs,0,0,6,0	// must be first in an insn group
-	;;
-	ld4 r8=[r14],8		// r8 == eax (syscall number)
-	mov r15=IA32_NR_syscalls
-	;;
-	cmp.ltu.unc p6,p7=r8,r15	// p6 = syscall number < IA32_NR_syscalls
-	ld4 out1=[r14],8	// r9 == ecx
-	;;
-	ld4 out2=[r14],8	// r10 == edx
-	;;
-	ld4 out0=[r14]		// r11 == ebx
-	adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
-	;;
-	ld4 out5=[r14],PT(R14)-PT(R13)	// r13 == ebp
-	;;
-	ld4 out3=[r14],PT(R15)-PT(R14)	// r14 == esi
-	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
-	;;
-	ld4 out4=[r14]		// r15 == edi
-	movl r16=ia32_syscall_table
-	;;
-(p6)	shladd r16=r8,3,r16	// force ni_syscall if not valid syscall number
-	ld4 r2=[r2]		// r2 = current_thread_info()->flags
-	;;
-	ld8 r16=[r16]		// r16 = handler address read from the selected table slot
-	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
-	;;
-	mov b6=r16
-	movl r15=ia32_ret_from_syscall
-	cmp.eq p8,p0=r2,r0	// p8 = neither trace nor audit flag is set
-	;;
-	mov rp=r15		// rp = ia32_ret_from_syscall
-(p8)	br.call.sptk.many b6=b6	// not traced: call the handler; link goes to b6, rp stays as set
-	br.cond.sptk ia32_trace_syscall	// traced/audited: take the ia32_trace_syscall path
-
-non_ia32_syscall:
-	alloc r15=ar.pfs,0,0,2,0
-	mov out0=r14				// interrupt #
-	add out1=16,sp				// pointer to pt_regs
-	;;			// avoid WAW on CFM
-	br.call.sptk.many rp=ia32_bad_interrupt
-.ret1:	movl r15=ia64_leave_kernel
-	;;
-	mov rp=r15
-	br.ret.sptk.many rp	// "return" into ia64_leave_kernel
-END(dispatch_to_ia32_handler)
-#endif /* CONFIG_IA32_SUPPORT */
-
-#ifdef CONFIG_XEN
-	.section .text,"ax"
-GLOBAL_ENTRY(xen_event_callback)
-	mov r31=pr		// prepare to save predicates
-	;;
-	SAVE_MIN_WITH_COVER	// uses r31; defines r2 and r3
-	;;
-	movl r3=XSI_PSR_IC	// address of the virtual psr.ic word
-	mov r14=1
-	;;
-	st4 [r3]=r14		// vpsr.ic = 1
-	;;
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
-	;;
-	SAVE_REST
-	;;
-1:
-	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-	add out0=16,sp		// pass pointer to pt_regs as first arg
-	;;
-	br.call.sptk.many b0=evtchn_do_upcall
-	;;
-	movl r20=XSI_PSR_I_ADDR	// address of the word holding the pointer loaded next
-	;;
-	ld8 r20=[r20]		// r20 = pointer stored at XSI_PSR_I_ADDR
-	;;
-	adds r20=-1,r20		// vcpu_info->evtchn_upcall_pending
-	;;
-	ld1 r20=[r20]		// r20 = evtchn_upcall_pending byte
-	;;
-	cmp.ne p6,p0=r20,r0	// if there are pending events, 
-	(p6) br.spnt.few 1b	// call evtchn_do_upcall again.
-	br.sptk.many ia64_leave_kernel   // nothing pending: leave through ia64_leave_kernel
-END(xen_event_callback)
-
-
-	/*
-	 * There is no particular reason for this code to be here, other than that
-	 * there happens to be space here that would go unused otherwise.  If this
-	 * fault ever gets "unreserved", simply move the following code to a more
-	 * suitable spot...
-	 */
-
-GLOBAL_ENTRY(xen_bsw1)
-	/* FIXME: THIS CODE IS NOT NaT SAFE! */
-	mov r14=ar.unat		// keep the caller's ar.unat in r14; it is restored before returning
-	movl r30=XSI_B1NAT
-	;;
-	ld8 r30=[r30];;		// r30 = contents of XSI_B1NAT
-	mov ar.unat=r30		// install it as ar.unat for the ld8.fill sequence below
-	movl r30=XSI_BANKNUM;
-	mov r31=1;;
-	st4 [r30]=r31;		// XSI_BANKNUM = 1
-	movl r30=XSI_BANK1_R16;		// r30 walks the even slots (r16, r18, ...), stride 16
-	movl r31=XSI_BANK1_R16+8;;	// r31 walks the odd slots (r17, r19, ...), stride 16
-	ld8.fill r16=[r30],16; ld8.fill r17=[r31],16;;
-	ld8.fill r18=[r30],16; ld8.fill r19=[r31],16;;
-	ld8.fill r20=[r30],16; ld8.fill r21=[r31],16;;
-	ld8.fill r22=[r30],16; ld8.fill r23=[r31],16;;
-	ld8.fill r24=[r30],16; ld8.fill r25=[r31],16;;
-	ld8.fill r26=[r30],16; ld8.fill r27=[r31],16;;
-	ld8.fill r28=[r30],16; ld8.fill r29=[r31],16;;
-	ld8.fill r30=[r30]; ld8.fill r31=[r31];;	// the two pointer registers are reloaded last
-	mov ar.unat=r14		// restore the caller's ar.unat
-	br.ret.sptk.many b0
-END(xen_bsw1)
-
-   
-#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h b/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
deleted file mode 100644
index 5741b4e75d..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenminstate.h
+++ /dev/null
@@ -1,358 +0,0 @@
-#include <asm/cache.h>
-
-#ifdef CONFIG_XEN
-#include "../kernel/entry.h"
-#else
-#include "entry.h"
-#endif
-
-/*
- * For ivt.s we want to access the stack virtually so we don't have to disable translation
- * on interrupts.
- *
- *  On entry (pUStk/pKStk have already been set by DO_SAVE_MIN):
- *	r1:	pointer to current task (ar.k6)
- */
-#define MINSTATE_START_SAVE_MIN_VIRT								\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-	;;											\
-(pUStk)	mov.m r24=ar.rnat;			/* r24 = ar.rnat; DO_SAVE_MIN stores it to pt_regs */	\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;			/* compute base of RBS */		\
-(pKStk) mov r1=sp;					/* get sp  */				\
-	;;											\
-(pUStk) lfetch.fault.excl.nt1 [r22];								\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-	;;											\
-(pUStk)	mov ar.bspstore=r22;				/* switch to kernel RBS */		\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;			/* if in kernel mode, use sp (r12) */	\
-	;;											\
-(pUStk)	mov r18=ar.bsp;			/* r18 = ar.bsp; DO_SAVE_MIN later subtracts r22 from it */	\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
-
-#define MINSTATE_END_SAVE_MIN_VIRT								\
-	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
-	;;
-
-/*
- * For mca_asm.S we want to access the stack physically since the state is saved before we
- * go virtual and don't want to destroy the iip or ipsr.
- */
-#define MINSTATE_START_SAVE_MIN_PHYS								\
-(pKStk) mov r3=IA64_KR(PER_CPU_DATA);;								\
-(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;;							\
-(pKStk) ld8 r3 = [r3];;				/* r3 = pointer loaded from per-CPU ia64_mca_data */	\
-(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;;						\
-(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3;	/* sp = that stack area + STK_OFFSET - PT_REGS_SIZE */	\
-(pUStk)	mov ar.rsc=0;		/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */	\
-(pUStk)	addl r22=IA64_RBS_OFFSET,r1;		/* compute base of register backing store */	\
-	;;											\
-(pUStk)	mov r24=ar.rnat;			/* r24 = ar.rnat; DO_SAVE_MIN stores it to pt_regs */	\
-(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1;	/* compute base of memory stack */	\
-(pUStk)	mov r23=ar.bspstore;				/* save ar.bspstore */			\
-(pUStk)	dep r22=-1,r22,61,3;			/* compute kernel virtual addr of RBS */	\
-	;;											\
-(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1;		/* if in kernel mode, use sp (r12) */		\
-(pUStk)	mov ar.bspstore=r22;			/* switch to kernel RBS */			\
-	;;											\
-(pUStk)	mov r18=ar.bsp;			/* r18 = ar.bsp; DO_SAVE_MIN later subtracts r22 from it */	\
-(pUStk)	mov ar.rsc=0x3;		/* set eager mode, pl 0, little-endian, loadrs=0 */		\
-
-#define MINSTATE_END_SAVE_MIN_PHYS								\
-	dep r12=-1,r12,61,3;		/* make sp a kernel virtual address */			\
-	;;
-
-#ifdef MINSTATE_VIRT	/* includer wants the virtual-stack variants */
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT)
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_VIRT
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_VIRT
-#endif	/* MINSTATE_VIRT */
-
-#ifdef MINSTATE_PHYS	/* includer wants the physical-stack variants */
-# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT);; tpa reg=reg	/* tpa turns the pointer into a physical address */
-# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_PHYS
-# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_PHYS
-#endif	/* MINSTATE_PHYS */
-
-/*
- * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- *	psr.ic: off
- *	r31:	contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- *	psr.ic: off
- *	 r2 = points to &pt_regs.r16
- *	 r8 = contents of ar.ccv
- *	 r9 = contents of ar.csd
- *	r10 = contents of ar.ssd
- *	r11 = FPSR_DEFAULT
- *	r12 = kernel sp (kernel virtual address)
- *	r13 = points to current task_struct (kernel virtual address)
- *	p15 = TRUE if psr.i is set in cr.ipsr
- *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- *		preserved
- * CONFIG_XEN note: p6/p7 are not preserved, and the xen_bsw1 call overwrites r14
- *
- * Note that psr.ic is NOT turned on by this macro.  This is so that
- * we can pass interruption state as arguments to a handler.
- */
-#ifdef CONFIG_XEN
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
-	MINSTATE_GET_CURRENT(r16);	/* M (or M;;I) */					\
-	mov r27=ar.rsc;			/* M */							\
-	mov r20=r1;			/* A */							\
-	mov r25=ar.unat;		/* M */							\
-	/* mov r29=cr.ipsr;		/* M */							\
-	movl r29=XSI_IPSR;;		/* Xen: ipsr is read from the XSI_IPSR slot instead */	\
-	ld8 r29=[r29];;										\
-	mov r26=ar.pfs;			/* I */							\
-	/* mov r28=cr.iip;		/* M */							\
-	movl r28=XSI_IIP;;		/* Xen: iip is read from the XSI_IIP slot instead */	\
-	ld8 r28=[r28];;										\
-	mov r21=ar.fpsr;		/* M */							\
-	COVER;			/* B;; (or nothing) */					\
-	;;											\
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
-	;;											\
-	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
-	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
-	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
-	/* switch from user to kernel RBS: */							\
-	;;											\
-	invala;				/* M */							\
-	/* SAVE_IFS; /* see xen special handling below */						\
-	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
-	;;											\
-	MINSTATE_START_SAVE_MIN									\
-	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
-	adds r16=PT(CR_IPSR),r1;								\
-	;;											\
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
-	st8 [r16]=r29;		/* save cr.ipsr */						\
-	;;											\
-	lfetch.fault.excl.nt1 [r17];								\
-	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
-	mov r29=b0										\
-	;;											\
-	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
-	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
-(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r8,16;								\
-.mem.offset 8,0; st8.spill [r17]=r9,16;								\
-        ;;											\
-.mem.offset 0,0; st8.spill [r16]=r10,24;							\
-.mem.offset 8,0; st8.spill [r17]=r11,24;							\
-        ;;											\
-	/* xen special handling for possibly lazy cover */					\
-	movl r8=XSI_PRECOVER_IFS;								\
-	;;											\
-	ld8 r30=[r8];		/* r30 = word at XSI_PRECOVER_IFS, stored below as cr.ifs */	\
-	;;											\
-	st8 [r16]=r28,16;	/* save cr.iip */						\
-	st8 [r17]=r30,16;	/* save cr.ifs */						\
-(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
-	mov r8=ar.ccv;										\
-	mov r9=ar.csd;										\
-	mov r10=ar.ssd;										\
-	movl r11=FPSR_DEFAULT;   /* L-unit */							\
-	;;											\
-	st8 [r16]=r25,16;	/* save ar.unat */						\
-	st8 [r17]=r26,16;	/* save ar.pfs */						\
-	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
-	;;											\
-	st8 [r16]=r27,16;	/* save ar.rsc */						\
-(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
-(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
-	;;			/* avoid RAW on r16 & r17 */					\
-(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
-	st8 [r17]=r31,16;	/* save predicates */						\
-(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
-	;;											\
-	st8 [r16]=r29,16;	/* save b0 */							\
-	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
-	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
-.mem.offset 8,0; st8.spill [r17]=r12,16;							\
-	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r13,16;							\
-.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
-	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r15,16;							\
-.mem.offset 8,0; st8.spill [r17]=r14,16;							\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r2,16;								\
-.mem.offset 8,0; st8.spill [r17]=r3,16;								\
-	;;											\
-	EXTRA;											\
-	mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2;	/* b0 is parked in r2 across the xen_bsw1 call */	\
-	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
-	;;											\
-	movl r1=__gp;		/* establish kernel global pointer */				\
-	;;											\
-	/* MINSTATE_END_SAVE_MIN */
-#else	/* !CONFIG_XEN */
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)							\
-	MINSTATE_GET_CURRENT(r16);	/* M (or M;;I) */					\
-	mov r27=ar.rsc;			/* M */							\
-	mov r20=r1;			/* A */							\
-	mov r25=ar.unat;		/* M */							\
-	mov r29=cr.ipsr;		/* M */							\
-	mov r26=ar.pfs;			/* I */							\
-	mov r28=cr.iip;			/* M */							\
-	mov r21=ar.fpsr;		/* M */							\
-	COVER;				/* B;; (or nothing) */					\
-	;;											\
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16;						\
-	;;											\
-	ld1 r17=[r16];				/* load current->thread.on_ustack flag */	\
-	st1 [r16]=r0;				/* clear current->thread.on_ustack flag */	\
-	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16						\
-	/* switch from user to kernel RBS: */							\
-	;;											\
-	invala;				/* M */							\
-	SAVE_IFS;			/* caller-supplied insn that sets r30 (stored below as cr.ifs) */	\
-	cmp.eq pKStk,pUStk=r0,r17;		/* are we in kernel mode already? */		\
-	;;											\
-	MINSTATE_START_SAVE_MIN									\
-	adds r17=2*L1_CACHE_BYTES,r1;		/* really: biggest cache-line size */		\
-	adds r16=PT(CR_IPSR),r1;								\
-	;;											\
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;						\
-	st8 [r16]=r29;		/* save cr.ipsr */						\
-	;;											\
-	lfetch.fault.excl.nt1 [r17];								\
-	tbit.nz p15,p0=r29,IA64_PSR_I_BIT;							\
-	mov r29=b0										\
-	;;											\
-	adds r16=PT(R8),r1;	/* initialize first base pointer */				\
-	adds r17=PT(R9),r1;	/* initialize second base pointer */				\
-(pKStk)	mov r18=r0;		/* make sure r18 isn't NaT */					\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r8,16;								\
-.mem.offset 8,0; st8.spill [r17]=r9,16;								\
-        ;;											\
-.mem.offset 0,0; st8.spill [r16]=r10,24;							\
-.mem.offset 8,0; st8.spill [r17]=r11,24;							\
-        ;;											\
-	st8 [r16]=r28,16;	/* save cr.iip */						\
-	st8 [r17]=r30,16;	/* save cr.ifs */						\
-(pUStk)	sub r18=r18,r22;	/* r18=RSE.ndirty*8 */						\
-	mov r8=ar.ccv;										\
-	mov r9=ar.csd;										\
-	mov r10=ar.ssd;										\
-	movl r11=FPSR_DEFAULT;   /* L-unit */							\
-	;;											\
-	st8 [r16]=r25,16;	/* save ar.unat */						\
-	st8 [r17]=r26,16;	/* save ar.pfs */						\
-	shl r18=r18,16;		/* compute ar.rsc to be used for "loadrs" */			\
-	;;											\
-	st8 [r16]=r27,16;	/* save ar.rsc */						\
-(pUStk)	st8 [r17]=r24,16;	/* save ar.rnat */						\
-(pKStk)	adds r17=16,r17;	/* skip over ar_rnat field */					\
-	;;			/* avoid RAW on r16 & r17 */					\
-(pUStk)	st8 [r16]=r23,16;	/* save ar.bspstore */						\
-	st8 [r17]=r31,16;	/* save predicates */						\
-(pKStk)	adds r16=16,r16;	/* skip over ar_bspstore field */				\
-	;;											\
-	st8 [r16]=r29,16;	/* save b0 */							\
-	st8 [r17]=r18,16;	/* save ar.rsc value for "loadrs" */				\
-	cmp.eq pNonSys,pSys=r0,r0	/* initialize pSys=0, pNonSys=1 */			\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r20,16;	/* save original r1 */				\
-.mem.offset 8,0; st8.spill [r17]=r12,16;							\
-	adds r12=-16,r1;	/* switch to kernel memory stack (with 16 bytes of scratch) */	\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r13,16;							\
-.mem.offset 8,0; st8.spill [r17]=r21,16;	/* save ar.fpsr */				\
-	mov r13=IA64_KR(CURRENT);	/* establish `current' */				\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r15,16;							\
-.mem.offset 8,0; st8.spill [r17]=r14,16;							\
-	;;											\
-.mem.offset 0,0; st8.spill [r16]=r2,16;								\
-.mem.offset 8,0; st8.spill [r17]=r3,16;								\
-	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
-	;;											\
-	EXTRA;											\
-	movl r1=__gp;		/* establish kernel global pointer */				\
-	;;											\
-	MINSTATE_END_SAVE_MIN
-#endif	/* CONFIG_XEN */
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- *	psr.ic: on
- *	r2:	points to &pt_regs.r16
- *	r3:	points to &pt_regs.r17
- *	r8:	contents of ar.ccv
- *	r9:	contents of ar.csd
- *	r10:	contents of ar.ssd
- *	r11:	FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define SAVE_REST				\
-.mem.offset 0,0; st8.spill [r2]=r16,16;		\
-.mem.offset 8,0; st8.spill [r3]=r17,16;		\
-	;;					\
-.mem.offset 0,0; st8.spill [r2]=r18,16;		\
-.mem.offset 8,0; st8.spill [r3]=r19,16;		\
-	;;					\
-.mem.offset 0,0; st8.spill [r2]=r20,16;		\
-.mem.offset 8,0; st8.spill [r3]=r21,16;		\
-	mov r18=b6;	/* r18 is spilled: reuse for b6 */	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2]=r22,16;		\
-.mem.offset 8,0; st8.spill [r3]=r23,16;		\
-	mov r19=b7;	/* r19 is spilled: reuse for b7 */	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2]=r24,16;		\
-.mem.offset 8,0; st8.spill [r3]=r25,16;		\
-	;;					\
-.mem.offset 0,0; st8.spill [r2]=r26,16;		\
-.mem.offset 8,0; st8.spill [r3]=r27,16;		\
-	;;					\
-.mem.offset 0,0; st8.spill [r2]=r28,16;		\
-.mem.offset 8,0; st8.spill [r3]=r29,16;		\
-	;;					\
-.mem.offset 0,0; st8.spill [r2]=r30,16;		\
-.mem.offset 8,0; st8.spill [r3]=r31,32;		\
-	;;					\
-	mov ar.fpsr=r11;	/* M-unit */	\
-	st8 [r2]=r8,8;		/* ar.ccv */	\
-	adds r24=PT(B6)-PT(F7),r3;	/* r24 = &pt_regs.b6 (r3 is at f7 here) */	\
-	;;					\
-	stf.spill [r2]=f6,32;			\
-	stf.spill [r3]=f7,32;			\
-	;;					\
-	stf.spill [r2]=f8,32;			\
-	stf.spill [r3]=f9,32;			\
-	;;					\
-	stf.spill [r2]=f10;			\
-	stf.spill [r3]=f11;			\
-	adds r25=PT(B7)-PT(F11),r3;	/* r25 = &pt_regs.b7 (r3 is at f11 here) */	\
-	;;					\
-	st8 [r24]=r18,16;       /* b6 */	\
-	st8 [r25]=r19,16;       /* b7 */	\
-	;;					\
-	st8 [r24]=r9;        	/* ar.csd */	\
-	st8 [r25]=r10;      	/* ar.ssd */	\
-	;;
-
-#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(cover, mov r30=cr.ifs,)	/* COVER=cover, SAVE_IFS=mov r30=cr.ifs, no EXTRA */
-#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)	/* as above, with EXTRA copying r19 into r15 */
-#ifdef CONFIG_XEN
-#define SAVE_MIN		break 0;; /* FIXME: non-cover version only for ia32 support? */
-#else	/* !CONFIG_XEN */
-#define SAVE_MIN		DO_SAVE_MIN(     , mov r30=r0, )	/* no cover; r30 (the saved cr.ifs value) is zero */
-#endif	/* CONFIG_XEN */
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
deleted file mode 100644
index d8ebfb994a..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * ia64/xen/xenpal.S
- *
- * Alternate PAL  routines for Xen.  Heavily leveraged from
- *   ia64/kernel/pal.S
- *
- * Copyright (C) 2005 Hewlett-Packard Co
- *	Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-
-GLOBAL_ENTRY(xen_pal_call_static)
-	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
-	alloc loc1 = ar.pfs,5,5,0,0	// 5 inputs (in0-in4), 5 locals (loc0-loc4), no outputs
-#ifdef CONFIG_XEN
-	movl r22=running_on_xen;;
-	ld4 r22=[r22];;
-	cmp.eq p7,p0=r22,r0	// p7: running_on_xen == 0
-(p7)	br.cond.spnt.many __ia64_pal_call_static;;	// not on Xen: use __ia64_pal_call_static instead
-#endif
-	movl loc2 = pal_entry_point
-1:	{
-	  mov r28 = in0
-	  mov r29 = in1
-	  mov r8 = ip	// r8 = address of this bundle (label 1 above)
-	}
-	;;
-	ld8 loc2 = [loc2]		// loc2 <- entry point
-	tbit.nz p6,p7 = in4, 0	// p6: bit 0 of in4 set, p7: bit 0 clear
-	adds r8 = 1f-1b,r8	// r8 = address of the second "1:" label, used as the return point
-	mov loc4=ar.rsc			// save RSE configuration
-	;;
-	mov ar.rsc=0			// put RSE in enforced lazy, LE mode
-#ifdef CONFIG_XEN
-	mov r9 = r8		// park the return address while r8 is reused
-	XEN_HYPER_GET_PSR	// NOTE(review): presumably leaves the psr in r8 (copied to loc3 below) - confirm against the macro
-	;;
-	mov loc3 = r8
-	mov r8 = r9
-	;;
-#else    
-	mov loc3 = psr
-#endif    
-	mov loc0 = rp
-	.body
-	mov r30 = in2
-
-#ifdef CONFIG_XEN
-	// this is low priority for paravirtualization, but is called
-	// from the idle loop so confuses privop counting
-	movl r31=XSI_PSR_I_ADDR
-	;;
-	ld8 r31=[r31]		// r31 = pointer stored at XSI_PSR_I_ADDR
-	mov r22=1
-	;;
-	st1 [r31]=r22		// write 1 to the byte it points to
-	;;  
-(p6)	movl r31=XSI_PSR_IC
-	;;
-(p6)	st4.rel [r31]=r0	// bit 0 of in4 set: also write 0 to XSI_PSR_IC
-	;;
-	mov r31 = in3
-	mov b7 = loc2
-	;;
-#else
-(p6)	rsm psr.i | psr.ic
-	mov r31 = in3
-	mov b7 = loc2
-
-(p7)	rsm psr.i
-	;;
-(p6)	srlz.i
-#endif
-	mov rp = r8		// the callee returns to the "1:" label below
-	br.cond.sptk.many b7	// branch to the entry point loaded from pal_entry_point
-1:	mov psr.l = loc3
-	mov ar.rsc = loc4		// restore RSE configuration
-	mov ar.pfs = loc1
-	mov rp = loc0
-	;;
-	srlz.d				// serialize restoration of psr.l
-	br.ret.sptk.many b0
-END(xen_pal_call_static)
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
deleted file mode 100644
index 469f39e226..0000000000
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Support routines for Xen
- *
- * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@hp.com>
- */
-
-#include <asm/processor.h>
-#include <asm/asmmacro.h>
-
-#define isBP	p3	// are we the Bootstrap Processor?
-
-	.text
-GLOBAL_ENTRY(early_xen_setup)
-	mov r8=ar.rsc		// Initialized in head.S
-(isBP)	movl r9=running_on_xen;;
-	extr.u r8=r8,2,2;;	// Extract pl fields
-	cmp.eq p7,p0=r8,r0	// p7: !running on xen
-	mov r8=1		// booleanize.
-(p7)	br.ret.sptk.many rp;;	// pl == 0: return without touching anything
-(isBP)	st4 [r9]=r8		// boot processor only: running_on_xen = 1
-	movl r10=xen_ivt;;
-	
-	mov cr.iva=r10		// cr.iva = address of xen_ivt
-
-	/* Set xsi base.  */
-#define FW_HYPERCALL_SET_SHARED_INFO_VA			0x600
-(isBP)	mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA
-(isBP)	movl r28=XSI_BASE;;
-(isBP)	break 0x1000;;		// issue the break with r2 and r28 as set up above
-
-	br.ret.sptk.many rp
-	;;
-END(early_xen_setup)
-
-#include <xen/interface/xen.h>
-
-/* Stub for suspend.
-   Just force the stacked registers to be written in memory.  */	
-GLOBAL_ENTRY(xencomm_arch_hypercall_suspend)
-	;; 
-	alloc r20=ar.pfs,0,0,6,0	// frame of 6 output registers (r32-r37)
-	mov r2=__HYPERVISOR_sched_op	// r2 = __HYPERVISOR_sched_op for the break below
-	;; 
-	/* We don't want to deal with RSE.  */
-	flushrs
-	mov r33=r32		// copy the incoming value in r32 into r33
-	mov r32=2 // SCHEDOP_shutdown
-	;;
-	break 0x1000
-	;; 
-	br.ret.sptk.many b0
-END(xencomm_arch_hypercall_suspend)